Blog Post

usp_SQLServerCarpenter_Tools_Estimate_Cardinality

,

/*
Author:Brahmanand Shukla (SQLServerCarpenter.com)
Date:04-Jul-2022
Purpose:This procedure will estimate the cardinality of all the columns of the supplied table list. 
If the table list is not supplied (or supplied with NULL, the default value) then it will estimate the cardinality for all the tables in the database.
Example: 
/* For specific tables with default @Column_Batch_Size */EXEC usp_SQLServerCarpenter_Tools_Estimate_Cardinality
@TableList='dbo.Parent,dbo.Child,dbo.SubChild'
/* For specific tables with custom @Column_Batch_Size */EXEC usp_SQLServerCarpenter_Tools_Estimate_Cardinality
@TableList='dbo.Parent,dbo.Child,dbo.SubChild'
, @Column_Batch_Size=50

/* For all tables with default @Column_Batch_Size */EXEC usp_SQLServerCarpenter_Tools_Estimate_Cardinality
/* For all tables with custom @Column_Batch_Size */EXEC usp_SQLServerCarpenter_Tools_Estimate_Cardinality
@Column_Batch_Size=50
*** Compatibility Warning: ***
This tool is compatible with SQL Server 2016 (13.x) and later since it uses the STRING_SPLIT function. 
In case it needs to be used in versions prior to SQL Server 2016 (13.x) 
1)Find an alternative to STRING_SPLIT function and you can use it without any hiccups.
2)In case this tool is not required to be run for the specific table list, it can be used even for versions prior to SQL Server 2016 (13.x). 
Just comment the relevant lines of code which is splitting the comma separated table names and loading it in the @tbl_InputTableList table variable.
*** Notes: ***
1) The usp_SQLServerCarpenter_Tools_Estimate_Cardinality uses the following physical table to avoid the repeatation of calculation of cardinality of same columns and table. 
2) The cardinality once calculated won't be calculated again, unless the respective column entries are deleted from the tbl_Cardinality table. 
IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE name = 'tbl_Cardinality')
BEGIN
CREATE TABLE tbl_Cardinality
(
ObjectIDINT
, SchemaNameSYSNAME
, TableNameSYSNAME
, TotalRowsDECIMAL
, ColumnNameSYSNAME
, Column_Distinct_Values_CountDECIMAL
, CardinalityASCAST(((Column_Distinct_Values_Count / NULLIF(TotalRows, 0)) * 100) AS INT)
)
END
*/CREATE OR ALTER PROCEDURE usp_SQLServerCarpenter_Tools_Estimate_Cardinality
(
@TableListNVARCHAR(MAX) = NULL/*Table Names should be in two part naming format and separated by comma. 
For e.g. 'dbo.Parent,dbo.Child,dbo.SubChild' etc.*/, @Column_Batch_SizeINT = 99999/*This is Column Batch Size. 
Default is 99999 which means Cardinality of All the columns of a table will be calculated in a single iteration. 
But if the table is heavy in terms of rows and columns then this parameter can be tuned/refined to limit the number of columns 
in an iteration of calculation of cardinality of columns of a table. 
For example if the table has 100 columns and the @Column_Batch_Size parameter is supplied as 20 then 
the Cardinality of 20 columns of a table will be calculated in an iteration. 
It will take 5 iterations to complete the cardinality calculation of all the columns of the table*/)
AS
BEGIN
SET NOCOUNT ON
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED
IF NOT EXISTS (SELECT 1 FROM sys.tables WHERE name = 'tbl_Cardinality')
BEGIN
CREATE TABLE tbl_Cardinality
(
ObjectIDINT
, SchemaNameSYSNAME
, TableNameSYSNAME
, TotalRowsDECIMAL
, ColumnNameSYSNAME
, Column_Distinct_Values_CountDECIMAL
, CardinalityASCAST(((Column_Distinct_Values_Count / NULLIF(TotalRows, 0)) * 100) AS INT)
)
END
/* Code to drop the temporary tables. It may be useful at the time of debugging the procedure.
IF OBJECT_ID('tempdb..#tmp_Columns') IS NOT NULL DROP TABLE #tmp_Columns;
IF OBJECT_ID('tempdb..#tmp_Tables') IS NOT NULL DROP TABLE #tmp_Tables;
*/DECLARE @tbl_InputTableListTABLE
(
[ObjectID]SYSNAME
)
IF (ISNULL(@TableList, '') = '')
BEGIN
INSERT INTO @tbl_InputTableList ([ObjectID])
SELECT[object_id] AS [ObjectID]
FROMsys.tables
END
ELSE
BEGIN
INSERT INTO @tbl_InputTableList ([ObjectID])
SELECTOBJECT_ID([value]) AS [ObjectID]
FROMSTRING_SPLIT(@TableList, ',')
END
SELECT C.object_id
, T.name AS TableName
, C.name AS ColumnName
, ROW_NUMBER() OVER(PARTITION BY C.object_id ORDER BY C.name ASC) AS RowID
INTO #tmp_Columns
FROM sys.columns C
INNER JOIN sys.tables T
ON T.object_id = C.object_id
AND T.type = 'U'
INNER JOIN @tbl_InputTableList I
ON I.ObjectID = T.object_id
LEFT JOIN tbl_Cardinality D
ON D.TableName = T.name
AND D.ColumnName = C.Name
WHERE D.TableName IS NULL
SELECT DISTINCT C.object_id
, S.name AS SchemaName
, T.name AS TableName
, IDENTITY(INT, 1, 1) AS RowID
INTO #tmp_Tables
FROM #tmp_Columns C
INNER JOIN sys.tables T
ON T.object_id = C.object_id
INNER JOIN sys.schemas S
ON S.schema_id = T.schema_id
DECLARE @SchemaNameAS  SYSNAME
, @TableNameAS  SYSNAME
, @ColumnName                           AS  SYSNAME
, @Object_ID                            AS  INT
, @Table_RowID                          AS  INT
, @Total_Rows                           AS  BIGINT
, @Column_From_RowID                    AS  INT
, @Column_To_RowIDAS  INT
 
SET @Table_RowID = 1;
WHILE EXISTS (SELECT 1 FROM #tmp_Tables WHERE RowID = @Table_RowID)
BEGIN
SELECT @Object_ID   =   object_id
, @SchemaName=SchemaName
, @TableName    =   TableName
FROM #tmp_Tables 
WHERE RowID = @Table_RowID;
SELECT @Column_From_RowID=1
, @Column_To_RowID=@Column_Batch_Size;
WHILE EXISTS (SELECT 1 FROM #tmp_Columns WHERE object_id = @Object_ID AND RowID BETWEEN @Column_From_RowID AND @Column_To_RowID)
BEGIN   
DECLARE @SQLString NVARCHAR(MAX);  
 
IF (@Column_From_RowID = 1)
BEGIN
SET @SQLString = N'SELECT @Total_Rows = COUNT(1)
FROM ' + QUOTENAME(@TableName) + ' WITH (NOLOCK)';
BEGIN TRY
EXECUTE sp_executesql @SQLString
, N'@Total_Rows BIGINT OUTPUT'
, @Total_Rows = @Total_Rows OUTPUT;  
END TRY
 
BEGIN CATCH
END CATCH
END
SELECT @SQLString=STUFF
(
(
SELECT N' UNION ALL SELECT '  
+ '' + CAST(@Object_ID AS VARCHAR(50)) + ' AS [ObjectID]'
+ ', ''' + QUOTENAME(@SchemaName) + '''' + ' AS [SchemaName]'
+ ', ''' + QUOTENAME(@TableName) + '''' + ' AS [TableName]'
+ ', ' + CAST(@Total_Rows AS VARCHAR(50)) + ' AS [TotalRows]'
+ ', ' + '''' + QUOTENAME(ColumnName) + '''' + ' AS [ColumnName]'
+ ', ' + 'CAST(COUNT(DISTINCT ' + QUOTENAME(ColumnName) + ') AS VARCHAR(500))' + ' AS [Column_Distinct_Values_Count]' 
+ ' FROM ' + QUOTENAME(@TableName)
FROM #tmp_Columns 
WHERE object_id = @Object_ID
AND RowID BETWEEN @Column_From_RowID AND @Column_To_RowID
FOR XML PATH ('')
), 1, 10, ''
)
BEGIN TRY
SET @SQLString = 'INSERT INTO tbl_Cardinality (ObjectID, SchemaName, TableName, TotalRows, ColumnName, Column_Distinct_Values_Count) ' 
+ @SQLString;
EXECUTE sp_executesql @SQLString;
END TRY
 
BEGIN CATCH
SELECT @SchemaName AS [SchemaName], @TableName AS [TableName], ERROR_MESSAGE() AS [ErrorMessage]
END CATCH

SELECT @Column_From_RowID=@Column_To_RowID + 1
, @Column_To_RowID=@Column_To_RowID + @Column_Batch_Size;
END
 
SET @Table_RowID = @Table_RowID + 1;
END
SELECT * FROM tbl_Cardinality;
IF OBJECT_ID('tempdb..#tmp_Columns') IS NOT NULL DROP TABLE #tmp_Columns;
IF OBJECT_ID('tempdb..#tmp_Tables') IS NOT NULL DROP TABLE #tmp_Tables;
END

Original post (opens in new tab)
View comments in original post (opens in new tab)

Rate

Share

Share

Rate