Technical Article

Table-Column Data Profiler

,

Simply run the script with the DBName.SchName.TblName specified for the @Cat_Sch_TableName variable, e.g., 'Sales.dbo.Invoices'.

Returns a "Results" table with one row per Table's column, attributed with:

ColumnName

CondensedDataType

SequenceNumber (aka ORDINAL_POSITION)

NullCount

ModalValue

ModalCount

MinValue

MaxValue

MaxLength

Be warned: tall, wide tables may take up to 10 minutes or more to run...

DECLARE @Cat_Sch_TableName varchar(128) = 'Sales.dbo.Invoices'
-- Initialize fully qualified object name; script can be used like a stored procedure
DECLARE @sqlstring varchar(8000), @ColumnName varchar(128), @ColumnID smallint,
        @SequenceNumber smallint

-- cleanup temp tables        
IF OBJECT_ID('Tempdb..#RawColumns')      IS NOT NULL DROP TABLE #RawColumns
IF OBJECT_ID('Tempdb..#ProfiledColumns') IS NOT NULL DROP TABLE #ProfiledColumns

-- Populate working list of columns
SELECT ColumnID = IDENTITY(int, 1,1),
       COLUMN_NAME       AS ColumnName,
       ORDINAL_POSITION  AS SequenceNumber
      INTO #RawColumns 
     FROM information_schema.COLUMNS  
WHERE TABLE_CATALOG+'.'+TABLE_SCHEMA+'.'+TABLE_NAME=@Cat_Sch_TableName 

-- Initialize output table
CREATE TABLE #ProfiledColumns(
             ColumnName  varchar(128) NOT NULL,
             SequenceNumber varchar(10),
             NullCount int ,
             ModalValue varchar(255),
             ModalCount int,
             MinValue varchar(255),
             MaxValue varchar(255) ,
             MaxLength smallint )

-- Loop through columns
WHILE (EXISTS(
    SELECT  * 
    FROM #RawColumns 
    WHERE ColumnName IS NOT NULL ))

BEGIN

SELECT @ColumnID       = ColumnID, 
       @ColumnName     = ColumnName,  
       @SequenceNumber = SequenceNumber
FROM #RawColumns -- assign column data to variables

SET @sqlstring=
   'INSERT INTO #ProfiledColumns ' + 
   'SELECT '''+@ColumnName+''',' +
    CAST(@SequenceNumber AS varchar(10)) + ' , ' +
   'SUM(CASE WHEN ['+@ColumnName+'] IS NULL THEN 1 ELSE 0 END) ,'+ 
   '(SELECT TOP 1 ['+@ColumnName+'] FROM '+@Cat_Sch_TableName+' GROUP BY ['+@ColumnName+'] ORDER BY COUNT(*) DESC),' +
   '(SELECT TOP 1 COUNT(*) FROM '+@Cat_Sch_TableName+' GROUP BY ['+@ColumnName+'] ORDER BY COUNT(*) DESC),' +
   'CAST(MIN(['+@ColumnName+']) AS VARCHAR(255)),'+
   'CAST(MAX(['+@ColumnName+']) AS VARCHAR(255)),'+
   'MAX(LEN(CAST(['+@ColumnName+'] AS VARCHAR(255)))) '+   
   'FROM ' + @Cat_Sch_TableName    -- build query string
                  
DELETE FROM #RawColumns WHERE ColumnID=@ColumnID  -- one column down, next...
EXECUTE (@sqlstring)                              -- execute built query   
END                                               -- End loop block

-- Display data profile with condensed data type
SELECT 
ColumnName,
DATA_TYPE  + 
    CASE WHEN DATA_TYPE LIKE '%char'         THEN '('+LTRIM(CHARACTER_MAXIMUM_LENGTH)+')'
         WHEN COALESCE(NUMERIC_SCALE,0) <> 0 THEN '('+LTRIM(NUMERIC_PRECISION)+','+LTRIM(NUMERIC_SCALE)+')' ELSE ''
END AS DataTypeName,
NullCount,
ModalValue,
ModalCount,
MinValue,
MaxValue,
MaxLength 
FROM #ProfiledColumns  
JOIN information_schema.COLUMNS
  ON SequenceNumber=ORDINAL_POSITION  
WHERE TABLE_CATALOG+'.'+TABLE_SCHEMA+'.'+TABLE_NAME=@Cat_Sch_TableName 
ORDER BY CAST(SequenceNumber AS SMALLINT)

Rate

4.71 (7)

Share

Share

Rate

4.71 (7)