Object Explore

,

Usage:

First run this script is the database where you want to store this procedure. Then you can run the stored procedure with the following statement:

EXEC sp_ObjectExplore 'database.schema.tablename'

You do not need to supply databasename and schema (example: dbo) when table is in same database as this procedure.

If you want additional metrics concerning the distribution of all numeric variables, execute with parameter @distribution='yes'. Example:

If you want additional metrics concerning the distribution of all numeric variables, execute with parameter @distribution='yes'. Example:
EXEC sp_ObjectExplore 'database.schema.tablename'
, @distribution='yes'

If you are only interested in a subset of columns, you can specify them in the parameter @columns. Columnnames must be separated by a comma. You can specify them either with or without brackets []. Columnnames may not contain commas.

Example:

EXEC sp_ObjectExplore 'database.schema.tablename'
, @distribution='yes'
, @columns='column1, column2, etc'

Purpose:

Provide summary information and metrics for any column in a given Table/View. Metrics included are:

  • Total number of records in Object (Records)
  • Number of values in column (Cnt)
  • Number of unique values in column (CntDist)
  • Number of NULL values in column (NullValues)
  • Min and Max value in column (Min/Max)
  • Average value in column (Avg)
  • Standard Deviation in column (StDev)
  • Number of numeric values in column (IsNum)
  • Number of integer values in column (IsInt)

Optional metrics (only computed if procedure is executed with parameter @distribution='yes':

  • First Quartile/25% (Q1)
  • Median (Median)
  • Third Quartile/75% (Q3)
  • Interquartile range (IQR)
  • Skew (Skew)
  • Kurtosis (Kurt)
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

ALTER PROCEDURE [dbo].[sp_ObjectExplore]
	@FullObjectName varchar(200)
,	@Distribution varchar(3) = 'No'
,	@Columns varchar(max) = NULL
AS
-- ______________________________________________________ Object Explore _________________________________________________________
--
-- October 4th 2010, by Robin van Schaik
-- Version 1.6.3 BETA
--
-- USAGE
-- First run this script is the database where you want to store this procedure
-- Then you van run the stored procedure with the following statement:
--
-- EXEC sp_ObjectExplore 'database.schema.tablename'
--
-- You do not need to supply databasename and schema (example: dbo) when table is in same database as this procedure.
--
-- If you want additional metrics concerning the distribution of all numeric variables, execute with parameter @distribution='yes'.
-- Example:
-- EXEC sp_ObjectExplore 'database.schema.tablename'
-- , @distribution='yes'
-- 
-- If you are only interested in a subset of columns, you can specify them in the parameter @columns
-- ColumnNames must be separated by a comma. You can specify them either with or without brackets []
-- ColumnNames may not contain commas.
-- Example
-- EXEC sp_ObjectExplore 'database.schema.tablename'
-- , @distribution='yes'
-- , @columns='column1, column2, etc'
--
-- PURPOSE
-- Provide summary information and metrics for any column in a given Table/View
-- Metrics included are:
-- * Total number of records in Object (Records)
-- * Number of values in column (CNT)
-- * Number of unique values in column (CNTDist)
-- * Number of NULL values in column (NullValues)
-- * Min and Max value in column (Min/Max)
-- * Average value in column (Avg)
-- * Standard Deviation in column (StDev)
-- * Number of numeric values in column (IsNum)
-- * Number of integer values in column (IsInt)
-- Optional metrics (only computed if procedure is executed with parameter @distribution='yes'
-- * First Quartile/25% (Q1)
-- * Median (Median)
-- * Third Quartile/75% (Q3)
-- * Interquartile range (IQR)
-- * Skew (Skew)
-- * Kurtosis (Kurt)
--
-- VERSIONS
-- 1.0: 01-JUL-2009.
-- 1.1: 06-JUL-2009. Changed logic to determine primary key. Script did not work on columns
-- with multiple indexes (duplicate rows)
-- 1.2: 06-AUG-2009. Min and Max values for date fields converted (back) to yyyy-mm-dd hh:mi:ss(24h)
-- Fixing bug for all numeric ColumnNames (added brackets [] to query)
-- Prevent executing count function over certain DataTypes (like xml, text, etc).
-- 1.3: 31-OCT-2009. Fixed bug. Script did not run when table has a ColumnName with space(s).
-- 1.4: 01-NOV-2009. Prevent executing min and max function over certain DataTypes.
-- Added Average, Standard Deviation and percentage of NULL values.
-- 1.5: 12-NOV-2009. Added IsNum and IsInt variables. Isnum counts the number of records which
-- comply with the IsNumeric() statement (numbers including period, $, hiven, etc.)
-- IsInt counts the number of records that contain numeric values (only numbers).
-- 1.6 BETA: 19-AUG-2010.
-- Added Distribution variables for every numeric value. Quartiles, median, IQR, Skew and Kurtosis.
-- These statistics are only computed when procedure is executed with parameter @distribution='yes'
-- These stats are not computed for columns that are (or appear to be) ID's.
-- Added possibility to run this only for a subset of columns, using @columns parameter.
-- 1.6.1 BETA: 25-AUG-2010
-- Procedure is now Latin1_General_Binary compatable (thanks to mlabedz on sqlservercentral.com!)
-- 1.6.2. BETA: 26-AUG-2010
-- Ook counts op Text velden mogelijk maken.
-- 1.6.3. BETA: 04-OCT-2010
-- Fixed bug. Script failed (divide by zero) when stdev of a column was zero.
-- *******************************************************************************************************************************

SET NOCOUNT ON

/*
STEP 1: Initialize Procedure
*/
DECLARE @Object varchar(200)
DECLARE @Schema varchar(200)
DECLARE @Database varchar(200)

-- Break down parameter in Database/Schema/Object
SET @Object = PARSENAME(@FullObjectName,1)
SET @Schema = ISNULL(PARSENAME(@FullObjectName,2),'dbo')
SET @Database = PARSENAME(@FullObjectName,3)


-- Creating temporary table to store queries used for calculating metrics
CREATE TABLE #ObjectDef
(	ObjectName varchar(200)
,	ColumnName varchar(128)
,	ColumnID int
,	Query varchar(max)
,	DataType varchar(50)
,	MaxLength int
,	[Precision] int
,	Scale int
,	IsNullable tinyint
,	IsPrimaryKey tinyint
,	IsIdentity tinyint
,	SelectedColumn bit
)

-- Temporary table used for results/output
CREATE TABLE #ObjectAudit
(	ObjectName varchar(200)
,	ColumnName varchar(128)
,	ColumnID int
-- Column Specifications
,	DataType varchar(50)
,	MaxLength int
,	[Precision] int
,	Scale int
,	IsNullable tinyint
,	IsPrimaryKey tinyint
,	IsIdentity tinyint
-- Metrics
,	Records bigint -- All records in table
,	CNT bigint -- Number of values in column
,	[IsNum] bigint -- Is numeric (including period, $, - etc.)
,	[IsInt] bigint -- Is integer
,	CNTDIST bigint -- Number of distinct values in column
,	NullValues bigint -- Number of Null values in column
,	[Min] nvarchar(4000) -- Min value in column
,	[Max] nvarchar(4000) -- Max value in column
,	[Avg] decimal(38,5) -- Average value in column
,	[StDev] decimal(38,5) -- Standard Deviation
)

-- Insert all columns for object in temp table
-- Check appropiate Schema and Database
EXEC(
'INSERT INTO #ObjectDef (ObjectName, ColumnName, ColumnID, DataType, MaxLength,[Precision],Scale,IsNullable,IsPrimaryKey, IsIdentity )
SELECT '''
+@FullObjectName+'''
, b.name
, b.column_id
, type_name(user_type_id) AS DataType
, convert(int, b.max_length) as max_length
, b.[precision]
, b.scale
, b.is_nullable
, isnull(c.PrimaryKey,0)
, b.is_identity
FROM
' +@Database+'.sys.objects a
INNER JOIN
' +@Database+'.sys.columns b
ON a.object_id=b.object_id
LEFT OUTER JOIN
(
SELECT
a.object_id
, 1 as PrimaryKey
, c.column_id
FROM
'+@Database+'.sys.indexes a
INNER JOIN
'+@Database+'.sys.objects b
ON a.object_id=b.parent_object_id
AND a.name=b.name
INNER JOIN
'+@Database+'.sys.index_columns c
ON a.index_id=c.index_id
AND b.parent_object_id=c.object_id
WHERE
b.type=''PK''
) c
ON a.object_id=c.object_id
AND b.column_id=c.column_id
INNER JOIN
' +@Database+'.sys.schemas d
ON a.schema_id=d.schema_id
WHERE
a.name = '''+@Object+'''
AND d.name = '''+@Schema+'''
'
)

-- Parse string of requested Columns
-- If columns are selected the variable SelectedColumn in #ObjectDef will be set to 1
-- All other columns are set to 0.
-- If no columns are selected the variable SelectedColumn will remain NULL
IF @Columns IS NOT NULL
BEGIN
	CREATE table #columns
	(ColumnName varchar(128))

	DECLARE @ColumnInsert varchar(max)
	DECLARE @ColumnInsertTrim varchar(130)

	SET @Columns=','+@Columns -- adding comma in able to maintain logic below if only one column name is supplied

	WHILE CHARINDEX(',',@Columns)>0
	BEGIN
		SET @ColumnInsert=REVERSE(LEFT(REVERSE(@Columns),charindex(',',REVERSE(@Columns),1)-1))
		SET @ColumnInsertTrim=LTRIM(RTRIM(@ColumnInsert))
		INSERT into #columns (ColumnName)
		-- If ColumnNames are specified between brackets, remove brackets:
		SELECT CASE WHEN @ColumnInsertTrim LIKE '[[]%]' THEN SUBSTRING(@ColumnInsertTrim,2,DATALENGTH(@ColumnInsertTrim)-2) ELSE @ColumnInsertTrim END
		SET @Columns=REPLACE(@Columns,','+@ColumnInsert,'')
	END
	UPDATE #ObjectDef SET SelectedColumn=1 WHERE ColumnName in (SELECT ColumnName FROM #columns)
	UPDATE #ObjectDef SET SelectedColumn=0 WHERE SelectedColumn IS NULL
END


-- Validate input parameters

-- If Object does not exist, end script
IF NOT EXISTS (SELECT top 1 ObjectName FROM #ObjectDef)
BEGIN
	DROP TABLE
		#ObjectDef
	,	#ObjectAudit
	PRINT 'Object '+UPPER(@FullObjectName)+' does not exist! Please enter a valid object name.'
	RETURN
END

-- If one or more of the specified columns do not exist, end script

-- or columns specified in @Columns?
IF @Columns is not null
BEGIN
-- If so check one or more columns do not exist in the specified object
	IF EXISTS (
				SELECT top 1
					a.ColumnName
				FROM
					#Columns a
				LEFT OUTER JOIN
					#ObjectDef b
				ON	a.ColumnName=b.ColumnName
				WHERE b.ColumnName is NULL
				)
	-- Are there missing columns? End Script
	BEGIN
	-- identify missing columns
		SELECT
			a.ColumnName
		INTO #MissingColumns
		FROM
			#Columns a
		LEFT OUTER JOIN
			#ObjectDef b
		ON	a.ColumnName=b.ColumnName
		WHERE
			b.ColumnName is NULL

		DECLARE @MaxMissingColumn varchar(128) SET @MaxMissingColumn=''
		DECLARE @MissingColumns varchar(max) SET @MissingColumns=''
		-- Building string to output missing columns to user
		WHILE (SELECT COUNT(ColumnName) from #MissingColumns)>0
		BEGIN
			SET @MaxMissingColumn=(SELECT MAX(ColumnName) FROM #MissingColumns)
			SET @Columns=REPLACE(@Columns,@MaxMissingColumn,'')
			DELETE FROM #MissingColumns WHERE ColumnName=@MaxMissingColumn
			SET @MissingColumns=','+UPPER(@MaxMissingColumn)+@MissingColumns
		END

		DROP TABLE
			#ObjectDef
		,	#ObjectAudit
		,	#Columns
		-- output to user:
		PRINT 'One or more of the requested columns do not exist in object: ' + UPPER(@FullObjectName)+
		CHAR(13)+'Invalid column(s): '+SUBSTRING(@MissingColumns,2,LEN(@MissingColumns)-1)

		RETURN
	END
END

/*
STEP 2: Defining queries for calculating column metrics
*/

-- First determine which DataTypes are suitable for operators count, min, max and avg
-- Add select statement per column in temp table
-- Naming convention for the metrics:
-- [ColumnID]_CNT (number of values)
-- _CNTDIST (distinct values), _MIN, _MAX, _Nulls (number of null values), _AVG


-- available DataTypes
SELECT
	name
INTO #DataTypes
FROM 
	sys.types

ALTER table #DataTypes
ADD O_COUNT tinyint
,	O_MIN tinyint
,	O_NUM tinyint
,	O_ISNUM tinyint


-- COUNT function
UPDATE #DataTypes
SET O_COUNT=1
WHERE name in
(
'bigint','binary','bit','char','date','datetime','datetime2'
, 'datetimeoffset','decimal','float','hierarchyid','int'
, 'money','nchar','numeric','nvarchar','real','smalldatetime'
, 'smallint','smallmoney','sql_variant','sysname','time'
, 'timestamp','tinyint','uniqueidentifier','varbinary'
, 'varchar', 'text', 'ntext'
)

-- MIN/MAX function
UPDATE #DataTypes
SET O_MIN =1
WHERE name in
(
'int','bigint','binary','bit','char','date','datetime'
, 'datetime2','datetimeoffset','decimal','float','hierarchyid'
, 'int','money','nchar','numeric','nvarchar','real'
, 'smalldatetime','smallint','smallmoney','sql_variant'
, 'sysname','time','tinyint','varbinary','varchar'
)

-- NUMERIC FUNCTIONS (avg, stdev, etc.)
UPDATE #DataTypes
SET O_NUM =1
WHERE name in
(
'int','smallint','bigint','tinyint','float','decimal','numeric'
, 'money','smallmoney','real'
)

-- ISNUMERIC / ISINT function
UPDATE #DataTypes
SET O_ISNUM =1
WHERE name in
(
'bigint','binary','bit','char','datetime','decimal','float','int'
, 'money','nchar','numeric','nvarchar','real','smalldatetime'
, 'smallint','smallmoney','sysname','tinyint','uniqueidentifier'
, 'varbinary','varchar'
)


UPDATE #ObjectDef
SET Query=
CASE
	WHEN DataType in (SELECT name FROM #DataTypes WHERE O_COUNT=1)
	THEN	'COUNT('+
		CASE 
			WHEN DataType in ('text','ntext') 
			THEN 'cast(['+ColumnName+'] as nvarchar(max))'
			ELSE '['+ColumnName+']' 
		END 
		+') as ['+CAST(ColumnID as varchar)+'_CNT]'
		+',COUNT(distinct '+
		CASE 
			WHEN DataType in ('text','ntext') 
			THEN 'cast(['+ColumnName+'] as nvarchar(max))'
			ELSE  '['+ColumnName+']' 
		END +') as ['+CAST(ColumnID as varchar)+'_CNTDIST]'
	ELSE 'NULL as ['+CAST(ColumnID as varchar)+'_CNT], NULL as ['+CAST(ColumnID as varchar)+'_CNTDIST]'

END
-- Min and Max statement not posssible on DataType "Bit"
-- Therefore, convert this DataType to a varchar
+
CASE
	WHEN DataType IN (SELECT name FROM #DataTypes WHERE O_MIN=1)
	THEN ', MIN('+
		CASE
			WHEN DataType = 'bit'
			THEN 'cast(['+ColumnName+'] as int)'
			WHEN DataType IN ('datetime','datetime2','smalldatetime','date','datetimeoffset','hierarchyid','sql_variant')
			THEN 'convert(varchar,['+ColumnName+'],120)' -- convert date to format yyyy-mm-dd hh:mm:ss
			ELSE '['+ColumnName+']'
		END+')'
	ELSE ', NULL ' -- MIN function not available for DataType
END +' as ['+CAST(ColumnID as varchar)+'_MIN]'
+
CASE
	WHEN DataType IN (SELECT name FROM #DataTypes WHERE O_MIN=1)
	THEN ', MAX('+
		CASE
			WHEN DataType = 'bit'
			THEN 'cast(['+ColumnName+'] as int)'
			WHEN DataType IN ('datetime','datetime2','smalldatetime','date','datetimeoffset','hierarchyid','sql_variant')
			THEN 'convert(varchar,['+ColumnName+'],120)'
			ELSE '['+ColumnName+']'
		END+')'
	ELSE ', NULL ' -- MAX function not available for DataType
END +' as ['+CAST(ColumnID as varchar)+'_MAX]'
+',SUM(CASE WHEN ['+ColumnName+'] is null then 1 else 0 end) as ['+CAST(ColumnID as varchar)+'_Nulls]'
+ CASE
	WHEN DataType in (SELECT name FROM #DataTypes WHERE O_NUM=1)
-- AVG for ID's not particularly usefull, therefore filter them out:
	AND IsPrimaryKey=0
	AND IsIdentity=0
	THEN ', AVG(['+ColumnName+']/1.0)'
-- AVG function not available for DataType
	ELSE ', NULL' 
END + ' as ['+CAST(ColumnID as varchar)+'_AVG]'
+	CASE
		WHEN DataType in (SELECT name FROM #DataTypes WHERE O_NUM=1)
	-- Standard Deviation for ID's not particularly usefull, therefore filter them out:
		AND IsPrimaryKey=0
		AND IsIdentity=0
		THEN ', STDEV(['+ColumnName+']/1.0)'
	-- STDDEV function not available for DataType
		ELSE ', NULL' 
	END + ' as ['+CAST(ColumnID as varchar)+'_DEV]'
+	CASE
		WHEN DataType in (SELECT name FROM #DataTypes WHERE O_ISNUM=1)
		THEN ', SUM(ISNUMERIC(['+ColumnName+']))'
		-- SUM function not available for DataType
		ELSE ', NULL' END + ' as ['+CAST(ColumnID as varchar)+'_ISNUM]'
+	CASE
		WHEN DataType in (SELECT name FROM #DataTypes WHERE O_ISNUM=1)
		THEN ', SUM(CASE WHEN ['+ColumnName+'] like ''%[^0-9]%'' OR ['+ColumnName+'] IS NULL then 0 else 1 end)'
		-- Not available for DataType
		ELSE ', NULL' 
	END + ' as ['+CAST(ColumnID as varchar)+'_ISINT]'


-- Concatenating the individual select statement per column to one select statement for entire object
DECLARE @Query varchar(max)
DECLARE @Sql varchar(max)
DECLARE @Sql2 varchar(max)
DECLARE @Sql3 varchar(max)

-- Start Cursor c_Query
DECLARE c_Query CURSOR FAST_FORWARD FOR
SELECT Query FROM #ObjectDef WHERE isnull(SelectedColumn,1)<>0


OPEN c_Query
FETCH NEXT FROM c_Query INTO @Query
WHILE @@FETCH_STATUS = 0

BEGIN
	SET @Sql =cast(@Query as varchar(max))
	-- Adding current statement to previous statement. Last run results in one select statement.
	SET @Sql2=cast(@Sql as varchar (max))+','+ISNULL(@Sql2,'')

	FETCH NEXT FROM c_Query INTO @Query
END

CLOSE c_Query
DEALLOCATE c_Query
-- End Cursor c_Query

-- Adding SELECT and FROM Statement, resulting in a query
SET @Sql3='select count(*) as Records, '+left(ltrim(rtrim(@Sql2)),LEN(ltrim(rtrim(@Sql2)))-1)
+'into ##ObjectSingleRow from '+@FullObjectName

-- Execute the final query
-- This results in a one-row table with a column for every combination of metric/column
-- USING Exec statement sets up a new connection, therefore results are added to a
-- Global temporary table (##)
EXEC(@Sql3)

/*
STEP 3: Output
*/
-- Add columns and column specifications to temp table
INSERT into #ObjectAudit
(	ObjectName
,	ColumnName
,	ColumnID
,	DataType
,	MaxLength
,	[Precision]
,	Scale
,	IsNullable
,	IsPrimaryKey
,	IsIdentity
)
SELECT
	ObjectName
,	ColumnName
,	ColumnID
,	DataType
,	MaxLength
,	[Precision]
,	Scale
,	IsNullable
,	IsPrimaryKey
,	IsIdentity
FROM
	#ObjectDef
WHERE
	isnull(SelectedColumn,1)<>0


DECLARE @Column varchar(5)
DECLARE c_update CURSOR FAST_FORWARD FOR
SELECT ColumnID FROM #ObjectAudit

-- Start cursor c_update
OPEN c_update
FETCH NEXT FROM c_update INTO @Column
WHILE @@FETCH_STATUS = 0

BEGIN
	-- Define Update Query for updating metric columns
	SET @Sql = 'UPDATE #ObjectAudit
SET
Records=(select Records from ##ObjectSingleRow)
, CNT=(select ['+@Column+'_CNT] from ##ObjectSingleRow)
, IsNum=(select ['+@Column +'_ISNUM] from ##ObjectSingleRow)
, IsInt=(select ['+@Column +'_ISINT] from ##ObjectSingleRow)
, CNTDIST=(select ['+@Column +'_CNTDIST] from ##ObjectSingleRow)
, Min=(select ['+@Column +'_MIN] from ##ObjectSingleRow)
, Max=(select ['+@Column +'_MAX] from ##ObjectSingleRow)
, NullValues=(select ['+@Column+'_Nulls] from ##ObjectSingleRow)
, Avg=(select ['+@Column +'_AVG] from ##ObjectSingleRow)
, StDev=(select ['+@Column +'_DEV] from ##ObjectSingleRow)
WHERE ColumnID= '+@Column+''
-- Executing above update statement
	EXEC(@Sql)
	FETCH NEXT FROM c_update INTO @Column
END

CLOSE c_update
DEALLOCATE c_update
-- END CURSOR c_update


-- DISTRIBUTION
-- Only calculate distribution variables if procedure is executed with parameter 'distribution'
-- Do not calculate for key/ID variables
IF EXISTS (
		SELECT top 1 ColumnName 
		FROM 
			#ObjectAudit
		WHERE
			DataType in (
						SELECT
							[name]
						FROM
							#DataTypes
						WHERE O_NUM=1
						)
		AND IsPrimaryKey=0
		AND IsIdentity=0
		AND CNT>=4 -- to avoid divide by 0 error when calculating kurtosis
		AND [StDev]>0
		-- filter out variables which are probably Key / ID variables:
		AND NOT(	IsInt=CNT
				AND CAST([Max] as bigint)-CAST([Min] as bigint)+1=[CNT]
				AND CNTDIST=CNT
				)
		)
AND LOWER(@Distribution)='yes'

BEGIN
	ALTER TABLE #ObjectAudit
	ADD Q1 decimal(38,5), Median decimal(38,5), Q3 decimal(38,5), IQR decimal(38,5), Skew numeric(38,3), Kurt numeric(38,3)

	DECLARE @ColumnName as varchar(128)
	DECLARE c_ntiles cursor fast_forward for
	SELECT
		ColumnName
	FROM
		#ObjectAudit
	WHERE
		DataType IN (
					SELECT
						[Name]
					FROM
						#DataTypes
					WHERE
						O_NUM=1
					)
	AND CNT>=4 -- to avoid divide by 0 error when calculating kurtosis
	AND [StDev]>0
	AND IsPrimaryKey=0
	AND IsIdentity=0
	-- filter out variables which are probably Key / ID variables:
	AND NOT(	IsInt=CNT
			AND CAST([Max] as bigint)-CAST([Min] as bigint)+1=[CNT]
			AND CNTDIST=CNT
			)

	OPEN c_ntiles
	FETCH NEXT FROM c_ntiles INTO @ColumnName
	WHILE @@FETCH_STATUS = 0

	BEGIN
		SET @Sql = '
DECLARE @CNT as decimal(38,5)
DECLARE @STDEV as decimal(38,5)
DECLARE @AVG as decimal(38,5)
DECLARE @CORR_SKEW as numeric(10,9)
DECLARE @CORR_KURT as numeric(10,9)
DECLARE @SUBFACT as numeric(10,9)
DECLARE @Q1 as int
DECLARE @Q2a as decimal(38,5)
DECLARE @Q2b as decimal(38,5)
DECLARE @Q3 as int

SET @CNT=(SELECT CNT FROM #ObjectAudit WHERE ColumnName='''+@ColumnName+''')
SET @CORR_SKEW=(@CNT)/(@CNT-1)/(@CNT-2)
SET @CORR_KURT=(@CNT)*(@CNT+1)/(@CNT-1)/(@CNT-2)/(@CNT-3)
SET @SUBFACT=3*SQUARE((@CNT-1))/(@CNT-2)/(@CNT-3)

SET @STDEV=(SELECT stDEV FROM #ObjectAudit WHERE ColumnName='''+@ColumnName+''')
SET @AVG=(SELECT Avg FROM #ObjectAudit WHERE ColumnName='''+@ColumnName+''')
SET @Q1 = round(0.25*(@CNT+1),0)
SET @Q2a = 0.50*(@CNT+@CNT%2)
SET @Q2b = 0.50*(@CNT+@CNT%2)+(@CNT+1)%2
SET @Q3 = round(0.75*(@CNT+1),0)

SELECT ['+@ColumnName+'], row_number() OVER (partition by 1 ORDER BY ['+@ColumnName+']) as rownum
, POWER((['+@ColumnName+']*1.0-@AVG)/@STDEV,3) as Skew
, SQUARE(SQUARE(((['+@ColumnName+']-@AVG)/@STDEV))) as Kurt
INTO #quartiles_rows
FROM '+@FullObjectName+' where ['+@ColumnName+']>0

SELECT
MAX(CASE rownum WHEN @Q1 THEN ['+@ColumnName+'] ELSE null END) as Q1
, MAX(CASE rownum when @Q2a THEN ['+@ColumnName+'] ELSE null END) as Q2a
, MAX(CASE rownum when @Q2b THEN ['+@ColumnName+'] ELSE null END) as Q2b
, MAX(CASE rownum when @Q3 THEN ['+@ColumnName+'] ELSE null END) as Q3
, SUM(Skew)*@CORR_SKEW as Skew
, SUM(Kurt)*@CORR_KURT-@SUBFACT as Kurt
INTO ##quartiles
FROM #quartiles_rows
'
		EXEC (@Sql)

		UPDATE #ObjectAudit
		SET Q1=##Quartiles.Q1
		,	Median=(##Quartiles.Q2a+##Quartiles.Q2b)/2
		,	Q3=##Quartiles.Q3
		,	Skew=##Quartiles.Skew
		,	Kurt=##Quartiles.Kurt
		,	IQR=##Quartiles.Q3-##Quartiles.Q1
		FROM
			##Quartiles
		WHERE
			ColumnName=@ColumnName

		DROP TABLE ##Quartiles

		FETCH NEXT FROM c_ntiles INTO @ColumnName

	END

	CLOSE c_ntiles
	DEALLOCATE c_ntiles

END


-- Output to screen
SELECT *
,	CAST(CAST(NullValues as decimal(38,2))/Records*100 as decimal(38,2)) as NullPerc
,	CASE WHEN CNT>0 THEN CAST(CAST(CNTDIST as decimal(38,2))/CNT*100 as decimal(38,2)) ELSE NULL END as DistinctPerc
FROM
	#ObjectAudit

-- Clean up!
DROP TABLE
	#ObjectDef
,	#ObjectAudit
,	##ObjectSingleRow
/*
End Of Script
*/
GO

SET ANSI_NULLS OFF
GO
SET QUOTED_IDENTIFIER OFF
GO

Rate

Share

Share

Rate