Technical Article

relatively simple mode, median, mean

,

Here's a sample script to help you get some generic mode, median, and mean statistics for something. In my example, I'm getting the number of days something takes to start and stop some task. The idea is that I'd have a fairly short list of jobs, and record would record the job id, and start/stop dates. You should be able to extrapolate the mechanism to report on other things.

DECLARE @thisjob char(1)
DECLARE @medianTbl TABLE(
[job] char(1),
[mode] smallint,
[median] smallint,
[mean] smallint,
[total] int,
[updated] int DEFAULT 0
)
DECLARE @workTbl TABLE(
[job] char(1),
[startDate] smalldatetime,
[endDate] smalldatetime
)

/*start by loading up a working resultset of all the jobs you want to test*/INSERT @workTbl
SELECT jID,jStart,jEnd
FROM jobs
--WHERE some criteria

/*create initial set of results*/INSERT @medianTbl([job],[mean],[total])
SELECT [job],AVG(DATEDIFF(d,[startDate],[endDate])),COUNT(*) FROM @workTbl
GROUP BY [job] WITH ROLLUP

/*loop over each item, and get the statistics for each*/SELECT TOP 1 @thisjob=[job] FROM @medianTbl WHERE [updated]=0 AND [job] IS NOT NULL
WHILE @@ROWCOUNT>0 /* Run updates for each job */BEGIN
UPDATE @medianTbl
SET [mode]=(
SELECT TOP 1 DATEDIFF(d,[startDate],[endDate]) AS [timeTaken]
FROM @workTbl
WHERE [job]=@thisjob
GROUP BY DATEDIFF(d,[startDate],[endDate])
ORDER BY COUNT(DATEDIFF(d,[startDate],[endDate])) DESC),

[median]=(SELECT TOP 1 timeTaken FROM (
SELECT TOP 50 PERCENT DATEDIFF(d,[startDate],[endDate]) AS [timeTaken]
FROM @workTbl
WHERE [job]=@thisjob
ORDER BY timeTaken
) AS presort
ORDER BY timeTaken DESC),

[updated]=1
WHERE [job]=@thisjob

SELECT TOP 1 @thisjob=[job] FROM @medianTbl WHERE [updated]=0 AND [job] IS NOT NULL
END

/*get over-all statistics for the entire set*/UPDATE @medianTbl
SET [mode]=(SELECT TOP 1 DATEDIFF(d,[startDate],[endDate]) AS [timeTaken]
FROM @workTbl
GROUP BY DATEDIFF(d,[startDate],[endDate])
ORDER BY COUNT(DATEDIFF(d,[startDate],[endDate])) DESC),

[median]=(SELECT TOP 1 presort.timeTaken FROM (
SELECT TOP 50 PERCENT DATEDIFF(d,[startDate],[endDate]) AS [timeTaken]
FROM @workTbl
ORDER BY timeTaken
) AS presort
ORDER BY timeTaken DESC),

[updated]=1
WHERE [updated]=0 AND [job] IS NULL

/* Select just what we need */SELECT [job],[mode],[median],[mean],[total] FROM @medianTbl ORDER BY [job]

Rate

You rated this post out of 5. Change rating

Share

Share

Rate

You rated this post out of 5. Change rating