RE: Getting Random Results – SQLServerCentral

Ten Centuries

Points: 1322

August 23, 2007 at 9:23 am

Ian,

I think this is a good approach and it is actually what some coworkers and I ended up with awhile back (more them than me ). Our problem statement involved noncontiguous ranges, so using an identity column was not sufficient. The solution was to use row_number() ranking function and join against a temp table of random values.

I find that with 3.3 MM rows, this approach is about half the cpu and twice as fast as doing order by newid() for this contrived test. Subsequent executions take about 20% of the cpu that order by newid(), and still about twice as fast.

So when you have many rows and a noncontiguous key to index off of, this approach appears to be superior if not as simple.

-- BEGIN TEST PREP

create table test_table (number int primary key clustered, payload varchar(1));

WITH

-- first CTE which returns 10 rows (0-9)

digits AS (

SELECT 0 as Number

UNION SELECT 1

UNION SELECT 2

UNION SELECT 3

UNION SELECT 4

UNION SELECT 5

UNION SELECT 6

UNION SELECT 7

UNION SELECT 8

UNION SELECT 9

)

-- second CTE which returns 10 million rows by using

-- a CROSS JOIN on the first CTE

, dig AS (

SELECT

(millions.Number * 1000000)

+ (hThousands.Number * 100000)

+ (tThousands.Number * 10000)

+ (thousands.Number * 1000)

+ (hundreds.Number * 100)

+ (tens.Number * 10)

+ ones.Number AS Number

FROM digits AS ones

CROSS JOIN digits AS tens

CROSS JOIN digits AS hundreds

CROSS JOIN digits AS thousands

CROSS JOIN digits AS tThousands

CROSS JOIN digits AS hThousands

CROSS JOIN digits as millions

)

INSERT test_table(number, payload)

SELECT number, 'z'

FROM dig

WHERE number % 3 = 0

--END TEST PREP

set statistics time on

set statistics io on

--BEGIN TEST 1

select top 10 number, payload from test_table order by newid();

GO 2

--END TEST 1

--BEGIN TEST 2

DECLARE @TV table(rand_num int)

DECLARE @max-2 int, @num int

select @num=10, @max-2 = count(*) from test_table;

DECLARE @i int

SET @i = 0

WHILE (@i < @num)

BEGIN

INSERT @TV values (ceiling(rand() * @max-2))

SET @i = @i + 1

END

SELECT *

FROM

(

select

row_number() over (order by number) row,

from test_table

) a

WHERE a.row in (SELECT rand_num from @TV)

GO 2

--END TEST 2