Ian,
I think this is a good approach and it is actually what some coworkers and I ended up with awhile back (more them than me ). Our problem statement involved noncontiguous ranges, so using an identity column was not sufficient. The solution was to use row_number() ranking function and join against a temp table of random values.
I find that with 3.3 MM rows, this approach is about half the cpu and twice as fast as doing order by newid() for this contrived test. Subsequent executions take about 20% of the cpu that order by newid(), and still about twice as fast.
So when you have many rows and a noncontiguous key to index off of, this approach appears to be superior if not as simple.
-- BEGIN TEST PREP
create table test_table (number int primary key clustered, payload varchar(1));
WITH
-- first CTE which returns 10 rows (0-9)
digits AS (
SELECT 0 as Number
UNION SELECT 1
UNION SELECT 2
UNION SELECT 3
UNION SELECT 4
UNION SELECT 5
UNION SELECT 6
UNION SELECT 7
UNION SELECT 8
UNION SELECT 9
)
-- second CTE which returns 10 million rows by using
-- a CROSS JOIN on the first CTE
, dig AS (
SELECT
(millions.Number * 1000000)
+ (hThousands.Number * 100000)
+ (tThousands.Number * 10000)
+ (thousands.Number * 1000)
+ (hundreds.Number * 100)
+ (tens.Number * 10)
+ ones.Number AS Number
FROM digits AS ones
CROSS JOIN digits AS tens
CROSS JOIN digits AS hundreds
CROSS JOIN digits AS thousands
CROSS JOIN digits AS tThousands
CROSS JOIN digits AS hThousands
CROSS JOIN digits as millions
)
INSERT test_table(number, payload)
SELECT number, 'z'
FROM dig
WHERE number % 3 = 0
go
--END TEST PREP
set statistics time on
set statistics io on
--BEGIN TEST 1
select top 10 number, payload from test_table order by newid();
GO 2
--END TEST 1
--BEGIN TEST 2
DECLARE @TV table(rand_num int)
DECLARE @max-2 int, @num int
select @num=10, @max-2 = count(*) from test_table;
DECLARE @i int
SET @i = 0
WHILE (@i < @num)
BEGIN
INSERT @TV values (ceiling(rand() * @max-2))
SET @i = @i + 1
END
SELECT *
FROM
(
select
row_number() over (order by number) row,
*
from test_table
) a
WHERE a.row in (SELECT rand_num from @TV)
GO 2
--END TEST 2