March 13, 2013 at 4:45 am
Hi Eugine and all, your solution works fine! a lot of thanks for all.
But I Have any trouble more, if I have one type in records like this:
CREATE TABLE #TEMP (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint)
insert into #TEMP select 1, '01551', '20/02/2013 17:41:45.000',1 --ok
insert into #TEMP select 19, '01551', '20/02/2013 17:41:45.400',1
insert into #TEMP select 39, '01551', '20/02/2013 17:41:45.850',1
insert into #TEMP select 2, '01551', '20/02/2013 17:41:45.900',1
insert into #TEMP select 9, '01551', '20/02/2013 17:41:45.950',1
insert into #TEMP select 31, '01551', '20/02/2013 17:41:47.140',1--ok
insert into #TEMP select 40, '01551', '20/02/2013 17:41:47.180',1
insert into #TEMP select 32, '01551', '20/02/2013 17:41:47.940',1
insert into #TEMP select 20, '01551', '20/02/2013 17:41:45.700',2--ok
insert into #TEMP select 38, '01551', '20/02/2013 17:41:45.750',2
insert into #TEMP select 8, '01551', '20/02/2013 17:41:45.800',2
insert into #TEMP select 21, '01551', '20/02/2013 17:41:46.100',2
insert into #TEMP select 3, '01551', '20/02/2013 17:41:46.150',2
insert into #TEMP select 22, '01551', '20/02/2013 17:41:46.990' ,2
insert into #TEMP select 33, '01551', '20/02/2013 17:41:48.120',2--ok
insert into #TEMP select 34, '01551', '20/02/2013 17:41:48.720',2
insert into #TEMP select 41, '01551', '20/02/2013 17:41:48.980',2
insert into #TEMP select 10, '01551', '20/02/2013 20:41:45.600',2--ok
insert into #TEMP select 11, '01551', '20/02/2013 20:41:45.900',2
insert into #TEMP select 4, '01551', '20/02/2013 20:41:47.000',2
insert into #TEMP select 35, '01551', '20/02/2013 20:41:47.100',2
insert into #TEMP select 36, '01551', '20/02/2013 20:41:47.600',2--ok
insert into #TEMP select 37, '01551', '20/02/2013 20:41:47.900',2
insert into #TEMP select 24, '01551', '20/02/2013 20:41:47.700',2
insert into #TEMP select 25, '01551', '20/02/2013 20:41:48.990',2
insert into #TEMP select 26, '01551', '20/02/2013 20:41:49.100',2
insert into #TEMP select 5, '01552', '20/02/2013 17:42:45.000',1--ok
insert into #TEMP select 12, '01552', '20/02/2013 17:42:45.500',1
insert into #TEMP select 6, '01552', '20/02/2013 17:42:46.000',1
insert into #TEMP select 13, '01552', '20/02/2013 17:42:46.800',1
insert into #TEMP select 12, '01552', '20/02/2013 17:42:45.500',2
insert into #TEMP select 27, '01552', '20/02/2013 17:42:46.100',2
insert into #TEMP select 28, '01552', '20/02/2013 17:42:46.600',2
insert into #TEMP select 42, '01552', '20/02/2013 17:42:47.600',2
insert into #TEMP select 43, '01552', '20/02/2013 17:42:47.700',1
insert into #TEMP select 44, '01552', '20/02/2013 17:42:47.900',1
insert into #TEMP select 29, '01552', '20/02/2013 19:45:45.400',2--ok
insert into #TEMP select 30, '01552', '20/02/2013 19:45:45.900',2
insert into #TEMP select 15, '01552', '20/02/2013 19:45:46.200',2
insert into #TEMP select 45, '01552', '20/02/2013 19:45:47.400',2--ok
insert into #TEMP select 46, '01552', '20/02/2013 19:45:47.800',2
insert into #TEMP select 16, '01553', '20/02/2013 19:45:45.100',1--ok
insert into #TEMP select 17, '01553', '20/02/2013 19:45:45.600',1
insert into #TEMP select 47, '01553', '20/02/2013 19:45:47.100',2--ok
insert into #TEMP select 18, '01553', '20/02/2013 23:45:45.000',2--ok
insert into #TEMP select 19, '01553', '20/02/2013 23:45:45.200',2
insert into #TEMP select 48, '01553', '20/02/2013 23:45:45.800',2
How i Can modify the script for works fine?
I try this but not works fine:
SELECT IDENTITY(INT,1,1) RID,*, CAST(NULL AS BIT) AS Flag INTO #Work
FROM #TEMP ORDER BY UserId, LogTime
CREATE UNIQUE CLUSTERED INDEX ix_work ON #Work(RID, UserId, LogTime, Tipo) -- very important!
DECLARE @Flag BIT, @UserId INT,@Tipo SMALLINT, @LastFlaggedLogTime DATETIME
UPDATE w
SET @Flag = Flag = CASE WHEN @UserId IS NULL
OR @UserId != UserId
OR @Tipo != Tipo
OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000
THEN 1
ELSE 0
END
,@LastFlaggedLogTime = CASE WHEN @UserId IS NULL
OR @UserId != UserId
OR @Tipo != Tipo
OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000
THEN Logtime
ELSE @LastFlaggedLogTime
END
,@UserId = UserId
FROM #work AS w WITH (TABLOCKX) -- very important!
OPTION (MAXDOP 1) -- very important!
SELECT * FROM #Work WHERE Flag = 1
--SELECT * FROM #TEMP
DROP TABLE #WORK
DROP TABLE #temp
March 13, 2013 at 5:15 am
I'm sure you didn't mark rows to be "ok" for UserId 01552 correctly, please check carefully!
SELECT IDENTITY(INT,1,1) RID,*, CAST(NULL AS BIT) AS Flag INTO #Work
FROM #TEMP ORDER BY UserId, Tipo, LogTime -- order is important here, as you want to take first row for the different Tipo
CREATE UNIQUE CLUSTERED INDEX ix_work ON #Work(RID, UserId, Tipo, LogTime) -- very important!
-- again order of columns in clustered index is important, for the same reason!
DECLARE @Flag BIT, @UserId INT,@Tipo SMALLINT, @LastFlaggedLogTime DATETIME
UPDATE w
SET @Flag = Flag = CASE WHEN @UserId IS NULL
OR @UserId != UserId
OR @Tipo != Tipo
OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000
THEN 1
ELSE 0
END
,@LastFlaggedLogTime = CASE WHEN @UserId IS NULL
OR @UserId != UserId
OR @Tipo != Tipo
OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000
THEN Logtime
ELSE @LastFlaggedLogTime
END
,@UserId = UserId
,@Tipo = Tipo -- You need to set it as well as @UserId, otherwise it will always be NULL
FROM #work AS w WITH (TABLOCKX) -- very important!
OPTION (MAXDOP 1) -- very important!
SELECT * FROM #Work WHERE Flag = 1 order by UserId, Tipo, Logtime
--SELECT * FROM #TEMP
DROP TABLE #WORK
March 13, 2013 at 5:51 am
Eugene Elutin (3/13/2013)
I'm sure you didn't mark rows to be "ok" for UserId 01552 correctly, please check carefully!
01552 is correct, 01551 is incorrect:
;WITH OrderedData AS (
SELECT *, seq = ROW_NUMBER() OVER(PARTITION BY UserID ORDER BY LogTime)
FROM #temp
),
Calculator AS (
SELECT Seq, LogIndex, UserID, LogTime
FROM OrderedData
WHERE seq = 1
UNION ALL
SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime
FROM Calculator lr
CROSS APPLY (
SELECT seq, UserID, rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM OrderedData tr
WHERE tr.UserID = lr.UserID
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) x
INNER JOIN OrderedData tr ON tr.UserID = x.UserID AND tr.seq = x.seq
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime
FROM Calculator
ORDER BY UserID, LogTime
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 13, 2013 at 5:57 am
ChrisM@Work (3/13/2013)
Eugene Elutin (3/13/2013)
I'm sure you didn't mark rows to be "ok" for UserId 01552 correctly, please check carefully!01552 is correct, 01551 is incorrect:
;WITH OrderedData AS (
SELECT *, seq = ROW_NUMBER() OVER(PARTITION BY UserID ORDER BY LogTime)
FROM #temp
),
Calculator AS (
SELECT Seq, LogIndex, UserID, LogTime
FROM OrderedData
WHERE seq = 1
UNION ALL
SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime
FROM Calculator lr
CROSS APPLY (
SELECT seq, UserID, rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM OrderedData tr
WHERE tr.UserID = lr.UserID
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) x
INNER JOIN OrderedData tr ON tr.UserID = x.UserID AND tr.seq = x.seq
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime
FROM Calculator
ORDER BY UserID, LogTime
OP has changed the setup. There is a new column Tipo in his #temp table! Check his previous post...
March 13, 2013 at 6:04 am
Eugene Elutin (3/13/2013)
ChrisM@Work (3/13/2013)
Eugene Elutin (3/13/2013)
I'm sure you didn't mark rows to be "ok" for UserId 01552 correctly, please check carefully!01552 is correct, 01551 is incorrect:
;WITH OrderedData AS (
SELECT *, seq = ROW_NUMBER() OVER(PARTITION BY UserID ORDER BY LogTime)
FROM #temp
),
Calculator AS (
SELECT Seq, LogIndex, UserID, LogTime
FROM OrderedData
WHERE seq = 1
UNION ALL
SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime
FROM Calculator lr
CROSS APPLY (
SELECT seq, UserID, rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM OrderedData tr
WHERE tr.UserID = lr.UserID
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) x
INNER JOIN OrderedData tr ON tr.UserID = x.UserID AND tr.seq = x.seq
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime
FROM Calculator
ORDER BY UserID, LogTime
OP has changed the setup. There is a new column Tipo in his #temp table! Check his previous post...
Thanks Eugene, you are of course correct. Here's a fixed script.
;WITH OrderedData AS (
SELECT *, seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)
FROM #temp
),
Calculator AS (
SELECT Seq, LogIndex, UserID, LogTime, Tipo
FROM OrderedData
WHERE seq = 1
UNION ALL
SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo
FROM Calculator lr
CROSS APPLY (
SELECT seq, UserID, Tipo, rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM OrderedData tr
WHERE tr.UserID = lr.UserID
AND tr.Tipo = lr.Tipo
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) x
INNER JOIN OrderedData tr ON tr.UserID = x.UserID AND tr.Tipo = x.Tipo AND tr.seq = x.seq
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime, Tipo
FROM Calculator
ORDER BY UserID, Tipo, LogTime
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 13, 2013 at 6:14 am
Yep, that is right now.
And it does look less code. But...
Cause it's based on recursive CTE and hidden triangular join (via cross apply), it's performance very unlikely be on pair with "quirky" update method. Also, depending on data, it may have problem with maximum recursion count...
March 13, 2013 at 7:20 am
Eugene Elutin (3/13/2013)
Yep, that is right now.And it does look less code. But...
Cause it's based on recursive CTE and hidden triangular join (via cross apply), it's performance very unlikely be on pair with "quirky" update method. Also, depending on data, it may have problem with maximum recursion count...
That CROSS APPLY is very fast (with the right index in place of course), but you are correct about the recursion level restriction.
Ramp up the sample data:
DECLARE @MaxLogIndex INT, @MaxUserID INT
SELECT @MaxLogIndex = MAX(LogIndex), @MaxUserID = MAX(UserID) FROM #TEMP
INSERT INTO #TEMP (LogIndex, UserID, LogTime, Tipo)
SELECT
LogIndex = @MaxLogIndex + ROW_NUMBER() OVER(ORDER BY LogIndex),
UserID = RIGHT('0'+CAST(UserID + ((@MaxUserID+3) - MAX(UserID) OVER (PARTITION BY (SELECT NULL))) AS VARCHAR(5)),5),
LogTime,
Tipo
FROM #Source
GO 2000
SELECT * FROM #TEMP
-- (47,047 row(s) affected) / 00:00:32
Add a sequence number, run into a new #temp table and index it:
DROP TABLE #OrderedData
SELECT *,
seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)
INTO #OrderedData
FROM #temp
CREATE UNIQUE CLUSTERED INDEX ucx_Things ON #OrderedData (UserID, Tipo, LogTime)
-- (94047 row(s) affected) / 00:00:00
Run the rCTE:
;WITH Calculator AS (
SELECT Seq, LogIndex, UserID, LogTime, Tipo
FROM #OrderedData
WHERE seq = 1
UNION ALL
SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo
FROM Calculator lr
CROSS APPLY (
SELECT seq, LogIndex, UserID, LogTime, Tipo, rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM #OrderedData tr
WHERE tr.UserID = lr.UserID
AND tr.Tipo = lr.Tipo
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) tr
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime, Tipo
FROM Calculator
ORDER BY UserID, Tipo, LogTime
OPTION (MAXRECURSION 0);
-- (30015 row(s) affected) / 00:00:01😉
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 13, 2013 at 8:34 am
Let's try on 1,000,000 rows:
common test data setup parts:
SET DATEFORMAT dmy;
CREATE TABLE #Source (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);
CREATE TABLE #Temp (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);
insert into #Source select 1, '01551', '20/02/2013 17:41:45.000',1 --ok
insert into #Source select 19, '01551', '20/02/2013 17:41:45.400',1
insert into #Source select 39, '01551', '20/02/2013 17:41:45.850',1
insert into #Source select 2, '01551', '20/02/2013 17:41:45.900',1
insert into #Source select 9, '01551', '20/02/2013 17:41:45.950',1
insert into #Source select 31, '01551', '20/02/2013 17:41:47.140',1--ok
insert into #Source select 40, '01551', '20/02/2013 17:41:47.180',1
insert into #Source select 32, '01551', '20/02/2013 17:41:47.940',1
insert into #Source select 20, '01551', '20/02/2013 17:41:45.700',2--ok
insert into #Source select 38, '01551', '20/02/2013 17:41:45.750',2
insert into #Source select 8, '01551', '20/02/2013 17:41:45.800',2
insert into #Source select 21, '01551', '20/02/2013 17:41:46.100',2
insert into #Source select 3, '01551', '20/02/2013 17:41:46.150',2
insert into #Source select 22, '01551', '20/02/2013 17:41:46.990' ,2
insert into #Source select 33, '01551', '20/02/2013 17:41:48.120',2--ok
insert into #Source select 34, '01551', '20/02/2013 17:41:48.720',2
insert into #Source select 41, '01551', '20/02/2013 17:41:48.980',2
insert into #Source select 10, '01551', '20/02/2013 20:41:45.600',2--ok
insert into #Source select 11, '01551', '20/02/2013 20:41:45.900',2
insert into #Source select 4, '01551', '20/02/2013 20:41:47.000',2
insert into #Source select 35, '01551', '20/02/2013 20:41:47.100',2
insert into #Source select 36, '01551', '20/02/2013 20:41:47.600',2--ok
insert into #Source select 37, '01551', '20/02/2013 20:41:47.900',2
insert into #Source select 24, '01551', '20/02/2013 20:41:47.700',2
insert into #Source select 25, '01551', '20/02/2013 20:41:48.990',2
insert into #Source select 26, '01551', '20/02/2013 20:41:49.100',2
insert into #Source select 5, '01552', '20/02/2013 17:42:45.000',1--ok
insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',1
insert into #Source select 6, '01552', '20/02/2013 17:42:46.000',1
insert into #Source select 13, '01552', '20/02/2013 17:42:46.800',1
insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',2
insert into #Source select 27, '01552', '20/02/2013 17:42:46.100',2
insert into #Source select 28, '01552', '20/02/2013 17:42:46.600',2
insert into #Source select 42, '01552', '20/02/2013 17:42:47.600',2
insert into #Source select 43, '01552', '20/02/2013 17:42:47.700',1
insert into #Source select 44, '01552', '20/02/2013 17:42:47.900',1
insert into #Source select 29, '01552', '20/02/2013 19:45:45.400',2--ok
insert into #Source select 30, '01552', '20/02/2013 19:45:45.900',2
insert into #Source select 15, '01552', '20/02/2013 19:45:46.200',2
insert into #Source select 45, '01552', '20/02/2013 19:45:47.400',2--ok
insert into #Source select 46, '01552', '20/02/2013 19:45:47.800',2
insert into #Source select 16, '01553', '20/02/2013 19:45:45.100',1--ok
insert into #Source select 17, '01553', '20/02/2013 19:45:45.600',1
insert into #Source select 47, '01553', '20/02/2013 19:45:47.100',2--ok
insert into #Source select 18, '01553', '20/02/2013 23:45:45.000',2--ok
insert into #Source select 19, '01553', '20/02/2013 23:45:45.200',2
insert into #Source select 48, '01553', '20/02/2013 23:45:45.800',2
INSERT INTO #TEMP
SELECT LogIndex, RIGHT('0' + CAST(s1.UserID + RN + 2 AS VARCHAR),5), LogTime, Tipo
FROM #Source s1
CROSS JOIN (SELECT TOP 21276 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) RN FROM sys.columns s1, sys.columns s2) m
CROSS APPLY solution with recursive CTE (with not really high level of "recursiveness" as per sample data):
DECLARE @dtStart DATETIME = GETDATE();
SELECT *,
seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)
INTO #OrderedData
FROM #temp
CREATE UNIQUE CLUSTERED INDEX ucx_Things ON #OrderedData (UserID, Tipo, LogTime)
;WITH Calculator AS (
SELECT Seq, LogIndex, UserID, LogTime, Tipo
FROM #OrderedData
WHERE seq = 1
UNION ALL
SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo
FROM Calculator lr
CROSS APPLY (
SELECT seq, LogIndex, UserID, LogTime, Tipo, rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM #OrderedData tr
WHERE tr.UserID = lr.UserID
AND tr.Tipo = lr.Tipo
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) tr
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime, Tipo INTO #res -- so time to get resultset back will not be counted
FROM Calculator
ORDER BY UserID, Tipo, LogTime
OPTION (MAXRECURSION 0);
SELECT DATEDIFF(millisecond,@dtStart,GETDATE());
Average result I have for the above after 5 executions: 24 seconds
Now "quirky" update version:
DECLARE @dtStart DATETIME = GETDATE();
SELECT IDENTITY(INT,1,1) RID,*, CAST(NULL AS BIT) AS Flag INTO #Work
FROM #TEMP ORDER BY UserId, Tipo, LogTime -- order is important here, as you want to take first row for the different Tipo
CREATE UNIQUE CLUSTERED INDEX ix_work ON #Work(RID, UserId, Tipo, LogTime) -- very important!
-- again order of columns in clustered index is important, for the same reason!
DECLARE @Flag BIT, @UserId INT,@Tipo SMALLINT, @LastFlaggedLogTime DATETIME
UPDATE w
SET @Flag = Flag = CASE WHEN @UserId IS NULL
OR @UserId != UserId
OR @Tipo != Tipo
OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000
THEN 1
ELSE 0
END
,@LastFlaggedLogTime = CASE WHEN @UserId IS NULL
OR @UserId != UserId
OR @Tipo != Tipo
OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000
THEN Logtime
ELSE @LastFlaggedLogTime
END
,@UserId = UserId
,@Tipo = Tipo -- You need to set it as well as @UserId, otherwise it will always be NULL
FROM #work AS w WITH (TABLOCKX) -- very important!
OPTION (MAXDOP 1) -- very important!
SELECT * INTO #res -- so time to get resultset back will not be counted
FROM #Work WHERE Flag = 1 order by UserId, Tipo, Logtime
SELECT DATEDIFF(millisecond,@dtStart,GETDATE());
Average result I have here after 5 executions: 15 seconds
But, again it's based on the very little number of log records per UserId, which mean recursive CTE is quite quick.
Let's change testing data, so it will be just for one user:
INSERT INTO #TEMP
SELECT LogIndex, s1.UserID, DATEADD(MINUTE, RN * 2, LogTime), Tipo
FROM #Source s1
CROSS JOIN (SELECT TOP 21276 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 RN FROM sys.columns s1, sys.columns s2) m
WHERE s1.UserID ='01551'
The above will generate around half of million log records for the same user. (Doing that I also found that CROSS APPLY solution is currently based on assumption that combination of UserID, Tipo, LogTime is uniquie, which might not be the true...)
Any way. Let's re-run tests.
"Quirky" update took 8 seconds to work things out.
First attempt of CROSS APPLY with Recursive CTE I've terminated after 1 minute waiting.
Now I want to see what exactly time it will take, over 5 minutes so far, still running...
For the same reason, I think it will not be good enough even for much smaller sets of the log records per user...
March 13, 2013 at 8:41 am
Still running after 12 minutes.
Some one around was looking for CPU killer query... I need to cancel that one!
March 13, 2013 at 8:44 am
Eugene Elutin (3/13/2013)
Still running after 12 minutes.Some one around was looking for CPU killer query... I need to cancel that one!
Hahahaha!
I didn't say it would be faster, but it works. This is faster:
CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)
;WITH Calculator AS (
SELECT LogIndex, UserID, LogTime, Tipo
FROM (
SELECT LogIndex, UserID, LogTime, Tipo,
seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)
FROM #TEMP
) d
WHERE seq = 1
UNION ALL
SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo
FROM Calculator lr
CROSS APPLY (
SELECT LogIndex, UserID, LogTime, Tipo,
rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM #TEMP tr
WHERE tr.UserID = lr.UserID
AND tr.Tipo = lr.Tipo
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) tr
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime, Tipo
FROM Calculator
OPTION (MAXRECURSION 0);
about 6 seconds per million rows.
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 13, 2013 at 9:02 am
ChrisM@Work (3/13/2013)
Eugene Elutin (3/13/2013)
Still running after 12 minutes.Some one around was looking for CPU killer query... I need to cancel that one!
Hahahaha!
I didn't say it would be faster, but it works. This is faster:
CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)
;WITH Calculator AS (
SELECT LogIndex, UserID, LogTime, Tipo
FROM (
SELECT LogIndex, UserID, LogTime, Tipo,
seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)
FROM #TEMP
) d
WHERE seq = 1
UNION ALL
SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo
FROM Calculator lr
CROSS APPLY (
SELECT LogIndex, UserID, LogTime, Tipo,
rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM #TEMP tr
WHERE tr.UserID = lr.UserID
AND tr.Tipo = lr.Tipo
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) tr
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime, Tipo
FROM Calculator
OPTION (MAXRECURSION 0);
about 6 seconds per million rows.
I didn't say that is really bad for one million records...
I does not work for much smaller sets.... if you have more log records per same user.
And the above version still doesn't work...
March 13, 2013 at 9:24 am
Eugene Elutin (3/13/2013)
ChrisM@Work (3/13/2013)
Eugene Elutin (3/13/2013)
Still running after 12 minutes.Some one around was looking for CPU killer query... I need to cancel that one!
Hahahaha!
I didn't say it would be faster, but it works. This is faster:
CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)
;WITH Calculator AS (
SELECT LogIndex, UserID, LogTime, Tipo
FROM (
SELECT LogIndex, UserID, LogTime, Tipo,
seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)
FROM #TEMP
) d
WHERE seq = 1
UNION ALL
SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo
FROM Calculator lr
CROSS APPLY (
SELECT LogIndex, UserID, LogTime, Tipo,
rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM #TEMP tr
WHERE tr.UserID = lr.UserID
AND tr.Tipo = lr.Tipo
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) tr
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime, Tipo
FROM Calculator
OPTION (MAXRECURSION 0);
about 6 seconds per million rows.
I didn't say that is really bad for one million records...
I does not work for much smaller sets.... if you have more log records per same user.
And the above version still doesn't work...
The clustered index seeks which you see in the EP of my solution are critical for performance. It has to be there and of course it's best if it's unique. If there aren't columns to support a unique CX then you could use ROW_NUMBER() to generate one (but not as a chained CTE with the rCTE, this kills performance).
How doesn't it work, Eugene?
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 13, 2013 at 9:33 am
Cause some log data can be a bit different.
Try this one:
SET DATEFORMAT dmy;
CREATE TABLE #Source (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);
CREATE TABLE #Temp (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);
insert into #Source select 1, '01551', '20/02/2013 17:41:45.000',1 --ok
insert into #Source select 19, '01551', '20/02/2013 17:41:45.400',1
insert into #Source select 39, '01551', '20/02/2013 17:41:45.850',1
insert into #Source select 2, '01551', '20/02/2013 17:41:45.900',1
insert into #Source select 9, '01551', '20/02/2013 17:41:45.950',1
insert into #Source select 31, '01551', '20/02/2013 17:41:47.140',1--ok
insert into #Source select 40, '01551', '20/02/2013 17:41:47.180',1
insert into #Source select 32, '01551', '20/02/2013 17:41:47.940',1
insert into #Source select 20, '01551', '20/02/2013 17:41:45.700',2--ok
insert into #Source select 38, '01551', '20/02/2013 17:41:45.750',2
insert into #Source select 8, '01551', '20/02/2013 17:41:45.800',2
insert into #Source select 21, '01551', '20/02/2013 17:41:46.100',2
insert into #Source select 3, '01551', '20/02/2013 17:41:46.150',2
insert into #Source select 22, '01551', '20/02/2013 17:41:46.990' ,2
insert into #Source select 33, '01551', '20/02/2013 17:41:48.120',2--ok
insert into #Source select 34, '01551', '20/02/2013 17:41:48.720',2
insert into #Source select 41, '01551', '20/02/2013 17:41:48.980',2
insert into #Source select 10, '01551', '20/02/2013 20:41:45.600',2--ok
insert into #Source select 11, '01551', '20/02/2013 20:41:45.900',2
insert into #Source select 4, '01551', '20/02/2013 20:41:47.000',2
insert into #Source select 35, '01551', '20/02/2013 20:41:47.100',2
insert into #Source select 36, '01551', '20/02/2013 20:41:47.600',2--ok
insert into #Source select 37, '01551', '20/02/2013 20:41:47.900',2
insert into #Source select 24, '01551', '20/02/2013 20:41:47.700',2
insert into #Source select 25, '01551', '20/02/2013 20:41:48.990',2
insert into #Source select 26, '01551', '20/02/2013 20:41:49.100',2
insert into #Source select 5, '01552', '20/02/2013 17:42:45.000',1--ok
insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',1
insert into #Source select 6, '01552', '20/02/2013 17:42:46.000',1
insert into #Source select 13, '01552', '20/02/2013 17:42:46.800',1
insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',2
insert into #Source select 27, '01552', '20/02/2013 17:42:46.100',2
insert into #Source select 28, '01552', '20/02/2013 17:42:46.600',2
insert into #Source select 42, '01552', '20/02/2013 17:42:47.600',2
insert into #Source select 43, '01552', '20/02/2013 17:42:47.700',1
insert into #Source select 44, '01552', '20/02/2013 17:42:47.900',1
insert into #Source select 29, '01552', '20/02/2013 19:45:45.400',2--ok
insert into #Source select 30, '01552', '20/02/2013 19:45:45.900',2
insert into #Source select 15, '01552', '20/02/2013 19:45:46.200',2
insert into #Source select 45, '01552', '20/02/2013 19:45:47.400',2--ok
insert into #Source select 46, '01552', '20/02/2013 19:45:47.800',2
insert into #Source select 16, '01553', '20/02/2013 19:45:45.100',1--ok
insert into #Source select 17, '01553', '20/02/2013 19:45:45.600',1
insert into #Source select 47, '01553', '20/02/2013 19:45:47.100',2--ok
insert into #Source select 18, '01553', '20/02/2013 23:45:45.000',2--ok
insert into #Source select 19, '01553', '20/02/2013 23:45:45.200',2
insert into #Source select 48, '01553', '20/02/2013 23:45:45.800',2
INSERT INTO #TEMP
SELECT LogIndex, s1.UserID, DATEADD(MINUTE, RN * 2, LogTime), Tipo
FROM #Source s1
CROSS JOIN (SELECT TOP 21276 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 RN FROM sys.columns s1, sys.columns s2) m
WHERE s1.UserID ='01551'
GO
CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)
;WITH Calculator AS (
SELECT LogIndex, UserID, LogTime, Tipo
FROM (
SELECT LogIndex, UserID, LogTime, Tipo,
seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)
FROM #TEMP
) d
WHERE seq = 1
UNION ALL
SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo
FROM Calculator lr
CROSS APPLY (
SELECT LogIndex, UserID, LogTime, Tipo,
rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM #TEMP tr
WHERE tr.UserID = lr.UserID
AND tr.Tipo = lr.Tipo
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) tr
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime, Tipo
FROM Calculator
OPTION (MAXRECURSION 0);
GO
Looks like CTE goes into endlesss loop...
March 13, 2013 at 9:49 am
Eugene Elutin (3/13/2013)
Cause some log data can be a bit different.Try this one:
SET DATEFORMAT dmy;
CREATE TABLE #Source (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);
CREATE TABLE #Temp (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);
insert into #Source select 1, '01551', '20/02/2013 17:41:45.000',1 --ok
insert into #Source select 19, '01551', '20/02/2013 17:41:45.400',1
insert into #Source select 39, '01551', '20/02/2013 17:41:45.850',1
insert into #Source select 2, '01551', '20/02/2013 17:41:45.900',1
insert into #Source select 9, '01551', '20/02/2013 17:41:45.950',1
insert into #Source select 31, '01551', '20/02/2013 17:41:47.140',1--ok
insert into #Source select 40, '01551', '20/02/2013 17:41:47.180',1
insert into #Source select 32, '01551', '20/02/2013 17:41:47.940',1
insert into #Source select 20, '01551', '20/02/2013 17:41:45.700',2--ok
insert into #Source select 38, '01551', '20/02/2013 17:41:45.750',2
insert into #Source select 8, '01551', '20/02/2013 17:41:45.800',2
insert into #Source select 21, '01551', '20/02/2013 17:41:46.100',2
insert into #Source select 3, '01551', '20/02/2013 17:41:46.150',2
insert into #Source select 22, '01551', '20/02/2013 17:41:46.990' ,2
insert into #Source select 33, '01551', '20/02/2013 17:41:48.120',2--ok
insert into #Source select 34, '01551', '20/02/2013 17:41:48.720',2
insert into #Source select 41, '01551', '20/02/2013 17:41:48.980',2
insert into #Source select 10, '01551', '20/02/2013 20:41:45.600',2--ok
insert into #Source select 11, '01551', '20/02/2013 20:41:45.900',2
insert into #Source select 4, '01551', '20/02/2013 20:41:47.000',2
insert into #Source select 35, '01551', '20/02/2013 20:41:47.100',2
insert into #Source select 36, '01551', '20/02/2013 20:41:47.600',2--ok
insert into #Source select 37, '01551', '20/02/2013 20:41:47.900',2
insert into #Source select 24, '01551', '20/02/2013 20:41:47.700',2
insert into #Source select 25, '01551', '20/02/2013 20:41:48.990',2
insert into #Source select 26, '01551', '20/02/2013 20:41:49.100',2
insert into #Source select 5, '01552', '20/02/2013 17:42:45.000',1--ok
insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',1
insert into #Source select 6, '01552', '20/02/2013 17:42:46.000',1
insert into #Source select 13, '01552', '20/02/2013 17:42:46.800',1
insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',2
insert into #Source select 27, '01552', '20/02/2013 17:42:46.100',2
insert into #Source select 28, '01552', '20/02/2013 17:42:46.600',2
insert into #Source select 42, '01552', '20/02/2013 17:42:47.600',2
insert into #Source select 43, '01552', '20/02/2013 17:42:47.700',1
insert into #Source select 44, '01552', '20/02/2013 17:42:47.900',1
insert into #Source select 29, '01552', '20/02/2013 19:45:45.400',2--ok
insert into #Source select 30, '01552', '20/02/2013 19:45:45.900',2
insert into #Source select 15, '01552', '20/02/2013 19:45:46.200',2
insert into #Source select 45, '01552', '20/02/2013 19:45:47.400',2--ok
insert into #Source select 46, '01552', '20/02/2013 19:45:47.800',2
insert into #Source select 16, '01553', '20/02/2013 19:45:45.100',1--ok
insert into #Source select 17, '01553', '20/02/2013 19:45:45.600',1
insert into #Source select 47, '01553', '20/02/2013 19:45:47.100',2--ok
insert into #Source select 18, '01553', '20/02/2013 23:45:45.000',2--ok
insert into #Source select 19, '01553', '20/02/2013 23:45:45.200',2
insert into #Source select 48, '01553', '20/02/2013 23:45:45.800',2
INSERT INTO #TEMP
SELECT LogIndex, s1.UserID, DATEADD(MINUTE, RN * 2, LogTime), Tipo
FROM #Source s1
CROSS JOIN (SELECT TOP 21276 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 RN FROM sys.columns s1, sys.columns s2) m
WHERE s1.UserID ='01551'
GO
CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)
;WITH Calculator AS (
SELECT LogIndex, UserID, LogTime, Tipo
FROM (
SELECT LogIndex, UserID, LogTime, Tipo,
seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)
FROM #TEMP
) d
WHERE seq = 1
UNION ALL
SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo
FROM Calculator lr
CROSS APPLY (
SELECT LogIndex, UserID, LogTime, Tipo,
rn = ROW_NUMBER() OVER(ORDER BY Logtime)
FROM #TEMP tr
WHERE tr.UserID = lr.UserID
AND tr.Tipo = lr.Tipo
AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000
) tr
WHERE rn = 1
)
SELECT LogIndex, UserID, LogTime, Tipo
FROM Calculator
OPTION (MAXRECURSION 0);
GO
Looks like CTE goes into endlesss loop...
Those logtimes look sequential to me - are you sure?
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 13, 2013 at 10:58 am
Those logtimes look sequential to me - are you sure?
What do you mean by "sequential"? These LogTimes are for the same UserId, they are unique and they grow in its "value" which is kind of expected from LogTime.
Viewing 15 posts - 16 through 30 (of 38 total)
You must be logged in to reply to this topic. Login to reply