Complex SQL QUERY with DateDIFF

  • Hi Eugine and all, your solution works fine! a lot of thanks for all.

    But I Have any trouble more, if I have one type in records like this:

    CREATE TABLE #TEMP (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint)

    insert into #TEMP select 1, '01551', '20/02/2013 17:41:45.000',1 --ok

    insert into #TEMP select 19, '01551', '20/02/2013 17:41:45.400',1

    insert into #TEMP select 39, '01551', '20/02/2013 17:41:45.850',1

    insert into #TEMP select 2, '01551', '20/02/2013 17:41:45.900',1

    insert into #TEMP select 9, '01551', '20/02/2013 17:41:45.950',1

    insert into #TEMP select 31, '01551', '20/02/2013 17:41:47.140',1--ok

    insert into #TEMP select 40, '01551', '20/02/2013 17:41:47.180',1

    insert into #TEMP select 32, '01551', '20/02/2013 17:41:47.940',1

    insert into #TEMP select 20, '01551', '20/02/2013 17:41:45.700',2--ok

    insert into #TEMP select 38, '01551', '20/02/2013 17:41:45.750',2

    insert into #TEMP select 8, '01551', '20/02/2013 17:41:45.800',2

    insert into #TEMP select 21, '01551', '20/02/2013 17:41:46.100',2

    insert into #TEMP select 3, '01551', '20/02/2013 17:41:46.150',2

    insert into #TEMP select 22, '01551', '20/02/2013 17:41:46.990' ,2

    insert into #TEMP select 33, '01551', '20/02/2013 17:41:48.120',2--ok

    insert into #TEMP select 34, '01551', '20/02/2013 17:41:48.720',2

    insert into #TEMP select 41, '01551', '20/02/2013 17:41:48.980',2

    insert into #TEMP select 10, '01551', '20/02/2013 20:41:45.600',2--ok

    insert into #TEMP select 11, '01551', '20/02/2013 20:41:45.900',2

    insert into #TEMP select 4, '01551', '20/02/2013 20:41:47.000',2

    insert into #TEMP select 35, '01551', '20/02/2013 20:41:47.100',2

    insert into #TEMP select 36, '01551', '20/02/2013 20:41:47.600',2--ok

    insert into #TEMP select 37, '01551', '20/02/2013 20:41:47.900',2

    insert into #TEMP select 24, '01551', '20/02/2013 20:41:47.700',2

    insert into #TEMP select 25, '01551', '20/02/2013 20:41:48.990',2

    insert into #TEMP select 26, '01551', '20/02/2013 20:41:49.100',2

    insert into #TEMP select 5, '01552', '20/02/2013 17:42:45.000',1--ok

    insert into #TEMP select 12, '01552', '20/02/2013 17:42:45.500',1

    insert into #TEMP select 6, '01552', '20/02/2013 17:42:46.000',1

    insert into #TEMP select 13, '01552', '20/02/2013 17:42:46.800',1

    insert into #TEMP select 12, '01552', '20/02/2013 17:42:45.500',2

    insert into #TEMP select 27, '01552', '20/02/2013 17:42:46.100',2

    insert into #TEMP select 28, '01552', '20/02/2013 17:42:46.600',2

    insert into #TEMP select 42, '01552', '20/02/2013 17:42:47.600',2

    insert into #TEMP select 43, '01552', '20/02/2013 17:42:47.700',1

    insert into #TEMP select 44, '01552', '20/02/2013 17:42:47.900',1

    insert into #TEMP select 29, '01552', '20/02/2013 19:45:45.400',2--ok

    insert into #TEMP select 30, '01552', '20/02/2013 19:45:45.900',2

    insert into #TEMP select 15, '01552', '20/02/2013 19:45:46.200',2

    insert into #TEMP select 45, '01552', '20/02/2013 19:45:47.400',2--ok

    insert into #TEMP select 46, '01552', '20/02/2013 19:45:47.800',2

    insert into #TEMP select 16, '01553', '20/02/2013 19:45:45.100',1--ok

    insert into #TEMP select 17, '01553', '20/02/2013 19:45:45.600',1

    insert into #TEMP select 47, '01553', '20/02/2013 19:45:47.100',2--ok

    insert into #TEMP select 18, '01553', '20/02/2013 23:45:45.000',2--ok

    insert into #TEMP select 19, '01553', '20/02/2013 23:45:45.200',2

    insert into #TEMP select 48, '01553', '20/02/2013 23:45:45.800',2

    How i Can modify the script for works fine?

    I try this but not works fine:

    SELECT IDENTITY(INT,1,1) RID,*, CAST(NULL AS BIT) AS Flag INTO #Work

    FROM #TEMP ORDER BY UserId, LogTime

    CREATE UNIQUE CLUSTERED INDEX ix_work ON #Work(RID, UserId, LogTime, Tipo) -- very important!

    DECLARE @Flag BIT, @UserId INT,@Tipo SMALLINT, @LastFlaggedLogTime DATETIME

    UPDATE w

    SET @Flag = Flag = CASE WHEN @UserId IS NULL

    OR @UserId != UserId

    OR @Tipo != Tipo

    OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000

    THEN 1

    ELSE 0

    END

    ,@LastFlaggedLogTime = CASE WHEN @UserId IS NULL

    OR @UserId != UserId

    OR @Tipo != Tipo

    OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000

    THEN Logtime

    ELSE @LastFlaggedLogTime

    END

    ,@UserId = UserId

    FROM #work AS w WITH (TABLOCKX) -- very important!

    OPTION (MAXDOP 1) -- very important!

    SELECT * FROM #Work WHERE Flag = 1

    --SELECT * FROM #TEMP

    DROP TABLE #WORK

    DROP TABLE #temp

  • I'm sure you didn't mark rows to be "ok" for UserId 01552 correctly, please check carefully!

    SELECT IDENTITY(INT,1,1) RID,*, CAST(NULL AS BIT) AS Flag INTO #Work

    FROM #TEMP ORDER BY UserId, Tipo, LogTime -- order is important here, as you want to take first row for the different Tipo

    CREATE UNIQUE CLUSTERED INDEX ix_work ON #Work(RID, UserId, Tipo, LogTime) -- very important!

    -- again order of columns in clustered index is important, for the same reason!

    DECLARE @Flag BIT, @UserId INT,@Tipo SMALLINT, @LastFlaggedLogTime DATETIME

    UPDATE w

    SET @Flag = Flag = CASE WHEN @UserId IS NULL

    OR @UserId != UserId

    OR @Tipo != Tipo

    OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000

    THEN 1

    ELSE 0

    END

    ,@LastFlaggedLogTime = CASE WHEN @UserId IS NULL

    OR @UserId != UserId

    OR @Tipo != Tipo

    OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000

    THEN Logtime

    ELSE @LastFlaggedLogTime

    END

    ,@UserId = UserId

    ,@Tipo = Tipo -- You need to set it as well as @UserId, otherwise it will always be NULL

    FROM #work AS w WITH (TABLOCKX) -- very important!

    OPTION (MAXDOP 1) -- very important!

    SELECT * FROM #Work WHERE Flag = 1 order by UserId, Tipo, Logtime

    --SELECT * FROM #TEMP

    DROP TABLE #WORK

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Eugene Elutin (3/13/2013)


    I'm sure you didn't mark rows to be "ok" for UserId 01552 correctly, please check carefully!

    01552 is correct, 01551 is incorrect:

    ;WITH OrderedData AS (

    SELECT *, seq = ROW_NUMBER() OVER(PARTITION BY UserID ORDER BY LogTime)

    FROM #temp

    ),

    Calculator AS (

    SELECT Seq, LogIndex, UserID, LogTime

    FROM OrderedData

    WHERE seq = 1

    UNION ALL

    SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime

    FROM Calculator lr

    CROSS APPLY (

    SELECT seq, UserID, rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM OrderedData tr

    WHERE tr.UserID = lr.UserID

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) x

    INNER JOIN OrderedData tr ON tr.UserID = x.UserID AND tr.seq = x.seq

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime

    FROM Calculator

    ORDER BY UserID, LogTime

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • ChrisM@Work (3/13/2013)


    Eugene Elutin (3/13/2013)


    I'm sure you didn't mark rows to be "ok" for UserId 01552 correctly, please check carefully!

    01552 is correct, 01551 is incorrect:

    ;WITH OrderedData AS (

    SELECT *, seq = ROW_NUMBER() OVER(PARTITION BY UserID ORDER BY LogTime)

    FROM #temp

    ),

    Calculator AS (

    SELECT Seq, LogIndex, UserID, LogTime

    FROM OrderedData

    WHERE seq = 1

    UNION ALL

    SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime

    FROM Calculator lr

    CROSS APPLY (

    SELECT seq, UserID, rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM OrderedData tr

    WHERE tr.UserID = lr.UserID

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) x

    INNER JOIN OrderedData tr ON tr.UserID = x.UserID AND tr.seq = x.seq

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime

    FROM Calculator

    ORDER BY UserID, LogTime

    OP has changed the setup. There is a new column Tipo in his #temp table! Check his previous post...

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Eugene Elutin (3/13/2013)


    ChrisM@Work (3/13/2013)


    Eugene Elutin (3/13/2013)


    I'm sure you didn't mark rows to be "ok" for UserId 01552 correctly, please check carefully!

    01552 is correct, 01551 is incorrect:

    ;WITH OrderedData AS (

    SELECT *, seq = ROW_NUMBER() OVER(PARTITION BY UserID ORDER BY LogTime)

    FROM #temp

    ),

    Calculator AS (

    SELECT Seq, LogIndex, UserID, LogTime

    FROM OrderedData

    WHERE seq = 1

    UNION ALL

    SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime

    FROM Calculator lr

    CROSS APPLY (

    SELECT seq, UserID, rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM OrderedData tr

    WHERE tr.UserID = lr.UserID

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) x

    INNER JOIN OrderedData tr ON tr.UserID = x.UserID AND tr.seq = x.seq

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime

    FROM Calculator

    ORDER BY UserID, LogTime

    OP has changed the setup. There is a new column Tipo in his #temp table! Check his previous post...

    Thanks Eugene, you are of course correct. Here's a fixed script.

    ;WITH OrderedData AS (

    SELECT *, seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)

    FROM #temp

    ),

    Calculator AS (

    SELECT Seq, LogIndex, UserID, LogTime, Tipo

    FROM OrderedData

    WHERE seq = 1

    UNION ALL

    SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo

    FROM Calculator lr

    CROSS APPLY (

    SELECT seq, UserID, Tipo, rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM OrderedData tr

    WHERE tr.UserID = lr.UserID

    AND tr.Tipo = lr.Tipo

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) x

    INNER JOIN OrderedData tr ON tr.UserID = x.UserID AND tr.Tipo = x.Tipo AND tr.seq = x.seq

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM Calculator

    ORDER BY UserID, Tipo, LogTime

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Yep, that is right now.

    And it does look less code. But...

    Cause it's based on recursive CTE and hidden triangular join (via cross apply), it's performance very unlikely be on pair with "quirky" update method. Also, depending on data, it may have problem with maximum recursion count...

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Eugene Elutin (3/13/2013)


    Yep, that is right now.

    And it does look less code. But...

    Cause it's based on recursive CTE and hidden triangular join (via cross apply), it's performance very unlikely be on pair with "quirky" update method. Also, depending on data, it may have problem with maximum recursion count...

    That CROSS APPLY is very fast (with the right index in place of course), but you are correct about the recursion level restriction.

    Ramp up the sample data:

    DECLARE @MaxLogIndex INT, @MaxUserID INT

    SELECT @MaxLogIndex = MAX(LogIndex), @MaxUserID = MAX(UserID) FROM #TEMP

    INSERT INTO #TEMP (LogIndex, UserID, LogTime, Tipo)

    SELECT

    LogIndex = @MaxLogIndex + ROW_NUMBER() OVER(ORDER BY LogIndex),

    UserID = RIGHT('0'+CAST(UserID + ((@MaxUserID+3) - MAX(UserID) OVER (PARTITION BY (SELECT NULL))) AS VARCHAR(5)),5),

    LogTime,

    Tipo

    FROM #Source

    GO 2000

    SELECT * FROM #TEMP

    -- (47,047 row(s) affected) / 00:00:32

    Add a sequence number, run into a new #temp table and index it:

    DROP TABLE #OrderedData

    SELECT *,

    seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)

    INTO #OrderedData

    FROM #temp

    CREATE UNIQUE CLUSTERED INDEX ucx_Things ON #OrderedData (UserID, Tipo, LogTime)

    -- (94047 row(s) affected) / 00:00:00

    Run the rCTE:

    ;WITH Calculator AS (

    SELECT Seq, LogIndex, UserID, LogTime, Tipo

    FROM #OrderedData

    WHERE seq = 1

    UNION ALL

    SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo

    FROM Calculator lr

    CROSS APPLY (

    SELECT seq, LogIndex, UserID, LogTime, Tipo, rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM #OrderedData tr

    WHERE tr.UserID = lr.UserID

    AND tr.Tipo = lr.Tipo

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) tr

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM Calculator

    ORDER BY UserID, Tipo, LogTime

    OPTION (MAXRECURSION 0);

    -- (30015 row(s) affected) / 00:00:01😉

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Let's try on 1,000,000 rows:

    common test data setup parts:

    SET DATEFORMAT dmy;

    CREATE TABLE #Source (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);

    CREATE TABLE #Temp (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);

    insert into #Source select 1, '01551', '20/02/2013 17:41:45.000',1 --ok

    insert into #Source select 19, '01551', '20/02/2013 17:41:45.400',1

    insert into #Source select 39, '01551', '20/02/2013 17:41:45.850',1

    insert into #Source select 2, '01551', '20/02/2013 17:41:45.900',1

    insert into #Source select 9, '01551', '20/02/2013 17:41:45.950',1

    insert into #Source select 31, '01551', '20/02/2013 17:41:47.140',1--ok

    insert into #Source select 40, '01551', '20/02/2013 17:41:47.180',1

    insert into #Source select 32, '01551', '20/02/2013 17:41:47.940',1

    insert into #Source select 20, '01551', '20/02/2013 17:41:45.700',2--ok

    insert into #Source select 38, '01551', '20/02/2013 17:41:45.750',2

    insert into #Source select 8, '01551', '20/02/2013 17:41:45.800',2

    insert into #Source select 21, '01551', '20/02/2013 17:41:46.100',2

    insert into #Source select 3, '01551', '20/02/2013 17:41:46.150',2

    insert into #Source select 22, '01551', '20/02/2013 17:41:46.990' ,2

    insert into #Source select 33, '01551', '20/02/2013 17:41:48.120',2--ok

    insert into #Source select 34, '01551', '20/02/2013 17:41:48.720',2

    insert into #Source select 41, '01551', '20/02/2013 17:41:48.980',2

    insert into #Source select 10, '01551', '20/02/2013 20:41:45.600',2--ok

    insert into #Source select 11, '01551', '20/02/2013 20:41:45.900',2

    insert into #Source select 4, '01551', '20/02/2013 20:41:47.000',2

    insert into #Source select 35, '01551', '20/02/2013 20:41:47.100',2

    insert into #Source select 36, '01551', '20/02/2013 20:41:47.600',2--ok

    insert into #Source select 37, '01551', '20/02/2013 20:41:47.900',2

    insert into #Source select 24, '01551', '20/02/2013 20:41:47.700',2

    insert into #Source select 25, '01551', '20/02/2013 20:41:48.990',2

    insert into #Source select 26, '01551', '20/02/2013 20:41:49.100',2

    insert into #Source select 5, '01552', '20/02/2013 17:42:45.000',1--ok

    insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',1

    insert into #Source select 6, '01552', '20/02/2013 17:42:46.000',1

    insert into #Source select 13, '01552', '20/02/2013 17:42:46.800',1

    insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',2

    insert into #Source select 27, '01552', '20/02/2013 17:42:46.100',2

    insert into #Source select 28, '01552', '20/02/2013 17:42:46.600',2

    insert into #Source select 42, '01552', '20/02/2013 17:42:47.600',2

    insert into #Source select 43, '01552', '20/02/2013 17:42:47.700',1

    insert into #Source select 44, '01552', '20/02/2013 17:42:47.900',1

    insert into #Source select 29, '01552', '20/02/2013 19:45:45.400',2--ok

    insert into #Source select 30, '01552', '20/02/2013 19:45:45.900',2

    insert into #Source select 15, '01552', '20/02/2013 19:45:46.200',2

    insert into #Source select 45, '01552', '20/02/2013 19:45:47.400',2--ok

    insert into #Source select 46, '01552', '20/02/2013 19:45:47.800',2

    insert into #Source select 16, '01553', '20/02/2013 19:45:45.100',1--ok

    insert into #Source select 17, '01553', '20/02/2013 19:45:45.600',1

    insert into #Source select 47, '01553', '20/02/2013 19:45:47.100',2--ok

    insert into #Source select 18, '01553', '20/02/2013 23:45:45.000',2--ok

    insert into #Source select 19, '01553', '20/02/2013 23:45:45.200',2

    insert into #Source select 48, '01553', '20/02/2013 23:45:45.800',2

    INSERT INTO #TEMP

    SELECT LogIndex, RIGHT('0' + CAST(s1.UserID + RN + 2 AS VARCHAR),5), LogTime, Tipo

    FROM #Source s1

    CROSS JOIN (SELECT TOP 21276 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) RN FROM sys.columns s1, sys.columns s2) m

    CROSS APPLY solution with recursive CTE (with not really high level of "recursiveness" as per sample data):

    DECLARE @dtStart DATETIME = GETDATE();

    SELECT *,

    seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)

    INTO #OrderedData

    FROM #temp

    CREATE UNIQUE CLUSTERED INDEX ucx_Things ON #OrderedData (UserID, Tipo, LogTime)

    ;WITH Calculator AS (

    SELECT Seq, LogIndex, UserID, LogTime, Tipo

    FROM #OrderedData

    WHERE seq = 1

    UNION ALL

    SELECT tr.Seq, tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo

    FROM Calculator lr

    CROSS APPLY (

    SELECT seq, LogIndex, UserID, LogTime, Tipo, rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM #OrderedData tr

    WHERE tr.UserID = lr.UserID

    AND tr.Tipo = lr.Tipo

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) tr

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime, Tipo INTO #res -- so time to get resultset back will not be counted

    FROM Calculator

    ORDER BY UserID, Tipo, LogTime

    OPTION (MAXRECURSION 0);

    SELECT DATEDIFF(millisecond,@dtStart,GETDATE());

    Average result I have for the above after 5 executions: 24 seconds

    Now "quirky" update version:

    DECLARE @dtStart DATETIME = GETDATE();

    SELECT IDENTITY(INT,1,1) RID,*, CAST(NULL AS BIT) AS Flag INTO #Work

    FROM #TEMP ORDER BY UserId, Tipo, LogTime -- order is important here, as you want to take first row for the different Tipo

    CREATE UNIQUE CLUSTERED INDEX ix_work ON #Work(RID, UserId, Tipo, LogTime) -- very important!

    -- again order of columns in clustered index is important, for the same reason!

    DECLARE @Flag BIT, @UserId INT,@Tipo SMALLINT, @LastFlaggedLogTime DATETIME

    UPDATE w

    SET @Flag = Flag = CASE WHEN @UserId IS NULL

    OR @UserId != UserId

    OR @Tipo != Tipo

    OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000

    THEN 1

    ELSE 0

    END

    ,@LastFlaggedLogTime = CASE WHEN @UserId IS NULL

    OR @UserId != UserId

    OR @Tipo != Tipo

    OR DATEDIFF(MILLISECOND ,@LastFlaggedLogTime, Logtime) >= 2000

    THEN Logtime

    ELSE @LastFlaggedLogTime

    END

    ,@UserId = UserId

    ,@Tipo = Tipo -- You need to set it as well as @UserId, otherwise it will always be NULL

    FROM #work AS w WITH (TABLOCKX) -- very important!

    OPTION (MAXDOP 1) -- very important!

    SELECT * INTO #res -- so time to get resultset back will not be counted

    FROM #Work WHERE Flag = 1 order by UserId, Tipo, Logtime

    SELECT DATEDIFF(millisecond,@dtStart,GETDATE());

    Average result I have here after 5 executions: 15 seconds

    But, again it's based on the very little number of log records per UserId, which mean recursive CTE is quite quick.

    Let's change testing data, so it will be just for one user:

    INSERT INTO #TEMP

    SELECT LogIndex, s1.UserID, DATEADD(MINUTE, RN * 2, LogTime), Tipo

    FROM #Source s1

    CROSS JOIN (SELECT TOP 21276 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 RN FROM sys.columns s1, sys.columns s2) m

    WHERE s1.UserID ='01551'

    The above will generate around half of million log records for the same user. (Doing that I also found that CROSS APPLY solution is currently based on assumption that combination of UserID, Tipo, LogTime is uniquie, which might not be the true...)

    Any way. Let's re-run tests.

    "Quirky" update took 8 seconds to work things out.

    First attempt of CROSS APPLY with Recursive CTE I've terminated after 1 minute waiting.

    Now I want to see what exactly time it will take, over 5 minutes so far, still running...

    For the same reason, I think it will not be good enough even for much smaller sets of the log records per user...

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Still running after 12 minutes.

    Some one around was looking for CPU killer query... I need to cancel that one!

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Eugene Elutin (3/13/2013)


    Still running after 12 minutes.

    Some one around was looking for CPU killer query... I need to cancel that one!

    Hahahaha!

    I didn't say it would be faster, but it works. This is faster:

    CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)

    ;WITH Calculator AS (

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM (

    SELECT LogIndex, UserID, LogTime, Tipo,

    seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)

    FROM #TEMP

    ) d

    WHERE seq = 1

    UNION ALL

    SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo

    FROM Calculator lr

    CROSS APPLY (

    SELECT LogIndex, UserID, LogTime, Tipo,

    rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM #TEMP tr

    WHERE tr.UserID = lr.UserID

    AND tr.Tipo = lr.Tipo

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) tr

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM Calculator

    OPTION (MAXRECURSION 0);

    about 6 seconds per million rows.

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • ChrisM@Work (3/13/2013)


    Eugene Elutin (3/13/2013)


    Still running after 12 minutes.

    Some one around was looking for CPU killer query... I need to cancel that one!

    Hahahaha!

    I didn't say it would be faster, but it works. This is faster:

    CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)

    ;WITH Calculator AS (

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM (

    SELECT LogIndex, UserID, LogTime, Tipo,

    seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)

    FROM #TEMP

    ) d

    WHERE seq = 1

    UNION ALL

    SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo

    FROM Calculator lr

    CROSS APPLY (

    SELECT LogIndex, UserID, LogTime, Tipo,

    rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM #TEMP tr

    WHERE tr.UserID = lr.UserID

    AND tr.Tipo = lr.Tipo

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) tr

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM Calculator

    OPTION (MAXRECURSION 0);

    about 6 seconds per million rows.

    I didn't say that is really bad for one million records...

    I does not work for much smaller sets.... if you have more log records per same user.

    And the above version still doesn't work...

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Eugene Elutin (3/13/2013)


    ChrisM@Work (3/13/2013)


    Eugene Elutin (3/13/2013)


    Still running after 12 minutes.

    Some one around was looking for CPU killer query... I need to cancel that one!

    Hahahaha!

    I didn't say it would be faster, but it works. This is faster:

    CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)

    ;WITH Calculator AS (

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM (

    SELECT LogIndex, UserID, LogTime, Tipo,

    seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)

    FROM #TEMP

    ) d

    WHERE seq = 1

    UNION ALL

    SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo

    FROM Calculator lr

    CROSS APPLY (

    SELECT LogIndex, UserID, LogTime, Tipo,

    rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM #TEMP tr

    WHERE tr.UserID = lr.UserID

    AND tr.Tipo = lr.Tipo

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) tr

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM Calculator

    OPTION (MAXRECURSION 0);

    about 6 seconds per million rows.

    I didn't say that is really bad for one million records...

    I does not work for much smaller sets.... if you have more log records per same user.

    And the above version still doesn't work...

    The clustered index seeks which you see in the EP of my solution are critical for performance. It has to be there and of course it's best if it's unique. If there aren't columns to support a unique CX then you could use ROW_NUMBER() to generate one (but not as a chained CTE with the rCTE, this kills performance).

    How doesn't it work, Eugene?

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Cause some log data can be a bit different.

    Try this one:

    SET DATEFORMAT dmy;

    CREATE TABLE #Source (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);

    CREATE TABLE #Temp (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);

    insert into #Source select 1, '01551', '20/02/2013 17:41:45.000',1 --ok

    insert into #Source select 19, '01551', '20/02/2013 17:41:45.400',1

    insert into #Source select 39, '01551', '20/02/2013 17:41:45.850',1

    insert into #Source select 2, '01551', '20/02/2013 17:41:45.900',1

    insert into #Source select 9, '01551', '20/02/2013 17:41:45.950',1

    insert into #Source select 31, '01551', '20/02/2013 17:41:47.140',1--ok

    insert into #Source select 40, '01551', '20/02/2013 17:41:47.180',1

    insert into #Source select 32, '01551', '20/02/2013 17:41:47.940',1

    insert into #Source select 20, '01551', '20/02/2013 17:41:45.700',2--ok

    insert into #Source select 38, '01551', '20/02/2013 17:41:45.750',2

    insert into #Source select 8, '01551', '20/02/2013 17:41:45.800',2

    insert into #Source select 21, '01551', '20/02/2013 17:41:46.100',2

    insert into #Source select 3, '01551', '20/02/2013 17:41:46.150',2

    insert into #Source select 22, '01551', '20/02/2013 17:41:46.990' ,2

    insert into #Source select 33, '01551', '20/02/2013 17:41:48.120',2--ok

    insert into #Source select 34, '01551', '20/02/2013 17:41:48.720',2

    insert into #Source select 41, '01551', '20/02/2013 17:41:48.980',2

    insert into #Source select 10, '01551', '20/02/2013 20:41:45.600',2--ok

    insert into #Source select 11, '01551', '20/02/2013 20:41:45.900',2

    insert into #Source select 4, '01551', '20/02/2013 20:41:47.000',2

    insert into #Source select 35, '01551', '20/02/2013 20:41:47.100',2

    insert into #Source select 36, '01551', '20/02/2013 20:41:47.600',2--ok

    insert into #Source select 37, '01551', '20/02/2013 20:41:47.900',2

    insert into #Source select 24, '01551', '20/02/2013 20:41:47.700',2

    insert into #Source select 25, '01551', '20/02/2013 20:41:48.990',2

    insert into #Source select 26, '01551', '20/02/2013 20:41:49.100',2

    insert into #Source select 5, '01552', '20/02/2013 17:42:45.000',1--ok

    insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',1

    insert into #Source select 6, '01552', '20/02/2013 17:42:46.000',1

    insert into #Source select 13, '01552', '20/02/2013 17:42:46.800',1

    insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',2

    insert into #Source select 27, '01552', '20/02/2013 17:42:46.100',2

    insert into #Source select 28, '01552', '20/02/2013 17:42:46.600',2

    insert into #Source select 42, '01552', '20/02/2013 17:42:47.600',2

    insert into #Source select 43, '01552', '20/02/2013 17:42:47.700',1

    insert into #Source select 44, '01552', '20/02/2013 17:42:47.900',1

    insert into #Source select 29, '01552', '20/02/2013 19:45:45.400',2--ok

    insert into #Source select 30, '01552', '20/02/2013 19:45:45.900',2

    insert into #Source select 15, '01552', '20/02/2013 19:45:46.200',2

    insert into #Source select 45, '01552', '20/02/2013 19:45:47.400',2--ok

    insert into #Source select 46, '01552', '20/02/2013 19:45:47.800',2

    insert into #Source select 16, '01553', '20/02/2013 19:45:45.100',1--ok

    insert into #Source select 17, '01553', '20/02/2013 19:45:45.600',1

    insert into #Source select 47, '01553', '20/02/2013 19:45:47.100',2--ok

    insert into #Source select 18, '01553', '20/02/2013 23:45:45.000',2--ok

    insert into #Source select 19, '01553', '20/02/2013 23:45:45.200',2

    insert into #Source select 48, '01553', '20/02/2013 23:45:45.800',2

    INSERT INTO #TEMP

    SELECT LogIndex, s1.UserID, DATEADD(MINUTE, RN * 2, LogTime), Tipo

    FROM #Source s1

    CROSS JOIN (SELECT TOP 21276 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 RN FROM sys.columns s1, sys.columns s2) m

    WHERE s1.UserID ='01551'

    GO

    CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)

    ;WITH Calculator AS (

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM (

    SELECT LogIndex, UserID, LogTime, Tipo,

    seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)

    FROM #TEMP

    ) d

    WHERE seq = 1

    UNION ALL

    SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo

    FROM Calculator lr

    CROSS APPLY (

    SELECT LogIndex, UserID, LogTime, Tipo,

    rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM #TEMP tr

    WHERE tr.UserID = lr.UserID

    AND tr.Tipo = lr.Tipo

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) tr

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM Calculator

    OPTION (MAXRECURSION 0);

    GO

    Looks like CTE goes into endlesss loop...

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Eugene Elutin (3/13/2013)


    Cause some log data can be a bit different.

    Try this one:

    SET DATEFORMAT dmy;

    CREATE TABLE #Source (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);

    CREATE TABLE #Temp (LogIndex int, UserID nvarchar (10), LogTime datetime, Tipo smallint);

    insert into #Source select 1, '01551', '20/02/2013 17:41:45.000',1 --ok

    insert into #Source select 19, '01551', '20/02/2013 17:41:45.400',1

    insert into #Source select 39, '01551', '20/02/2013 17:41:45.850',1

    insert into #Source select 2, '01551', '20/02/2013 17:41:45.900',1

    insert into #Source select 9, '01551', '20/02/2013 17:41:45.950',1

    insert into #Source select 31, '01551', '20/02/2013 17:41:47.140',1--ok

    insert into #Source select 40, '01551', '20/02/2013 17:41:47.180',1

    insert into #Source select 32, '01551', '20/02/2013 17:41:47.940',1

    insert into #Source select 20, '01551', '20/02/2013 17:41:45.700',2--ok

    insert into #Source select 38, '01551', '20/02/2013 17:41:45.750',2

    insert into #Source select 8, '01551', '20/02/2013 17:41:45.800',2

    insert into #Source select 21, '01551', '20/02/2013 17:41:46.100',2

    insert into #Source select 3, '01551', '20/02/2013 17:41:46.150',2

    insert into #Source select 22, '01551', '20/02/2013 17:41:46.990' ,2

    insert into #Source select 33, '01551', '20/02/2013 17:41:48.120',2--ok

    insert into #Source select 34, '01551', '20/02/2013 17:41:48.720',2

    insert into #Source select 41, '01551', '20/02/2013 17:41:48.980',2

    insert into #Source select 10, '01551', '20/02/2013 20:41:45.600',2--ok

    insert into #Source select 11, '01551', '20/02/2013 20:41:45.900',2

    insert into #Source select 4, '01551', '20/02/2013 20:41:47.000',2

    insert into #Source select 35, '01551', '20/02/2013 20:41:47.100',2

    insert into #Source select 36, '01551', '20/02/2013 20:41:47.600',2--ok

    insert into #Source select 37, '01551', '20/02/2013 20:41:47.900',2

    insert into #Source select 24, '01551', '20/02/2013 20:41:47.700',2

    insert into #Source select 25, '01551', '20/02/2013 20:41:48.990',2

    insert into #Source select 26, '01551', '20/02/2013 20:41:49.100',2

    insert into #Source select 5, '01552', '20/02/2013 17:42:45.000',1--ok

    insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',1

    insert into #Source select 6, '01552', '20/02/2013 17:42:46.000',1

    insert into #Source select 13, '01552', '20/02/2013 17:42:46.800',1

    insert into #Source select 12, '01552', '20/02/2013 17:42:45.500',2

    insert into #Source select 27, '01552', '20/02/2013 17:42:46.100',2

    insert into #Source select 28, '01552', '20/02/2013 17:42:46.600',2

    insert into #Source select 42, '01552', '20/02/2013 17:42:47.600',2

    insert into #Source select 43, '01552', '20/02/2013 17:42:47.700',1

    insert into #Source select 44, '01552', '20/02/2013 17:42:47.900',1

    insert into #Source select 29, '01552', '20/02/2013 19:45:45.400',2--ok

    insert into #Source select 30, '01552', '20/02/2013 19:45:45.900',2

    insert into #Source select 15, '01552', '20/02/2013 19:45:46.200',2

    insert into #Source select 45, '01552', '20/02/2013 19:45:47.400',2--ok

    insert into #Source select 46, '01552', '20/02/2013 19:45:47.800',2

    insert into #Source select 16, '01553', '20/02/2013 19:45:45.100',1--ok

    insert into #Source select 17, '01553', '20/02/2013 19:45:45.600',1

    insert into #Source select 47, '01553', '20/02/2013 19:45:47.100',2--ok

    insert into #Source select 18, '01553', '20/02/2013 23:45:45.000',2--ok

    insert into #Source select 19, '01553', '20/02/2013 23:45:45.200',2

    insert into #Source select 48, '01553', '20/02/2013 23:45:45.800',2

    INSERT INTO #TEMP

    SELECT LogIndex, s1.UserID, DATEADD(MINUTE, RN * 2, LogTime), Tipo

    FROM #Source s1

    CROSS JOIN (SELECT TOP 21276 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 RN FROM sys.columns s1, sys.columns s2) m

    WHERE s1.UserID ='01551'

    GO

    CREATE UNIQUE CLUSTERED INDEX ucx_Everything ON #TEMP (UserID, Tipo, LogTime, LogIndex)

    ;WITH Calculator AS (

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM (

    SELECT LogIndex, UserID, LogTime, Tipo,

    seq = ROW_NUMBER() OVER(PARTITION BY UserID, Tipo ORDER BY LogTime)

    FROM #TEMP

    ) d

    WHERE seq = 1

    UNION ALL

    SELECT tr.LogIndex, tr.UserID, tr.LogTime, tr.Tipo

    FROM Calculator lr

    CROSS APPLY (

    SELECT LogIndex, UserID, LogTime, Tipo,

    rn = ROW_NUMBER() OVER(ORDER BY Logtime)

    FROM #TEMP tr

    WHERE tr.UserID = lr.UserID

    AND tr.Tipo = lr.Tipo

    AND DATEDIFF(MILLISECOND, lr.LogTime, tr.LogTime) > 2000

    ) tr

    WHERE rn = 1

    )

    SELECT LogIndex, UserID, LogTime, Tipo

    FROM Calculator

    OPTION (MAXRECURSION 0);

    GO

    Looks like CTE goes into endlesss loop...

    Those logtimes look sequential to me - are you sure?

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Those logtimes look sequential to me - are you sure?

    What do you mean by "sequential"? These LogTimes are for the same UserId, they are unique and they grow in its "value" which is kind of expected from LogTime.

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

Viewing 15 posts - 16 through 30 (of 38 total)

You must be logged in to reply to this topic. Login to reply