-- Since group allocation must take place sequentially row by row, your good options
-- are quite limited. This solution uses a recursive CTE, you could also consider the
-- Quirky Update, probably the fastest method, or a cursor.
-- Performance will be horrible unless you have an existing column which models rn in "OrderedSet",
-- or you can add one. You will need an index on it too.
;WITH OrderedSet AS (
SELECT
rn = ROW_NUMBER() OVER(ORDER BY [caller], callee, localtime),
*
FROM test
),
rCTE AS (
SELECT tr.*, grp = 1
FROM OrderedSet tr
WHERE rn = 1
UNION ALL
SELECT tr.*,
grp = CASE
WHEN tr.[caller] = lr.[caller] AND tr.callee = lr.callee
AND DATEDIFF(SECOND, lr.localtime, tr.localtime ) <= 1 THEN lr.grp
ELSE lr.grp+1 END
FROM rCTE lr
INNER JOIN OrderedSet tr
ON tr.rn = lr.rn+1
)
SELECT r.*,
filter = CASE
WHEN x.minsid = 2 AND x.maxsid = 6 AND r.s_id = 2 THEN 1
WHEN x.minsid = 6 AND r.s_id = 6 THEN 1
WHEN x.minsid = 2 AND x.maxsid = 2 AND r.s_id = 2 THEN 1
ELSE NULL END
FROM rCTE r
CROSS APPLY (
SELECT minsid = MIN(ri.s_id), maxsid = MAX(ri.s_id)
FROM rCTE ri
WHERE ri.grp = r.grp
AND ri.s_id IN (2,6)
) x
OPTION (MAXRECURSION 0);
-- Here's how not to do it: a query which resolves the group number but uses a "triangular join".
-- For each and every row in the set, the whole set up to that point (rn) is scanned.
-- Horrible if you have anything except a very small number of rows.
;WITH OrderedSet AS (
SELECT
rn = ROW_NUMBER() OVER(ORDER BY [caller], callee, localtime),
*
FROM test
),
MarkedSet AS (
SELECT tr.*, Marker = ISNULL(x.Marker,1)
FROM OrderedSet tr
OUTER APPLY (
SELECT Marker = 0
FROM OrderedSet lr
WHERE lr.rn + 1 = tr.rn
AND lr.[caller] = tr.[caller]
AND lr.callee = tr.callee
AND DATEDIFF(SECOND, lr.localtime, tr.localtime ) <= 1
) x
)
SELECT *
FROM MarkedSet m
CROSS APPLY (
SELECT grp = SUM(Marker)
FROM MarkedSet pr
WHERE pr.rn <= m.rn
) x
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden