Pre-grouping works quite well in this case, especially if there is a filtered index on the the table for customerID where sales is not null.
😎
;WITH CUSTOMERS_SALE AS
(
SELECT
SSF.customerId
FROM #example SSF
WHERE SSF.sales IS NOT NULL
GROUP BY SSF.customerId
)
SELECT
SF.customerId
,SF.window
,SF.product
,SF.sales
,SIGN(ISNULL(CSALE.customerId,0)) AS SALE_FLAG
FROM #example SF
LEFT OUTER JOIN CUSTOMERS_SALE CSALE
ON SF.customerId = CSALE.customerId;
Quick and simple test harness
USE tempdb;
GO
SET NOCOUNT ON;
--/*
IF OBJECT_ID(N'dbo.TBL_SAMPLE_SALE_FLAG') IS NOT NULL DROP TABLE dbo.TBL_SAMPLE_SALE_FLAG;
CREATE TABLE dbo.TBL_SAMPLE_SALE_FLAG
(
SSF_ID INT NOT NULL CONSTRAINT PK_DBO_TBL_SAMPLE_SALE_FLAG_SALE_SSF_ID PRIMARY KEY CLUSTERED
,CUST_ID INT NOT NULL
,WIND_ID INT NOT NULL
,PROD_ID INT NOT NULL
,SALE_VAL INT NULL
);
DECLARE @SAMPLE_SIZE INT = 1000000;
DECLARE @WINDOW_COUNT INT = 12;
DECLARE @CUSTOMER_COUNT INT = 500000;
DECLARE @PRODUCT_COUNT INT = 1000;
DECLARE @NO_SALE_SEED INT = 3;
DECLARE @MAX_SALE_VAL INT = 10000;
;WITH T(N) AS (SELECT N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS X(N))
, NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N
FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9)
INSERT INTO dbo.TBL_SAMPLE_SALE_FLAG WITH (TABLOCK) (SSF_ID,CUST_ID,WIND_ID,PROD_ID,SALE_VAL)
SELECT
NM.N AS SSF_ID
,ABS(CHECKSUM(NEWID())) % @CUSTOMER_COUNT AS CUST_ID
,ABS(CHECKSUM(NEWID())) % @WINDOW_COUNT AS WIND_ID
,ABS(CHECKSUM(NEWID())) % @PRODUCT_COUNT AS PROD_ID
,ABS(CHECKSUM(NEWID())) % @MAX_SALE_VAL
+ NULLIF(ABS(CHECKSUM(NEWID())) % @NO_SALE_SEED,0) AS SALE_VAL
FROM NUMS NM;
-- POC INDICES
CREATE NONCLUSTERED INDEX NCLIDX_DBO_TBL_SAMPLE_SALE_FLAG_CUST_ID_SALE_VAL_INCL_WIN_ID_PROD_ID
ON dbo.TBL_SAMPLE_SALE_FLAG (CUST_ID ASC, SALE_VAL ASC) INCLUDE (WIND_ID,PROD_ID);
CREATE NONCLUSTERED INDEX NCLIDX_DBO_TBL_SAMPLE_SALE_FLAG_CUST_ID_SALE_VAL
ON dbo.TBL_SAMPLE_SALE_FLAG (CUST_ID ASC,SALE_VAL ASC) WHERE (SALE_VAL IS NOT NULL);
CREATE NONCLUSTERED INDEX NCLIDX_DBO_TBL_SAMPLE_SALE_FLAG_SALE_VAL_INCL_CUST_ID_WIND_ID
ON dbo.TBL_SAMPLE_SALE_FLAG (SALE_VAL ASC) INCLUDE (CUST_ID,WIND_ID) WHERE (SALE_VAL IS NOT NULL);
-- */
DECLARE @timer TABLE (T_TEXT VARCHAR(50) NOT NULL, T_TS DATETIME2(7) NOT NULL DEFAULT(SYSDATETIME()));
DECLARE @INT_BUCKET01 INT = 0;
DECLARE @INT_BUCKET02 INT = 0;
DECLARE @INT_BUCKET03 INT = 0;
DECLARE @INT_BUCKET04 INT = 0;
DECLARE @BIT_BUCKET01 INT = 0;
-- First round
RAISERROR ('--- DRY RUN ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('DRY RUN');
SELECT
@INT_BUCKET01 = E.CUST_ID
,@INT_BUCKET02 = E.WIND_ID
,@INT_BUCKET03 = E.PROD_ID
,@INT_BUCKET04 = E.SALE_VAL
FROM dbo.TBL_SAMPLE_SALE_FLAG E
ORDER BY CUST_ID ASC
,SALE_VAL ASC
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('DRY RUN');
RAISERROR ('--- MAX ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('MAX');
SELECT
@INT_BUCKET01 = E.CUST_ID
,@INT_BUCKET02 = E.WIND_ID
,@INT_BUCKET03 = E.PROD_ID
,@INT_BUCKET04 = E.SALE_VAL
,@BIT_BUCKET01 = SIGN(ISNULL(CONVERT(INT,MAX(E.SALE_VAL) OVER
(
PARTITION BY E.CUST_ID
ORDER BY E.CUST_ID
),0),0)) --AS FLG
FROM dbo.TBL_SAMPLE_SALE_FLAG E
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('MAX');
RAISERROR ('--- COUNT ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('COUNT');
SELECT
@INT_BUCKET01 = E.CUST_ID
,@INT_BUCKET02 = E.WIND_ID
,@INT_BUCKET03 = E.PROD_ID
,@INT_BUCKET04 = E.SALE_VAL
,@BIT_BUCKET01 = CASE WHEN COUNT(E.SALE_VAL) OVER
(
PARTITION BY E.CUST_ID
ORDER BY E.CUST_ID
) > 0 THEN 1 ELSE 0 END --AS FLG
FROM dbo.TBL_SAMPLE_SALE_FLAG E
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('COUNT');
RAISERROR ('--- TOP ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('TOP');
SELECT
@INT_BUCKET01 = e.CUST_ID
,@INT_BUCKET02 = e.WIND_ID
,@INT_BUCKET03 = e.PROD_ID
,@INT_BUCKET04 = e.SALE_VAL
,@BIT_BUCKET01 = CASE WHEN s.sf = 1 THEN 1 ELSE 0 END --AS sales_flag
FROM dbo.TBL_SAMPLE_SALE_FLAG e
OUTER APPLY (SELECT TOP 1 1 AS sf
FROM dbo.TBL_SAMPLE_SALE_FLAG e2
WHERE e.CUST_ID = e2.CUST_ID
AND e2.SALE_VAL IS NOT NULL) AS s
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('TOP');
RAISERROR ('--- PRE GROUP ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('PRE GROUP');
;WITH CUSTOMERS_SALE AS
(
SELECT
SSF.CUST_ID
FROM dbo.TBL_SAMPLE_SALE_FLAG SSF
WHERE SSF.SALE_VAL IS NOT NULL
GROUP BY SSF.CUST_ID
)
SELECT
@INT_BUCKET01 = SF.CUST_ID
,@INT_BUCKET02 = SF.WIND_ID
,@INT_BUCKET03 = SF.PROD_ID
,@INT_BUCKET04 = SF.SALE_VAL
,@BIT_BUCKET01 = SIGN(ISNULL(CSALE.CUST_ID,0)) --AS SALE_FLAG
FROM dbo.TBL_SAMPLE_SALE_FLAG SF
LEFT OUTER JOIN CUSTOMERS_SALE CSALE
ON SF.CUST_ID = CSALE.CUST_ID
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('PRE GROUP');
-- Second round
RAISERROR ('--- DRY RUN 02 ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('DRY RUN 02');
SELECT
@INT_BUCKET01 = E.CUST_ID
,@INT_BUCKET02 = E.WIND_ID
,@INT_BUCKET03 = E.PROD_ID
,@INT_BUCKET04 = E.SALE_VAL
FROM dbo.TBL_SAMPLE_SALE_FLAG E
ORDER BY CUST_ID ASC
,SALE_VAL ASC
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('DRY RUN 02');
RAISERROR ('--- MAX 02 ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('MAX 02');
SELECT
@INT_BUCKET01 = E.CUST_ID
,@INT_BUCKET02 = E.WIND_ID
,@INT_BUCKET03 = E.PROD_ID
,@INT_BUCKET04 = E.SALE_VAL
,@BIT_BUCKET01 = SIGN(ISNULL(CONVERT(INT,MAX(E.SALE_VAL) OVER
(
PARTITION BY E.CUST_ID
ORDER BY E.CUST_ID
),0),0)) --AS FLG
FROM dbo.TBL_SAMPLE_SALE_FLAG E
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('MAX 02');
RAISERROR ('--- COUNT 02 ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('COUNT 02');
SELECT
@INT_BUCKET01 = E.CUST_ID
,@INT_BUCKET02 = E.WIND_ID
,@INT_BUCKET03 = E.PROD_ID
,@INT_BUCKET04 = E.SALE_VAL
,@BIT_BUCKET01 = CASE WHEN COUNT(E.SALE_VAL) OVER
(
PARTITION BY E.CUST_ID
ORDER BY E.CUST_ID
) > 0 THEN 1 ELSE 0 END --AS FLG
FROM dbo.TBL_SAMPLE_SALE_FLAG E
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('COUNT 02');
RAISERROR ('--- TOP 02 ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('TOP 02');
SELECT
@INT_BUCKET01 = e.CUST_ID
,@INT_BUCKET02 = e.WIND_ID
,@INT_BUCKET03 = e.PROD_ID
,@INT_BUCKET04 = e.SALE_VAL
,@BIT_BUCKET01 = CASE WHEN s.sf = 1 THEN 1 ELSE 0 END --AS sales_flag
FROM dbo.TBL_SAMPLE_SALE_FLAG e
OUTER APPLY (SELECT TOP 1 1 AS sf
FROM dbo.TBL_SAMPLE_SALE_FLAG e2
WHERE e.CUST_ID = e2.CUST_ID
AND e2.SALE_VAL IS NOT NULL) AS s
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('TOP 02');
RAISERROR ('--- PRE GROUP 02 ---',0,0) WITH NOWAIT;
INSERT INTO @timer(T_TEXT) VALUES('PRE GROUP 02');
;WITH CUSTOMERS_SALE AS
(
SELECT
SSF.CUST_ID
FROM dbo.TBL_SAMPLE_SALE_FLAG SSF
WHERE SSF.SALE_VAL IS NOT NULL
GROUP BY SSF.CUST_ID
)
SELECT
@INT_BUCKET01 = SF.CUST_ID
,@INT_BUCKET02 = SF.WIND_ID
,@INT_BUCKET03 = SF.PROD_ID
,@INT_BUCKET04 = SF.SALE_VAL
,@BIT_BUCKET01 = SIGN(ISNULL(CSALE.CUST_ID,0)) --AS SALE_FLAG
FROM dbo.TBL_SAMPLE_SALE_FLAG SF
LEFT OUTER JOIN CUSTOMERS_SALE CSALE
ON SF.CUST_ID = CSALE.CUST_ID
OPTION (MAXDOP 1);
INSERT INTO @timer(T_TEXT) VALUES('PRE GROUP 02');
SELECT
T.T_TEXT
,DATEDIFF(MICROSECOND,MIN(T.T_TS),MAX(T.T_TS)) AS DURATION
FROM @timer T
GROUP BY T.T_TEXT
ORDER BY DURATION;
Results on 2nd Gen i5 / SQL Server 2014
T_TEXT DURATION
--------------- -----------
DRY RUN 02 190001
DRY RUN 200000
PRE GROUP 440000
PRE GROUP 02 440000
TOP 2530004
TOP 02 2570004
MAX 02 10675016
COUNT 10700015
MAX 10796016
COUNT 02 10810015
Edit: Corrected the wrong "pre-group" code snippet.