March 10, 2016 at 4:15 am
Hi all
Apologies for the subject not sure how best to describe what I am hoping to achieve.
I have used ROW_NUMBER() OVER PARTITION on numerous occasions to sequence by a date field and was thinking that would be the starting block for the following:
I have created test data as an example and was hoping somebody could find a solution to both parts even if you don't need to use part1 to get to part2 (which is what I am really after).
Picture a Sales Order table at line level, where most items will generally be a single line (these I am not interested in and can be excluded), but there can be kits with a header and any number of components and the only thing linking them is that the components will directly follow their header when added to the Sales Order.
The Part field will denote the Header as H, Component as C and any normal item as nothing i.e. ''
So if we do WHERE Part IN ('H','C') we are left with a pattern / island of H's and C's where each H would be the boundary of a kit, like the following example:

I was thinking we would need to partition by Order, then the Part Islands to sequence the number of kits on each Order, ideally in order of the table ROW_NUMBER() but failing that the OrderLine.
Ideally though I would like to pivot all components to their actual header as follows:

Are the above 2 examples possible?
Below is the basic code to build the data
CREATE TABLE dbo.Table1
(
Order int,OrderLine int,ProductCode int,Part Char(1)
);
INSERT INTO Table1
VALUES
(1, 1,1000,'H'),(1, 2,98,'C'),(1, 3,99,'C'),(1, 7,2000,'H'),(1,8,70,'C'),
(1,9,71,'C'),(1,10,72,'C'),(1,15,3000,'H'),(1,16,50,'C'),(4,3,9000,'H'),
(4,4,10,'C'),(8,1,1000,'H'),(8,2,98,'C'),(8,3,99,'C'),(8,4,9000,'H'),
(8,5,10,'C'),(9,1,6000,'')
Thanks in advance
March 10, 2016 at 7:58 am
one idea....???
WITH cte as (
SELECT OrderNo,
OrderLine,
CASE WHEN part = 'H' THEN productcode ELSE NULL END as Headerproductcode,
CASE WHEN part = 'C' THEN productcode ELSE NULL END as Componentproductcode,
ROW_NUMBER() OVER(ORDER BY orderno, orderline) as rn
FROM table1
),
-- ref http://sqlmag.com/t-sql/last-non-null-puzzle for following
cte2 as (
SELECT OrderNo,
OrderLine,
CAST(SUBSTRING(MAX(CAST(rn AS BINARY(4)) + CAST(Headerproductcode AS BINARY(4))) OVER(ORDER BY rn ROWS UNBOUNDED PRECEDING), 5, 4) AS INT) AS Headerproductcode,
Componentproductcode,
rn
FROM cte
)
SELECT OrderNo,
OrderLine,
Headerproductcode ,
Componentproductcode
FROM cte2
WHERE(NOT(Componentproductcode IS NULL))
ORDER BY rn
________________________________________________________________
you can lead a user to data....but you cannot make them think
and remember....every day is a school day
March 10, 2016 at 9:20 am
I don't have a 2012 instance handy, or I could have tried using LEAD or LAG with some GROUP BYs, but here's a query that only uses 2008 syntax that produces the right results. It doesn't look like a good performer though...
CREATE TABLE #Table1 (
[Order] int NOT NULL,
OrderLine int NOT NULL,
ProductCode int,
Part char(1),
PRIMARY KEY CLUSTERED (
[Order] ASC,
OrderLine ASC
)
);
INSERT INTO #Table1 ([Order], OrderLine, ProductCode, Part)
VALUES (1, 1,1000,'H'),
(1, 2,98,'C'),
(1, 3,99,'C'),
(1, 7,2000,'H'),
(1,8,70,'C'),
(1,9,71,'C'),
(1,10,72,'C'),
(1,15,3000,'H'),
(1,16,50,'C'),
(4,3,9000,'H'),
(4,4,10,'C'),
(8,1,1000,'H'),
(8,2,98,'C'),
(8,3,99,'C'),
(8,4,9000,'H'),
(8,5,10,'C'),
(9,1,6000,'');
CREATE NONCLUSTERED INDEX IX_Table1_Part ON #Table1
(
Part ASC
);
WITH HEADERS AS (
SELECT *,
ROW_NUMBER() OVER(PARTITION BY T.[Order] ORDER BY T.OrderLine) AS ROW_NUM
FROM #Table1 AS T
WHERE T.Part = 'H'
),
COMPONENTS AS (
SELECT *
FROM #Table1 AS T
WHERE T.Part = 'C'
)
SELECT H.[Order], C.OrderLine, H.ProductCode AS HeaderProductCode, C.ProductCode AS ComponentProductCode,
ISNULL((SELECT MIN(H2.OrderLine) FROM HEADERS AS H2 WHERE H2.ROW_NUM > H.ROW_NUM), 1) AS THE_ISNULL
FROM HEADERS AS H
INNER JOIN COMPONENTS AS C
ON H.[Order] = C.[Order]
AND H.OrderLine < C.OrderLine
AND C.OrderLine < ISNULL((SELECT MIN(H2.OrderLine) FROM HEADERS AS H2 WHERE H2.[Order] = H.[Order] AND H2.ROW_NUM > H.ROW_NUM),
(SELECT MAX(C2.OrderLine) FROM COMPONENTS AS C2 WHERE C2.[Order] = H.[Order]) + 1)
ORDER BY H.[Order], C.OrderLine;
DROP TABLE #Table1;
Steve (aka sgmunson) 🙂 🙂 🙂
Rent Servers for Income (picks and shovels strategy)
March 10, 2016 at 2:29 pm
sgmunson (3/10/2016)
I don't have a 2012 instance handy, or I could have tried using LEAD or LAG with some GROUP BYs, but here's a query that only uses 2008 syntax that produces the right results. It doesn't look like a good performer though...
CREATE TABLE #Table1 (
[Order] int NOT NULL,
OrderLine int NOT NULL,
ProductCode int,
Part char(1),
PRIMARY KEY CLUSTERED (
[Order] ASC,
OrderLine ASC
)
);
INSERT INTO #Table1 ([Order], OrderLine, ProductCode, Part)
VALUES (1, 1,1000,'H'),
(1, 2,98,'C'),
(1, 3,99,'C'),
(1, 7,2000,'H'),
(1,8,70,'C'),
(1,9,71,'C'),
(1,10,72,'C'),
(1,15,3000,'H'),
(1,16,50,'C'),
(4,3,9000,'H'),
(4,4,10,'C'),
(8,1,1000,'H'),
(8,2,98,'C'),
(8,3,99,'C'),
(8,4,9000,'H'),
(8,5,10,'C'),
(9,1,6000,'');
CREATE NONCLUSTERED INDEX IX_Table1_Part ON #Table1
(
Part ASC
);
WITH HEADERS AS (
SELECT *,
ROW_NUMBER() OVER(PARTITION BY T.[Order] ORDER BY T.OrderLine) AS ROW_NUM
FROM #Table1 AS T
WHERE T.Part = 'H'
),
COMPONENTS AS (
SELECT *
FROM #Table1 AS T
WHERE T.Part = 'C'
)
SELECT H.[Order], C.OrderLine, H.ProductCode AS HeaderProductCode, C.ProductCode AS ComponentProductCode,
ISNULL((SELECT MIN(H2.OrderLine) FROM HEADERS AS H2 WHERE H2.ROW_NUM > H.ROW_NUM), 1) AS THE_ISNULL
FROM HEADERS AS H
INNER JOIN COMPONENTS AS C
ON H.[Order] = C.[Order]
AND H.OrderLine < C.OrderLine
AND C.OrderLine < ISNULL((SELECT MIN(H2.OrderLine) FROM HEADERS AS H2 WHERE H2.[Order] = H.[Order] AND H2.ROW_NUM > H.ROW_NUM),
(SELECT MAX(C2.OrderLine) FROM COMPONENTS AS C2 WHERE C2.[Order] = H.[Order]) + 1)
ORDER BY H.[Order], C.OrderLine;
DROP TABLE #Table1;
Hi Steve......would be really interested to see your thoughts on LEAD/LAG if you have chance.
Have put together a basic test rig
use tempdb
go
--DROP TABLE #table1;
--DROP TABLE #gen1;
--DROP TABLE #gen2;
SELECT TOP 10000 IDENTITY(INT, 1, 1) AS OrderNo
into #gen1
FROM sys.all_columns ac1
CROSS JOIN sys.all_columns ac2
CROSS JOIN sys.all_columns ac3
CREATE TABLE #gen2(
Orderline INTEGER NOT NULL PRIMARY KEY
,part VARCHAR(1) NOT NULL
);
INSERT INTO #gen2(Orderline,part) VALUES
(1,'H'),(2,'C'),(3,'C'),(4,'C'),(5,'H')
,(6,'C'),(7,'H'),(8,'C'),(9,'C'),(10,'C');
SELECT #gen1.OrderNo,
#gen2.Orderline,
1 + CAST(ABS(CHECKSUM(NEWID()) % 9000) AS INT) AS ProductCode,
#gen2.part
INTO #Table1
FROM #gen1 CROSS JOIN #gen2;
DELETE FROM #Table1
WHERE (OrderNo % 5 = 0) AND (Orderline > 2)
DELETE FROM #Table1
WHERE (OrderNo % 3 = 0) AND (Orderline >6)
CREATE CLUSTERED INDEX [PK] ON [dbo].[#Table1]
([OrderNo] ASC,[Orderline] ASC);
CREATE NONCLUSTERED INDEX IX_Table1_Part ON #Table1
(Part ASC);
select count(*) from #table1
your code and mine in comparison
PRINT '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
PRINT 'sgmunson results below ';
PRINT '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
PRINT ''
set statistics time, io on;
WITH HEADERS AS (
SELECT *,
ROW_NUMBER() OVER(PARTITION BY T.OrderNo ORDER BY T.OrderLine) AS ROW_NUM
FROM #Table1 AS T
WHERE T.Part = 'H'
),
COMPONENTS AS (
SELECT *
FROM #Table1 AS T
WHERE T.Part = 'C'
)
SELECT H.OrderNo, C.OrderLine, H.ProductCode AS HeaderProductCode, C.ProductCode AS ComponentProductCode,
ISNULL((SELECT MIN(H2.OrderLine) FROM HEADERS AS H2 WHERE H2.ROW_NUM > H.ROW_NUM), 1) AS THE_ISNULL
FROM HEADERS AS H
INNER JOIN COMPONENTS AS C
ON H.OrderNo = C.OrderNo
AND H.OrderLine < C.OrderLine
AND C.OrderLine < ISNULL((SELECT MIN(H2.OrderLine) FROM HEADERS AS H2 WHERE H2.OrderNo = H.OrderNo AND H2.ROW_NUM > H.ROW_NUM),
(SELECT MAX(C2.OrderLine) FROM COMPONENTS AS C2 WHERE C2.OrderNo = H.OrderNo) + 1)
ORDER BY H.OrderNo, C.OrderLine;
set statistics time, io off;
PRINT ''
PRINT ''
PRINT '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
PRINT 'JLS results below ';
PRINT '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
set statistics time, io on;
WITH cte as (
SELECT OrderNo,
OrderLine,
CASE WHEN part = 'H' THEN productcode ELSE NULL END as Headerproductcode,
CASE WHEN part = 'C' THEN productcode ELSE NULL END as Componentproductcode,
ROW_NUMBER() OVER(ORDER BY orderno, orderline) as rn
FROM #table1
),
-- ref http://sqlmag.com/t-sql/last-non-null-puzzle for following
cte2 as (
SELECT OrderNo,
OrderLine,
CAST(SUBSTRING(MAX(CAST(rn AS BINARY(4)) + CAST(Headerproductcode AS BINARY(4))) OVER(ORDER BY rn ROWS UNBOUNDED PRECEDING), 5, 4) AS INT) AS Headerproductcode,
Componentproductcode,
rn
FROM cte
)
SELECT OrderNo,
OrderLine,
Headerproductcode ,
Componentproductcode
FROM cte2
WHERE(NOT(Componentproductcode IS NULL))
ORDER BY rn
set statistics time, io off;
--DROP TABLE #table1;
--DROP TABLE #gen1;
--DROP TABLE #gen2;
________________________________________________________________
you can lead a user to data....but you cannot make them think
and remember....every day is a school day
March 10, 2016 at 3:24 pm
sgmunson (3/10/2016)
I don't have a 2012 instance handy, or I could have tried using LEAD or LAG with some GROUP BYs, but here's a query that only uses 2008 syntax that produces the right results. It doesn't look like a good performer though...
LEAD/LAG tend not to work well in cases like this, because you have to specify a static offset and it becomes unwieldy when the offset can vary. It's easier to use FIRST_VALUE/LAST_VALUE when the offset can vary.
Drew
J. Drew Allen
Business Intelligence Analyst
Philadelphia, PA
March 10, 2016 at 9:53 pm
drew.allen (3/10/2016)
sgmunson (3/10/2016)
I don't have a 2012 instance handy, or I could have tried using LEAD or LAG with some GROUP BYs, but here's a query that only uses 2008 syntax that produces the right results. It doesn't look like a good performer though...LEAD/LAG tend not to work well in cases like this, because you have to specify a static offset and it becomes unwieldy when the offset can vary. It's easier to use FIRST_VALUE/LAST_VALUE when the offset can vary.
Drew
I don't recall, but can you specify a field value to be the offset? Even if you can't, I can think of another way to get around it, but once you do that, it might even not perform as well as what I've already coded.
Steve (aka sgmunson) 🙂 🙂 🙂
Rent Servers for Income (picks and shovels strategy)
March 11, 2016 at 3:45 am
Thanks for the replies!
I tried J Livingston first CTE method and while that worked perfect on the dataset provided I didn't realise the datatype of the Headerproductcode and the Componentproductcode would be fundamental to the solution, they are both actually char(30) and I just used INT for simplicity and to highlight the example.
I have briefly spent some time reading the SQLmag puzzle article and tried playing around with the CTE2 Max() line, changing the datatypes etc but could not achieve desired results.
I am not familiar with creating non clustered indexes on our application database and so I have not tried any of the other methods provided. Does the code reference these indexes or would it work without creating them but just not run as fast?
I was also hoping (only if possible) to be able to save this as a View but I am not sure that would be achievable with the CTE solution and all of the Order by clauses.
Thanks in advance and sorry for not being accurate.
March 11, 2016 at 3:58 am
richardm 53436 (3/11/2016)
Thanks for the replies!I tried J Livingston first CTE method and while that worked perfect on the dataset provided I didn't realise the datatype of the Headerproductcode and the Componentproductcode would be fundamental to the solution, they are both actually char(30) and I just used INT for simplicity and to highlight the example.
I have briefly spent some time reading the SQLmag puzzle article and tried playing around with the CTE2 Max() line, changing the datatypes etc but could not achieve desired results.
I am not familiar with creating non clustered indexes on our application database and so I have not tried any of the other methods provided. Does the code reference these indexes or would it work without creating them but just not run as fast?
I was also hoping (only if possible) to be able to save this as a View but I am not sure that would be achievable with the CTE solution and all of the Order by clauses.
Thanks in advance and sorry for not being accurate.
try this line if you have Productcode as Char(30)
CAST(SUBSTRING(MAX(CAST(rn AS BINARY(4)) + CAST(Headerproductcode AS VARBINARY(30))) OVER(ORDER BY rn ROWS UNBOUNDED PRECEDING), 5, 30) AS CHAR(30)) AS Headerproductcode,
________________________________________________________________
you can lead a user to data....but you cannot make them think
and remember....every day is a school day
March 11, 2016 at 8:17 am
sgmunson (3/10/2016)
drew.allen (3/10/2016)
sgmunson (3/10/2016)
I don't have a 2012 instance handy, or I could have tried using LEAD or LAG with some GROUP BYs, but here's a query that only uses 2008 syntax that produces the right results. It doesn't look like a good performer though...LEAD/LAG tend not to work well in cases like this, because you have to specify a static offset and it becomes unwieldy when the offset can vary. It's easier to use FIRST_VALUE/LAST_VALUE when the offset can vary.
Drew
I don't recall, but can you specify a field value to be the offset? Even if you can't, I can think of another way to get around it, but once you do that, it might even not perform as well as what I've already coded.
No, you cannot. That is why I specifically said "static offset". For another problem, I tried using a tally table for the offset, and it wouldn't let me do it.
Drew
J. Drew Allen
Business Intelligence Analyst
Philadelphia, PA
March 11, 2016 at 9:38 am
just to give you an idea...here's some sample data and a view using productcode as char
--generate a test table
use tempdb
go
--DROP TABLE test;
--DROP TABLE #gen1;
--DROP TABLE #gen2;
--DROP VIEW testview;
SELECT TOP 10000 IDENTITY(INT, 1, 1) AS OrderNo
into #gen1
FROM sys.all_columns ac1
CROSS JOIN sys.all_columns ac2
CROSS JOIN sys.all_columns ac3
CREATE TABLE #gen2(
Orderline INT NULL
,part VARCHAR(1) NULL
);
INSERT INTO #gen2(Orderline,part) VALUES
(1,'H'),(2,'C'),(3,'C'),(4,'C'),(5,'H')
,(6,'C'),(7,'H'),(8,'C'),(9,'C'),(10,'C');
CREATE TABLE test
(OrderNo INT NULL,
OrderLine INT NULL,
ProductCode CHAR(30) NULL,
Part CHAR (1) NULL
);
DECLARE @StringMaxLen int = 30
INSERT INTO test
(OrderNo
,OrderLine
,ProductCode
,Part)
SELECT #gen1.OrderNo,
#gen2.Orderline,
LEFT (REPLACE(CAST (NEWID () AS CHAR(256)),'-','') , ABS (CHECKSUM (NEWID ())) % @StringMaxLen + 1) AS ProductCode,
#gen2.part
FROM #gen1 CROSS JOIN #gen2;
-- skew no of rows
DELETE FROM test
WHERE (OrderNo % 5 = 0) AND (Orderline > 2)
DELETE FROM test
WHERE (OrderNo % 3 = 0) AND (Orderline >6)
-- see what we have got
SELECT OrderNo, OrderLine, ProductCode, Part
FROM test
ORDER BY OrderNo, OrderLine
create a view
CREATE VIEW testview
AS
WITH cte as (
SELECT OrderNo,
OrderLine,
CASE WHEN part = 'H' THEN productcode ELSE NULL END as Headerproductcode,
CASE WHEN part = 'C' THEN productcode ELSE NULL END as Componentproductcode,
ROW_NUMBER() OVER(ORDER BY orderno, orderline) as rn
FROM test
),
cte2 as (
SELECT OrderNo,
OrderLine,
CAST(SUBSTRING(MAX(CAST(rn AS BINARY(4)) + CAST(Headerproductcode AS VARBINARY(30))) OVER(ORDER BY rn ROWS UNBOUNDED PRECEDING), 5, 30) AS CHAR(30)) AS Headerproductcode,
Componentproductcode,
rn
FROM cte
)
SELECT OrderNo,
OrderLine,
Headerproductcode ,
Componentproductcode
FROM cte2
WHERE(NOT(Componentproductcode IS NULL))
GO
--see results and compare
SELECT OrderNo, OrderLine, ProductCode, Part
FROM test
--WHERE (OrderNo = 1001)
ORDER BY OrderNo, OrderLine
SELECT OrderNo, OrderLine, Headerproductcode, Componentproductcode
FROM testview
--WHERE (OrderNo = 1001)
ORDER BY OrderNo, OrderLine
________________________________________________________________
you can lead a user to data....but you cannot make them think
and remember....every day is a school day
Viewing 10 posts - 1 through 10 (of 10 total)
You must be logged in to reply to this topic. Login to reply