February 8, 2012 at 1:48 am
cdh-727733 (2/7/2012)
Thanks for all your efforts much appreciated.It does seem to mostly work with the example data - I even put in a 4th category level and it returned that.
However it doesn't seem to work at higher levels now - e.g. passing in the 2nd level ID doesn't show the 3rd, but passing in the 3rd level does return the 4th.
I also need to try and get it to work when it doesn't know what the initial category selection is and return the 2nd tier ones (or even just the top tier ones would be a step forward from where I was at).
I will try and deconstruct this and see if I can (a) understand it! and (b) engineer in the missing pieces.
Cheers.
Sorry about dashing out yesterday, wanted to beat the traffic 😀
Anyway, I've sat down for 10 minutes this morning and written out the query to do what I think you're after.
Here it is with the sample data: -
BEGIN TRAN
--Your DDL and sample data
CREATE TABLE [dbo].[tblCategory] ([ID] [int] NOT NULL, [ParentID] [int] NULL, [Name] [varchar](150) NOT NULL,
CONSTRAINT [PK_tblCategory] PRIMARY KEY CLUSTERED ([ID] ASC)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]) ON [PRIMARY];
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (1, 0, 'Power Tools');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (2, 0, 'Hand Tools');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (3, 1, 'Drills');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (4, 1, 'Saws');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (5, 1, 'Planers & Thicknessers');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (6, 3, 'Twist Drills');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (7, 3, 'Hammer Drills');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (8, 5, 'Planers');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (9, 5, 'Thicknessers');
CREATE TABLE [dbo].[tblProduct] ([ProductID] [int] NOT NULL, [ProductName] [varchar](150) NOT NULL,
[CategoryID] [int] NOT NULL CONSTRAINT [PK_tblProduct] PRIMARY KEY CLUSTERED ([ProductID] ASC)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]) ON [PRIMARY];
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (1, 'A Twist Drill', 6);
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (2, 'Another Twist Drill', 6);
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (3, 'A Hammer Drill', 7);
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (4, 'Another Hammer Drill', 7);
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (5, 'A Planer with Drill in the name', 9);
--Actual solution!!
DECLARE @partSearch VARCHAR(100) = 'drill', @level INT = 0, --0 is root, 1 for next level and so on. Set to NULL for all
@levelID INT = 3; --Only display --> (3, 1, 'Drills'), set to NULL for all
;
WITH subnodes (Distance, ID, NAME, Node_Seq, ParentID)
AS (
SELECT 0, h.ID, h.NAME, CONVERT(VARCHAR(80), ltrim(str(h.ID))) AS Node_Seq, h.ParentID
FROM tblCategory h
WHERE h.ID = 1
UNION ALL
SELECT distance + 1, h.ID, h.NAME, CONVERT(VARCHAR(80), sn.Node_Seq + '.' + ltrim(str(h.ID))),
h.ParentID
FROM tblCategory h
INNER JOIN subnodes sn ON h.ParentID = sn.ID
)
SELECT Distance, ID, NAME, Node_Seq
FROM (SELECT baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq,
DENSE_RANK() OVER(ORDER BY baseQ.Distance)-1 AS currentLevel, outerQ.ParentID AS levelID
FROM subnodes baseQ
CROSS APPLY (SELECT Distance, ID, NAME, Node_Seq AS pNode_Seq, b.ProductName, ParentID
FROM subnodes innerQ
CROSS APPLY (SELECT ProductName
FROM tblProduct
WHERE PATINDEX('%' + @partSearch + '%', ProductName) > 0
AND innerQ.ID = CategoryID) b
WHERE baseQ.ID = ParentID OR baseQ.ID = ID) outerQ
GROUP BY baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq, outerQ.ParentID) levelQ
WHERE currentLevel = COALESCE(@level,currentLevel) AND levelID = COALESCE(@levelID,levelID)
ROLLBACK
And here it is without: -
--Actual solution!!
DECLARE @partSearch VARCHAR(100) = 'drill', @level INT = 0, --0 is root, 1 for next level and so on. Set to NULL for all
@levelID INT = 3; --Only display --> (3, 1, 'Drills'), set to NULL for all
;
WITH subnodes (Distance, ID, NAME, Node_Seq, ParentID)
AS (
SELECT 0, h.ID, h.NAME, CONVERT(VARCHAR(80), ltrim(str(h.ID))) AS Node_Seq, h.ParentID
FROM tblCategory h
WHERE h.ID = 1
UNION ALL
SELECT distance + 1, h.ID, h.NAME, CONVERT(VARCHAR(80), sn.Node_Seq + '.' + ltrim(str(h.ID))),
h.ParentID
FROM tblCategory h
INNER JOIN subnodes sn ON h.ParentID = sn.ID
)
SELECT Distance, ID, NAME, Node_Seq
FROM (SELECT baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq,
DENSE_RANK() OVER(ORDER BY baseQ.Distance)-1 AS currentLevel, outerQ.ParentID AS levelID
FROM subnodes baseQ
CROSS APPLY (SELECT Distance, ID, NAME, Node_Seq AS pNode_Seq, b.ProductName, ParentID
FROM subnodes innerQ
CROSS APPLY (SELECT ProductName
FROM tblProduct
WHERE PATINDEX('%' + @partSearch + '%', ProductName) > 0
AND innerQ.ID = CategoryID) b
WHERE baseQ.ID = ParentID OR baseQ.ID = ID) outerQ
GROUP BY baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq, outerQ.ParentID) levelQ
WHERE currentLevel = COALESCE(@level,currentLevel) AND levelID = COALESCE(@levelID,levelID)
It could still do with being cleaned up a bit, and I'm not promising the performance will be brilliant (something for you to test), but it should get you started.
February 8, 2012 at 3:10 am
No worries thanks for your perseverance!
I can't seem to get it to produce the correct results though.
If @levelID is set to null and h.ID is set to 1 then I get "Drills" and "Planers & Thicknessers".
If I then step down a level and set h.ID to 3 I still only get "Drills" and not the child categories.
Am passing the wrong criteria?
I wonder if you could outline the theory of the query then maybe I can understand the angle of approach and try and hack away at this as well?
Thanks again.
February 8, 2012 at 3:18 am
Jeff Moden (2/7/2012)
fahey.jonathan (2/7/2012)
cdh-727733 (2/7/2012)
I can't do that unfortunately as this is to be driven from user searches which I can't predict.What I am suggesting is that you create a complete hierarchy that includes all products. When a user requests a search, you use the hierarchy from the point they request, progressing down from there.
We currently do that with our distributor and customer list, which runs to 200,000 people. We rebuild the hierarchy once an hour. When a distributor searches for distributors and customers in his sales organization, that data is already precompiled so we don't have to dynamically build the hierarchy every time to return the results.
For our purpose, once an hour is sufficient. If your product hierarchy changes less frequently, you could rebuild the complete hierarchy once a day.
I'm just curious. What type of hierarchy are you using? Adjacency list, Hierarchical Path, HierarchyID, Nested Sets, cfomposite of 2 or more of those? Also, how long does it take you to rebuild your 220k node hierarchy?
I'm not sure how this would work with this situation - what records would I create - an entry for each product at each level of the category tree it might live at?
February 8, 2012 at 3:25 am
cdh-727733 (2/8/2012)
No worries thanks for your perseverance!I can't seem to get it to produce the correct results though.
If @levelID is set to null and h.ID is set to 1 then I get "Drills" and "Planers & Thicknessers".
If I then step down a level and set h.ID to 3 I still only get "Drills" and not the child categories.
Am passing the wrong criteria?
I wonder if you could outline the theory of the query then maybe I can understand the angle of approach and try and hack away at this as well?
Thanks again.
OK, easiest way to explain is to stick it in a SPROC.
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[hierarchySearch]') AND type in (N'P', N'PC'))
DROP PROCEDURE [dbo].[hierarchySearch]
GO
CREATE PROCEDURE hierarchySearch(@partSearch VARCHAR(100), @level INT = NULL, @levelID INT = NULL)
AS
BEGIN
WITH subnodes (Distance, ID, NAME, Node_Seq, ParentID)
AS (
SELECT 0, h.ID, h.NAME, CONVERT(VARCHAR(80), ltrim(str(h.ID))) AS Node_Seq, h.ParentID
FROM tblCategory h
WHERE h.ID = 1
UNION ALL
SELECT distance + 1, h.ID, h.NAME, CONVERT(VARCHAR(80), sn.Node_Seq + '.' + ltrim(str(h.ID))),
h.ParentID
FROM tblCategory h
INNER JOIN subnodes sn ON h.ParentID = sn.ID
)
SELECT Distance, ID, NAME, Node_Seq
FROM (SELECT baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq,
DENSE_RANK() OVER(ORDER BY baseQ.Distance)-1 AS currentLevel, outerQ.ParentID AS levelID
FROM subnodes baseQ
CROSS APPLY (SELECT Distance, ID, NAME, Node_Seq AS pNode_Seq, b.ProductName, ParentID
FROM subnodes innerQ
CROSS APPLY (SELECT ProductName
FROM tblProduct
WHERE PATINDEX('%' + @partSearch + '%', ProductName) > 0
AND innerQ.ID = CategoryID) b
WHERE baseQ.ID = ParentID OR baseQ.ID = ID) outerQ
GROUP BY baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq, outerQ.ParentID) levelQ
WHERE currentLevel = COALESCE(@level,currentLevel) AND levelID = COALESCE(@levelID,levelID)
END
Then use the SPROC to demonstrate the usage in the way that I mean.
EXECUTE hierarchySearch @partSearch = 'drill' --Do a search for drill with all matches
EXECUTE hierarchySearch @partSearch = 'drill', @level = 1 --Do a search for drill with second level of matches (excludes ID 3 and 5, just shows children)
EXECUTE hierarchySearch @partSearch = 'drill', @levelID = 3 --Do a search for drill where root ID is 3 (excludes ID 5 and 9)
EXECUTE hierarchySearch @partSearch = 'drill', @level = 1, @levelID = 3 --Do a search for drill where root ID is 3 for second level matches (excludes ID 3, 5 and 9)
The above produces the following results
--------------------------------------------------------------------------------
EXECUTE hierarchySearch @partSearch = 'drill' --Do a search for drill with all matches
Distance ID NAME Node_Seq
----------- ----------- ------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------
1 3 Drills 1.3
1 5 Planers & Thicknessers 1.5
2 6 Twist Drills 1.3.6
2 7 Hammer Drills 1.3.7
2 9 Thicknessers 1.5.9
--------------------------------------------------------------------------------
EXECUTE hierarchySearch @partSearch = 'drill', @level = 1 --Do a search for drill with second level of matches (excludes ID 3 and 5, just shows children)
Distance ID NAME Node_Seq
----------- ----------- ------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------
2 6 Twist Drills 1.3.6
2 7 Hammer Drills 1.3.7
2 9 Thicknessers 1.5.9
--------------------------------------------------------------------------------
EXECUTE hierarchySearch @partSearch = 'drill', @levelID = 3 --Do a search for drill where root ID is 3 (excludes ID 5 and 9)
Distance ID NAME Node_Seq
----------- ----------- ------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------
1 3 Drills 1.3
2 6 Twist Drills 1.3.6
2 7 Hammer Drills 1.3.7
--------------------------------------------------------------------------------
EXECUTE hierarchySearch @partSearch = 'drill', @level = 1, @levelID = 3 --Do a search for drill where root ID is 3 for second level matches (excludes ID 3, 5 and 9)
Distance ID NAME Node_Seq
----------- ----------- ------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------
2 6 Twist Drills 1.3.6
2 7 Hammer Drills 1.3.7
--------------------------------------------------------------------------------
February 8, 2012 at 5:11 am
Sorry I missed the @level variable completely somehow. That seems to work well for the 3 tier scenario.
I tried it on the 4 tier and I can't get it to get me anything other than the lowest levels right away.
Test data :
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (2,0,'Hand Tools')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (11,2,'Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (12,11,'Phillips Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (13,11,'Slotted Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (14,13,'Big Slotted Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (15,13,'Small Slotted Screwdrivers')
INSERT INTO tblProduct (ProductID,ProductName,CategoryID) VALUES (6,'A Big Screwdriver',14)
INSERT INTO tblProduct (ProductID,ProductName,CategoryID) VALUES (7,'A Small Screwdriver',15)
Then try a search for "screwdriver"
February 8, 2012 at 5:27 am
cdh-727733 (2/8/2012)
Sorry I missed the @level variable completely somehow. That seems to work well for the 3 tier scenario.I tried it on the 4 tier and I can't get it to get me anything other than the lowest levels right away.
Test data :
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (2,0,'Hand Tools')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (11,2,'Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (12,11,'Phillips Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (13,11,'Slotted Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (14,13,'Big Slotted Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (15,13,'Small Slotted Screwdrivers')
INSERT INTO tblProduct (ProductID,ProductName,CategoryID) VALUES (6,'A Big Screwdriver',14)
INSERT INTO tblProduct (ProductID,ProductName,CategoryID) VALUES (7,'A Small Screwdriver',15)
Then try a search for "screwdriver"
LOL!!!
I missed a bug in your original script, my bad :w00t:
Can you see what it was? In your original subnodes CTE you were eliminating all except ID=1 as the root of the hierarchy. Instead, the root of the hierarchy is where ParentID is 0, which allows us to have both Power Tools and Hand Tools as root elements.
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[hierarchySearch]') AND type in (N'P', N'PC'))
DROP PROCEDURE [dbo].[hierarchySearch]
GO
CREATE PROCEDURE hierarchySearch(@partSearch VARCHAR(100), @level INT = NULL, @levelID INT = NULL)
AS
BEGIN
WITH subnodes (Distance, ID, NAME, Node_Seq, ParentID)
AS (
SELECT 0, h.ID, h.NAME, CONVERT(VARCHAR(80), ltrim(str(h.ID))) AS Node_Seq, h.ParentID
FROM tblCategory h
WHERE h.ParentID = 0
UNION ALL
SELECT distance + 1, h.ID, h.NAME, CONVERT(VARCHAR(80), sn.Node_Seq + '.' + ltrim(str(h.ID))),
h.ParentID
FROM tblCategory h
INNER JOIN subnodes sn ON h.ParentID = sn.ID
)
SELECT Distance, ID, NAME, Node_Seq
FROM (SELECT baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq,
DENSE_RANK() OVER(ORDER BY baseQ.Distance)-1 AS currentLevel, outerQ.ParentID AS levelID
FROM subnodes baseQ
CROSS APPLY (SELECT Distance, ID, NAME, Node_Seq AS pNode_Seq, b.ProductName, ParentID
FROM subnodes innerQ
CROSS APPLY (SELECT ProductName
FROM tblProduct
WHERE PATINDEX('%' + @partSearch + '%', ProductName) > 0
AND innerQ.ID = CategoryID) b
WHERE baseQ.ID = ParentID OR baseQ.ID = ID) outerQ
GROUP BY baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq, outerQ.ParentID) levelQ
WHERE currentLevel = COALESCE(@level,currentLevel) AND levelID = COALESCE(@levelID,levelID)
END
GO
EXECUTE hierarchySearch @partSearch ='Screwdriver';
Distance ID NAME Node_Seq
----------- ----------- ------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------
2 13 Slotted Screwdrivers 2.11.13
3 14 Big Slotted Screwdrivers 2.11.13.14
3 15 Small Slotted Screwdrivers 2.11.13.15
February 8, 2012 at 5:43 am
Ah yeah that was hard coded when I was testing!
I still can't get it though to step through :
Screwdrivers > Phillips / Slotted > Big / Small
Using your execution examples, the first execution gived the 2 lowest level categories and their parent, the second jumps straight to the lowest level, and the last 2 return nothing :
XECUTE hierarchySearch @partSearch = 'screw' --Do a search for drill with all matches
EXECUTE hierarchySearch @partSearch = 'screw', @level = 1 --Do a search for drill with second level of matches (excludes ID 3 and 5, just shows children)
EXECUTE hierarchySearch @partSearch = 'screw', @levelID = 11 --Do a search for drill where root ID is 3 (excludes ID 5 and 9)
EXECUTE hierarchySearch @partSearch = 'screw', @level = 1, @levelID = 11 --Do a search for drill where root ID is 3 for second level matches (excludes ID 3, 5 and 9)
February 8, 2012 at 6:06 am
cdh-727733 (2/8/2012)
Ah yeah that was hard coded when I was testing!I still can't get it though to step through :
Screwdrivers > Phillips / Slotted > Big / Small
Using your execution examples, the first execution gived the 2 lowest level categories and their parent, the second jumps straight to the lowest level, and the last 2 return nothing :
XECUTE hierarchySearch @partSearch = 'screw' --Do a search for drill with all matches
EXECUTE hierarchySearch @partSearch = 'screw', @level = 1 --Do a search for drill with second level of matches (excludes ID 3 and 5, just shows children)
EXECUTE hierarchySearch @partSearch = 'screw', @levelID = 11 --Do a search for drill where root ID is 3 (excludes ID 5 and 9)
EXECUTE hierarchySearch @partSearch = 'screw', @level = 1, @levelID = 11 --Do a search for drill where root ID is 3 for second level matches (excludes ID 3, 5 and 9)
Yep, executes as expected. The root of all of those, when you execute for "Screw" :
EXECUTE hierarchySearch @partSearch ='Screw';
is 13 (the Node_Seq for the lowest is "2.11.13"). 11 is unnecessary because there are no products that are linked directly to 11 or to the children of 11.
February 8, 2012 at 6:24 am
No but they are linked to the children of 13, or the grandchildren of 11.
Essentially I am trying to achieve the drill down effect so as long as there are matches lower than the current level we should return the categories that are the children of the current level.
February 8, 2012 at 6:26 am
cdh-727733 (2/8/2012)
No but they are linked to the children of 13, or the grandchildren of 11.Essentially I am trying to achieve the drill down effect so as long as there are matches lower than the current level we should return the categories that are the children of the current level.
Hmm, the way I understood it from what you had said before was that you wanted the parent category for any category that had matching products.
OK, we'll need to scratch my previous attempt and have a re-think.
February 8, 2012 at 7:11 am
Cadavre (2/8/2012)
cdh-727733 (2/8/2012)
No but they are linked to the children of 13, or the grandchildren of 11.Essentially I am trying to achieve the drill down effect so as long as there are matches lower than the current level we should return the categories that are the children of the current level.
Hmm, the way I understood it from what you had said before was that you wanted the parent category for any category that had matching products.
OK, we'll need to scratch my previous attempt and have a re-think.
OK, I've hacked the code up a bit and adjusted the logic. Almost certainly not the best way to have done it, but I'm about to be quite busy so figured something was better than nothing.
BEGIN TRAN
CREATE TABLE [dbo].[tblCategory] ([ID] [int] NOT NULL, [ParentID] [int] NULL, [Name] [varchar](150) NOT NULL,
CONSTRAINT [PK_tblCategory] PRIMARY KEY CLUSTERED ([ID] ASC)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]) ON [PRIMARY];
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (1, 0, 'Power Tools');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (2, 0, 'Hand Tools');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (3, 1, 'Drills');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (4, 1, 'Saws');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (5, 1, 'Planers & Thicknessers');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (6, 3, 'Twist Drills');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (7, 3, 'Hammer Drills');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (8, 5, 'Planers');
INSERT INTO tblCategory (ID, ParentID, NAME)
VALUES (9, 5, 'Thicknessers');
CREATE TABLE [dbo].[tblProduct] ([ProductID] [int] NOT NULL, [ProductName] [varchar](150) NOT NULL,
[CategoryID] [int] NOT NULL CONSTRAINT [PK_tblProduct] PRIMARY KEY CLUSTERED ([ProductID] ASC)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]) ON [PRIMARY];
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (1, 'A Twist Drill', 6);
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (2, 'Another Twist Drill', 6);
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (3, 'A Hammer Drill', 7);
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (4, 'Another Hammer Drill', 7);
INSERT INTO tblProduct (ProductID, ProductName, CategoryID)
VALUES (5, 'A Planer with Drill in the name', 9);
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (11,2,'Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (12,11,'Phillips Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (13,11,'Slotted Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (14,13,'Big Slotted Screwdrivers')
INSERT INTO tblCategory (ID,ParentID,Name) VALUES (15,13,'Small Slotted Screwdrivers')
INSERT INTO tblProduct (ProductID,ProductName,CategoryID) VALUES (6,'A Big Screwdriver', 14)
INSERT INTO tblProduct (ProductID,ProductName,CategoryID) VALUES (7,'A Small Screwdriver',15)
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[hierarchySearch]') AND type in (N'P', N'PC'))
DROP PROCEDURE [dbo].[hierarchySearch]
GO
CREATE PROCEDURE hierarchySearch(@partSearch VARCHAR(100), @level INT = NULL, @levelID INT = NULL)
AS
BEGIN
WITH subnodes (Distance, ID, NAME, Node_Seq, ParentID, base)
AS (
SELECT 0, h.ID, h.NAME, CONVERT(VARCHAR(80), ltrim(str(h.ID))) AS Node_Seq, h.ParentID, h.ID AS base
FROM tblCategory h
WHERE h.ParentID = 0
UNION ALL
SELECT distance + 1, h.ID, h.NAME, CONVERT(VARCHAR(80), sn.Node_Seq + '.' + ltrim(str(h.ID))),
h.ParentID, sn.base
FROM tblCategory h
INNER JOIN subnodes sn ON h.ParentID = sn.ID
)
SELECT Distance, ID, NAME, Node_Seq
FROM (SELECT baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq, outerQ.base,
DENSE_RANK() OVER(ORDER BY baseQ.Distance)-1 AS currentLevel, outerQ.ParentID AS levelID
FROM subnodes baseQ
CROSS APPLY (SELECT Distance, ID, NAME, ParentID, base
FROM subnodes innerQ
CROSS APPLY (SELECT ProductName
FROM tblProduct
WHERE PATINDEX('%' + @partSearch + '%', ProductName) > 0 AND
innerQ.ID = CategoryID) b
WHERE baseQ.ID = COALESCE(@levelID,ParentID) OR
'.'+baseQ.Node_Seq+'.' LIKE '%.'+COALESCE(CAST(@levelID AS VARCHAR(10)),CAST(base AS VARCHAR(10)))+'.%'
) outerQ
GROUP BY baseQ.Distance, baseQ.ID, baseQ.NAME, baseQ.Node_Seq, outerQ.ParentID,outerQ.base) levelQ
WHERE currentLevel = COALESCE(@level,currentLevel)
GROUP BY Distance, ID, NAME, Node_Seq
END
GO
--EXECUTE hierarchySearch @partSearch ='Screw';
--EXECUTE hierarchySearch @partSearch ='Screw', @levelID = 2; --SAME AS EXECUTE hierarchySearch @partSearch ='Screw';
--EXECUTE hierarchySearch @partSearch ='Screw', @levelID = 11;
--EXECUTE hierarchySearch @partSearch ='Screw', @levelID = 13;
--EXECUTE hierarchySearch @partSearch = 'drill';
--EXECUTE hierarchySearch @partSearch = 'drill', @levelID = 1; --SAME AS EXECUTE hierarchySearch @partSearch = 'drill';
--EXECUTE hierarchySearch @partSearch = 'drill', @levelID = 3;
--EXECUTE hierarchySearch @partSearch = 'drill', @levelID = 4;
--EXECUTE hierarchySearch @partSearch = 'drill', @levelID = 5;
ROLLBACK
Here's some examples: -
EXECUTE hierarchySearch @partSearch ='Screw', @levelID = 11;
/*
Distance ID NAME Node_Seq
----------- ----------- ------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------
1 11 Screwdrivers 2.11
2 12 Phillips Screwdrivers 2.11.12
2 13 Slotted Screwdrivers 2.11.13
3 14 Big Slotted Screwdrivers 2.11.13.14
3 15 Small Slotted Screwdrivers 2.11.13.15
*/
EXECUTE hierarchySearch @partSearch = 'drill';
/*
Distance ID NAME Node_Seq
----------- ----------- ------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------
0 1 Power Tools 1
1 3 Drills 1.3
1 4 Saws 1.4
1 5 Planers & Thicknessers 1.5
2 6 Twist Drills 1.3.6
2 7 Hammer Drills 1.3.7
2 8 Planers 1.5.8
2 9 Thicknessers 1.5.9
*/
EXECUTE hierarchySearch @partSearch = 'drill', @levelID = 3;
/*
Distance ID NAME Node_Seq
----------- ----------- ------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------
1 3 Drills 1.3
2 6 Twist Drills 1.3.6
2 7 Hammer Drills 1.3.7
*/
February 8, 2012 at 7:22 am
Jeff Moden (2/7/2012)
I'm just curious. What type of hierarchy are you using? Adjacency list, Hierarchical Path, HierarchyID, Nested Sets, cfomposite of 2 or more of those? Also, how long does it take you to rebuild your 220k node hierarchy?
I am using a hierarchical path method. I thought I had created something new when I came up with it 12 years ago, only to find others were also using the same method. As they say, "nothing new under the sun."
I use a base-36 numbering scheme for the path characters, which saves significant space. The length of the path at level 32 is 60, which means I am averaging less than 2 characters per level. The average length of the path is 27 characters. I also use a modified Tally table to speed the process of calculating the path. Things got easier with the ROW_NUMBER() function to order and categorize the children of each parent by PARTITIONing by the parent identifier.
Children are added one level at a time, so 32 iterations/queries are needed for 32 levels, the current maximum. The table has an "AbsLevel" column ("absolute level" as opposed to "compressed level" when flattening inactive accounts) to aid depth calculations, which is a simple incremented counter for each level added.
Average exectution time to calculate the current 32 levels, update the base table where values differ, then calculate and update the group count (total downline accounts) is 10.4 seconds. On average, 502 records are updated per hourly execution.
February 8, 2012 at 11:20 pm
cdh-727733 (2/8/2012)
Jeff Moden (2/7/2012)
fahey.jonathan (2/7/2012)
cdh-727733 (2/7/2012)
I can't do that unfortunately as this is to be driven from user searches which I can't predict.What I am suggesting is that you create a complete hierarchy that includes all products. When a user requests a search, you use the hierarchy from the point they request, progressing down from there.
We currently do that with our distributor and customer list, which runs to 200,000 people. We rebuild the hierarchy once an hour. When a distributor searches for distributors and customers in his sales organization, that data is already precompiled so we don't have to dynamically build the hierarchy every time to return the results.
For our purpose, once an hour is sufficient. If your product hierarchy changes less frequently, you could rebuild the complete hierarchy once a day.
I'm just curious. What type of hierarchy are you using? Adjacency list, Hierarchical Path, HierarchyID, Nested Sets, cfomposite of 2 or more of those? Also, how long does it take you to rebuild your 220k node hierarchy?
I'm not sure how this would work with this situation - what records would I create - an entry for each product at each level of the category tree it might live at?
I've not studied your requirements but, if you're worried about how many nodes and levels you may end up with, I've experimented with some new methods I created for PASS 2010. On a 10 year old, single CPU, I convert a million node "Adjacency List" to a "Hierarchical Path" to a "Nested Set" to a 3.4 million node hierarchical data warehouse with precalculated answers up to 7 levels per node (less if there aren't that many) in just over 2 minutes. And, no... I don't use HierarchyID for any of it.
--Jeff Moden
Change is inevitable... Change for the better is not.
February 8, 2012 at 11:29 pm
fahey.jonathan (2/8/2012)
Jeff Moden (2/7/2012)
I'm just curious. What type of hierarchy are you using? Adjacency list, Hierarchical Path, HierarchyID, Nested Sets, cfomposite of 2 or more of those? Also, how long does it take you to rebuild your 220k node hierarchy?I am using a hierarchical path method. I thought I had created something new when I came up with it 12 years ago, only to find others were also using the same method. As they say, "nothing new under the sun."
I use a base-36 numbering scheme for the path characters, which saves significant space. The length of the path at level 32 is 60, which means I am averaging less than 2 characters per level. The average length of the path is 27 characters. I also use a modified Tally table to speed the process of calculating the path. Things got easier with the ROW_NUMBER() function to order and categorize the children of each parent by PARTITIONing by the parent identifier.
Children are added one level at a time, so 32 iterations/queries are needed for 32 levels, the current maximum. The table has an "AbsLevel" column ("absolute level" as opposed to "compressed level" when flattening inactive accounts) to aid depth calculations, which is a simple incremented counter for each level added.
Average exectution time to calculate the current 32 levels, update the base table where values differ, then calculate and update the group count (total downline accounts) is 10.4 seconds. On average, 502 records are updated per hourly execution.
I use 4 bit binaries to contain the node ID in the hierarchical path. It's a bit wider than what you're using but it has a direct conversion and no delimiters required. It also allows for some pretty need hierarchical datamart precalculations like sums and counts for each node in a setbased fashion.
I'm curious why you have a limit of 32 levels. If you're using an rCTE (recursive CTE) to assemble the Hierarchical path, there is not limit as to how many iterations you can achieve. I don't know if functions still have that limit because I generally try to avoid recursion and haven't tried the limits of a recursive function.
It sounds like you have your act very well together on what you're doing in the Hierarchical world. Of course, I'm interested in the Tally Table aspect, as well. Are you using that to do your base 36 conversions?
--Jeff Moden
Change is inevitable... Change for the better is not.
February 9, 2012 at 7:15 pm
32 levels is not the limit, it is simply the current maximum depth. I am not using a CTE to build the recursion; the code I have now was developed before I had access to that option and I have not gone back to the code to experiment with it.
Here is the basic outline of how it works. It requires the existence of a PathBase table that has the following content:
SELECTTOP 20 *
FROMPathBase
WHEREPathBaseID % 10 = 0
ORDER BY PathBaseID
/*
PathBaseID PathChar
----------- --------
0 000
10 00A
20 00K
30 00U
40 014
50 01E
60 01O
70 01Y
80 028
90 02I
100 02S
110 032
120 03C
130 03M
140 03W
150 046
160 04G
170 04Q
180 050
190 05A
*/
That is what I called a modified Tally table because it has the PathBaseID ("N") and a character field that represents the path value to append for a given child. Here is the code to build the table:
CREATE-- DROP
TABLEPathBase
(
PathBaseIDINTEGERNOT NULL,
PathCharCHAR(3)NOT NULL,
CONSTRAINTPathBase_PK
PRIMARY KEY CLUSTERED(PathBaseID)
)
IFNOT EXISTS(SELECT PathBaseID FROM PathBase)
BEGIN-- PathBase Population
;WITHUnits
AS(
SELECTROW_NUMBER() OVER (ORDER BY Letter) - 1 AS KeyID, N AS AsciiVal, Letter
FROM(/* Adjust for the number of characters desired. */
SELECTN, CHAR(N) AS Letter
FROMTally
WHEREN BETWEEN 48 AND 57
ORN BETWEEN 65 AND 90
) x
),
Squared
AS(SELECT(a.KeyID * (SELECT COUNT(Letter) FROM Units)) + b.KeyID AS KeyID, a.Letter + b.Letter AS Letter FROM Units a CROSS JOIN Units b),
Cubed
AS(SELECT(a.KeyID * POWER((SELECT COUNT(Letter) FROM Units), 2)) + b.KeyID AS KeyID, a.Letter + b.Letter AS Letter FROM Units a CROSS JOIN Squared b)
INSERT
INTOPathBase
SELECTTOP 10000
KeyID,Letter
FROMCubed
ORDER BY KeyID
END-- PathBase Population
The code currently starts by adding the records that have no parents at AbsLevel 1. Then, in a "WHILE @@ROWCOUNT > 0" loop, it adds children of records found in the last insert (using the AbsLevel value). Each record added increments the parent's AbsLevel + 1. Using ROW_NUMBER() PARTITIONed by ParentID, each child is assigned a number that is used to look up the child's addition to the parents path. Here is some data to view, and I'll continue the explanation below.
-- Full upline of a single account by using the path.
SELECTAccountID, SponsorID, AbsLevel, CAST(Path AS VARCHAR(100)) AS Path
FROMMarketer
WHERE'1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L070203040401111111' LIKE Path + '%'
ORDER BY Path
/*
AccountID SponsorID AbsLevel Path
----------- ----------- -------- ----------------------------------------------------------------------------------------------------
1 1 1 1
3 1 2 13
22 3 3 131
500000 22 4 1311B
510707 500000 5 1311B0C
510718 510707 6 1311B0C8Q
517050 510718 7 1311B0C8Q4O
643807 517050 8 1311B0C8Q4OMB
511380 643807 9 1311B0C8Q4OMB03A
636140 511380 10 1311B0C8Q4OMB03A1X
637376 636140 11 1311B0C8Q4OMB03A1X0DK
641571 637376 12 1311B0C8Q4OMB03A1X0DK01X
675929 641571 13 1311B0C8Q4OMB03A1X0DK01X04N
1258325 675929 14 1311B0C8Q4OMB03A1X0DK01X04N10
1402520 1258325 15 1311B0C8Q4OMB03A1X0DK01X04N101E
5472606 1402520 16 1311B0C8Q4OMB03A1X0DK01X04N101E00T
5492000 5472606 17 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S
5612569 5492000 18 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V
5664878 5612569 19 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V06
5684249 5664878 20 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L
6745172 5684249 21 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L07
6751913 6745172 22 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L0702
6751991 6751913 23 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L070203
6756826 6751991 24 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L07020304
6769356 6756826 25 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L0702030404
6769389 6769356 26 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L070203040401
6792291 6769389 27 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L0702030404011
6793025 6792291 28 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L07020304040111
6795580 6793025 29 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L070203040401111
6798097 6795580 30 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L0702030404011111
6798145 6798097 31 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L07020304040111111
6798341 6798145 32 1311B0C8Q4OMB03A1X0DK01X04N101E00T0S0V060L070203040401111111
(32 row(s) affected)
*/
If a parent has 100 children, they are assigned numbers 1-100 from the ROW_NUMBER() function; that value is used to look up "001" for the first child, "002" for the second child, "00K" for the 20th child, and "02S" for the 100th child. Based on a calculation of the most number of children added that round, the path values for the children are truncated. If 36 children or less were added, we can truncate to 1 character for the base-36 values used. If 1296 children or less were added (36^2), the path characters can be truncated to 2 characters. Otherwise, the full 3 characters must be used. The truncation is not necessary for uniqueness, it simply shortens the resulting output, saving space in the index and, hopefully, allowing more rows per page.
I know it is harder to get a handle on code without sample data to use for experimentation, so here is how the product sample would work. You should already have the data from the sample code provided, and you have the PathBase table above.
-- Create a view to bring together the products and categories.
CREATE-- DROP
VIEWProdItem
AS
SELECT'PRD' + CAST(ProductID AS VARCHAR(5))AS BaseKey,
ProductIDAS ItemID,
'PRD'AS ItemType,
ProductNameAS ItemName,
'CAT' + CAST(CategoryID AS VARCHAR(5))AS ParentID
FROMtblProduct
UNIONALL
SELECT'CAT' + CAST(ID AS VARCHAR(5))AS BaseKey,
IDAS ItemID,
'CAT'AS ItemType,
NameAS ItemName,
'CAT' + CAST(ParentID AS VARCHAR(5))AS ParentID
FROMtblCategory
-- Create a table to hold hierarchy
CREATE-- DROP
TABLEtblProdHierarchy
(
ItemIDINTEGERNOT NULL,
ItemTypeCHAR(3)NOT NULL,
AbsLevelSMALLINTNOT NULL,
ItemPathVARCHAR(100)NOT NULL,
CONSTRAINTtblProdHierarchy_PK
PRIMARY KEY NONCLUSTERED (ItemID, ItemType)
)
CREATEUNIQUE CLUSTERED
INDEXtblProdHierarchy_ItemPath
ONtblProdHierarchy(ItemPath)
--<logic>Prepare variables.</logic>
DECLARE @AbsLevel SMALLINT = 0
DECLARE@BasePathUnitsSMALLINT =
(
SELECTCOUNT(DISTINCT RIGHT(PathChar, 1))
FROMGlobenet.dbo.PathBase
WHEREPathBaseID <= 1000
)
--<logic>Add the items with no parents.</logic>
;WITHBase
AS(
SELECTItemID, ParentID, ItemType, ROW_NUMBER() OVER (ORDER BY ItemName) AS PathBaseID
FROMProdItem
WHEREParentID = 'CAT0'
)
INSERT
INTOtblProdHierarchy
(
ItemID,ItemType,AbsLevel,ItemPath
)
SELECTb.ItemID,b.ItemType,1 AS AbsLevel,RIGHT(p.PathChar, l.FieldLength) AS Path
FROMBase b
JOINPathBase p
ONb.PathBaseID = p.PathBaseID
CROSS JOIN (
SELECTCASEWHEN MAX(PathBaseID) < @BasePathUnits THEN 1
WHEN MAX(PathBaseID) < POWER(@BasePathUnits, 2) THEN 2
ELSE 3 END AS FieldLength
FROMBase
) l
--<logic>While child items of the last-added items are found:</logic>
WHILE@@ROWCOUNT > 0
BEGIN-- Children Loop
--<logic> - Increment the level number.</logic>
SET@AbsLevel = @AbsLevel + 1
--<logic> - Add the children of the items added in the last round.</logic>
;WITHBase
AS(
SELECTp.ItemID, p.ItemType, t.AbsLevel + 1 AS AbsLevel, t.ItemPath,
ROW_NUMBER() OVER (PARTITION BY p.ParentID ORDER BY p.ParentID, p.ItemName, p.ItemID) AS PathBaseID
FROM(
SELECTItemType + CAST(ItemID AS VARCHAR(5)) AS ParentID, AbsLevel, ItemPath
FROMtblProdHierarchy
WHEREAbsLevel = @AbsLevel
) t
JOINProdItem p
ONp.ParentID = t.ParentID
)
INSERT
INTOtblProdHierarchy
(
ItemID,ItemType,AbsLevel,ItemPath
)
SELECTb.ItemID,b.ItemType,b.AbsLevel,b.ItemPath + /* TESTING ONLY '.' +*/ RIGHT(p.PathChar, l.FieldLength) AS ItemPath
FROMBase b
JOINPathBase p
ONb.PathBaseID = p.PathBaseID
CROSS JOIN (
SELECTCASEWHEN MAX(PathBaseID) < @BasePathUnits THEN 1
WHEN MAX(PathBaseID) < POWER(@BasePathUnits, 2) THEN 2
ELSE 3 END AS FieldLength
FROMBase
) l
END-- Children Loop
--<logic>Return indented results for the full tree.</logic>
SELECTp.ItemType,
p.ItemID,
h.AbsLevel,
CAST(REPLICATE(' | ', h.AbsLevel - 1) + p.ItemName AS VARCHAR(100)) AS ItemName
FROMtblProdHierarchy h
JOINProdItem p
ONp.ItemID = h.ItemId
ANDp.ItemType = h.ItemType
ORDER BY h.ItemPath
--<logic>Return search results.</logic>
-- This would need to be modified to be used in a procedure; right now, I am just uncommenting needed sections of the WHERE clause.
SELECTn.ItemId,
n.ItemType,
--n.ItemName,/* Regular view */
CAST(REPLICATE(' | ', z.AbsLevel - 1) + n.ItemName AS VARCHAR(50)) AS ItemName, /* Indented view */
z.AbsLevel,
z.ItemPath
FROM(
SELECTDISTINCT
y.ItemID, y.Itemtype, y.AbsLevel, y.ItemPath
FROM(
SELECTp.ProductID, p.ProductName, h.ItemPath
FROMtblProduct p
JOINtblProdHierarchy h
ONh.ItemID = p.ProductID
ANDh.ItemType = 'PRD'
WHEREProductName LIKE '%screw%'/* PARAM 1: Search string */
) x
JOINtblProdHierarchy y
ONx.ItemPath LIKE y.ItemPath + '%'
--WHEREy.AbsLevel = 2/* PARAM 2: Limit to a specific level. */
--ANDy.ItemPath LIKE '%'/* PARAM 3: Limit to a specific leg. */
) z
JOINProdItem n
ONn.ItemID = z.ItemID
ANDn.ItemType = z.ItemType
ORDER BY z.ItemPath
Viewing 15 posts - 16 through 30 (of 30 total)
You must be logged in to reply to this topic. Login to reply