February 25, 2011 at 5:16 am
Hi Jeff
I could do a quirky UPDATE solution in the following way.
I have added an extra column to the @RACES table called usable (BIT) and then just filtered by that after the UPDATE has been done
You might have another way of doing it
SET DATEFORMAT YMD
DECLARE @LastDateTime datetime, @usable bit, @diff int, @record varchar(1000)
DECLARE @races table(raceno int identity(1,1),racedatetime datetime,diff int,usable int default(0))
insert into @races
(racedatetime)
SELECT '2011-02-20 13:50:00'
UNION ALL
SELECT '2011-02-20 13:55:00'
UNION ALL
SELECT '2011-02-20 14:00:00'
UNION ALL
SELECT '2011-02-20 14:10:00'
UNION ALL
SELECT '2011-02-20 14:20:00'
UNION ALL
SELECT '2011-02-20 14:25:00'
UNION ALL
SELECT '2011-02-20 14:26:00'
UNION ALL
SELECT '2011-02-20 14:27:00'
UNION ALL
SELECT '2011-02-20 14:30:00'
UNION ALL
SELECT '2011-02-20 14:33:00'
UNION ALL
SELECT '2011-02-20 14:36:00'
UNION ALL
SELECT '2011-02-20 14:45:00'
UNION ALL
SELECT '2011-02-20 14:46:00'
UNION ALL
SELECT '2011-02-20 14:56:01'
SELECT*
FROM@RACES
UPDATER
SET@diff = DateDiff(mi,@LastDateTime,Racedatetime),
@usable = CASE WHEN @usable IS NULL THEN 1
WHEN @LastDateTime IS NOT NULL AND @diff >= 10 THEN 1
ELSE 0 END,
@LastDateTime = CASE @Usable WHEN 1 THEN RacedateTime ELSE @LastDateTime END,
Usable = @Usable,
Diff = @Diff
FROM@RACES as R
SELECT*
FROM@RACES
WHEREusable = 1
But as I said before the problem is not that I don't know how do it in another way (cursor, quirky update) but that I have a lot of code currently in SELECT format (I will provide an example) that I want to automate the update to the new version. As far as I can see using a quirky update or cursor is going to involve a major rewrite for all my historical reports which I use for analysis to help predict future behavior.
for example here is one such report which has been cut down by 90% to provide an example.
/****** Object: StoredProcedure [dbo].[usp_sql_fav_in_a_row_report] Script Date: 02/25/2011 11:14:27 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================================================
-- Author:Rob Reid
-- Create date: 02-FEB-2011
-- Description:Look for patterns in the the first X or last X races of a day
-- using various scoring factors to see whether patterns exist for accumulators
-- on the first X races of the day. A percentage of 1-5% for 3/4 way bets
-- can return a nice profit.
-- =============================================================================
CREATE PROCEDURE [dbo].[usp_sql_runner_in_a_row_report]
AS
BEGIN
SET DATEFORMAT YMD
SET NOCOUNT ON
DECLARE @stats TABLE([Desc] varchar(50),Racedatetime datetime,First4 bit,First3 bit,First2 bit,Last4 bit,Last3 bit,Last2 bit)
DECLARE @REPORT TABLE(row int identity(1,1),Details varchar(100),perc float, TotalStaked int NULL, TotalWinnings int NULL)
DECLARE @MinDate datetime, @MaxDate datetime, @Stamp datetime, @COUNT INT, @win INT, @PERC FLOAT
DECLARE @First4 bit,@Last4 bit,@First3 bit,@Last3 bit,@First2 bit, @Last2 bit
DECLARE @Odds varchar(100),@Winnings float,@Stake int
DECLARE @WinFavFirst4TotalWinnings float, @WinFavFirst3TotalWinnings float,@WinFavFirst2TotalWinnings float
DECLARE @WinFavLast3TotalWinnings float,@WinFavLast2TotalWinnings float,@WinFavLast4TotalWinnings float
DECLARE @DEBUG BIT,
@SkipBadCourses BIT,
@Skip BIT
SELECT@SkipBadCourses = 0,
@Stake = 100,
@WinFavFirst4TotalWinnings = 0,
@WinFavFirst3TotalWinnings = 0,
@WinFavFirst2TotalWinnings = 0,
@WinFavLast4TotalWinnings = 0,
@WinFavLast3TotalWinnings = 0,
@WinFavLast2TotalWinnings = 0,
@Winnings = 0
-- I use my RACE_REPORT table which holds a record for each race ever run with columns for numerous factors
-- such as whether the race was won by the favourite, the winners final price, whether the favourite was placed
-- whether any of my myriad of other ranking scores won, placed and so on. Very wide table full of BIT columns
-- which is rebuilt at the end of each day
-- get the date range for the report
SELECT@MinDate = MIN(Racedatetime),
@MaxDate = MAX(RaceDatetime)
FROMRACE_REPORT
-- override min date to current month for quicker report
SELECT @MinDate = '2011-FEB-01'
PRINT 'FROM ' + CAST(@MinDate as varchar) + ' TO ' + CAST(@MaxDate as varchar)
SELECT @Stamp = @MinDate
WHILE @Stamp <= @MaxDate
BEGIN
PRINT 'ON DATE = ' + CAST(@Stamp as varchar)
SELECT@Winnings = 0,
@Odds = NULL
-- CODE REMOVED FOR EASE OF DEMONSTRATIOn
BEGIN
PRINT 'Todays races are ok'
-- were the first 4 of the day winners
SELECT@COUNT = SUM(WonbyFavourite)
FROM(
SELECTTOP 4 CAST(WonByFavourite as tinyint) as WonbyFavourite
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime
) as t
PRINT 'COUNT FROM FIRST FOUR THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)
-- if the first 4 were all won by the favourite then we create a list of the final prices in 7/4,2/1,3/1,11/10
-- order and then pass them to our function that will calculate total winnings when an initial amount is
-- staked on the first runner and the winnings moved to the next.
IF @COUNT = 4
BEGIN
SELECT @First4 = 1, @Odds= NULL
-- get price list as we know all X won
SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds
FROM(
SELECTTOP 4 WinningOdds
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime
) as t
PRINT @ODDS
SELECT @Winnings = 0
-- get total winnings
EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT
SELECT @WinFavFirst4TotalWinnings = @WinFavFirst4TotalWinnings + @Winnings
END
ELSE
SELECT @First4 = 0
PRINT 'WERE FIRST FOUR OF THE DAY WON BY FAV = '+CAST(@First4 as varchar)
SELECT@COUNT = SUM(WonbyFavourite)
FROM(
SELECTTOP 3 CAST(WonByFavourite as tinyint) as WonbyFavourite
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime
) as t
PRINT 'COUNT FROM FIRST THREE THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)
IF @COUNT = 3
BEGIN
SELECT @First3 = 1, @Odds= NULL
SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds
FROM(
SELECTTOP 3 WinningOdds
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime
) as t
PRINT @ODDS
SELECT @Winnings = 0
EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT
SELECT @WinFavFirst3TotalWinnings = @WinFavFirst3TotalWinnings + @Winnings
END
ELSE
SELECT @First3 = 0
PRINT 'WERE FIRST THREE OF THE DAY WON BY FAV = '+CAST(@First3 as varchar)
SELECT@COUNT = SUM(WonbyFavourite)
FROM(
SELECTTOP 2 CAST(WonByFavourite as tinyint) as WonbyFavourite
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime
) as t
PRINT 'COUNT FROM FIRST TWO THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)
IF @COUNT = 2
BEGIN
SELECT @First2 = 1, @Odds= NULL
SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds
FROM(
SELECTTOP 2 WinningOdds
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime
) as t
PRINT @ODDS
SELECT @Winnings = 0
EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT
SELECT @WinFavFirst2TotalWinnings = @WinFavFirst2TotalWinnings + @Winnings
END
ELSE
SELECT @First2 = 0
PRINT 'WERE FIRST TWO OF THE DAY WON BY FAV = '+CAST(@First2 as varchar)
SELECT@COUNT = SUM(WonbyFavourite)
FROM(
SELECTTOP 4 CAST(WonByFavourite as tinyint) as WonbyFavourite
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime DESC
) as t
PRINT 'COUNT FROM LAST FOUR THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)
IF @COUNT = 4
BEGIN
SELECT @Last4 = 1, @Odds= NULL
SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds
FROM(
SELECTTOP 4 WinningOdds
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime DESC
) as t
PRINT @ODDS
SELECT @Winnings = 0
EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT
SELECT @WinFavLast4TotalWinnings = @WinFavLast4TotalWinnings + @Winnings
END
ELSE
SELECT @Last4 = 0
PRINT 'WERE LAST FOUR OF THE DAY WON BY FAV = '+CAST(@Last4 as varchar)
SELECT@COUNT = SUM(WonbyFavourite)
FROM(
SELECTTOP 3 CAST(WonByFavourite as tinyint) as WonbyFavourite
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime DESC
) as t
PRINT 'COUNT FROM LAST THREE THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)
IF @COUNT = 3
BEGIN
SELECT @Last3 = 1, @Odds= NULL
SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds
FROM(
SELECTTOP 3 WinningOdds
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime DESC
) as t
PRINT @ODDS
SELECT @Winnings = 0
EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT
SELECT @WinFavLast3TotalWinnings = @WinFavLast3TotalWinnings + @Winnings
END
ELSE
SELECT @Last3 = 0
PRINT 'WERE LAST THREE OF THE DAY WON BY FAV = '+CAST(@Last3 as varchar)
SELECT@COUNT = SUM(WonbyFavourite)
FROM(
SELECTTOP 2 CAST(WonByFavourite as tinyint) as WonbyFavourite
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime DESC
) as t
PRINT 'COUNT FROM LAST TWO THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)
IF @COUNT = 2
BEGIN
SELECT @Last2 = 1,@Odds= NULL
SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds
FROM(
SELECTTOP 2 WinningOdds
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime DESC
) as t
PRINT @ODDS
SELECT @Winnings = 0
EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT
SELECT @WinFavLast2TotalWinnings = @WinFavLast2TotalWinnings + @Winnings
END
ELSE
SELECT @Last2 = 0
PRINT 'WERE LAST TWO OF THE DAY WON BY FAV = '+CAST(@Last2 as varchar)
INSERT INTO @stats
([Desc],Racedatetime,First4,First3,First2,Last4,Last3,Last2)
VALUES
('WONFAV',@Stamp,@First4,@First3,@First2,@Last4,@Last3,@Last2)
-- now another 150 SELECT statements using other rankings apart from the favourite status
END
SELECT @Stamp = DATEADD(day,1,@Stamp)
-- CODE REMOVED
IF @Stamp > @MaxDate
BEGIN
PRINT 'QUIT'
END
END
-- now some reports to look for patterns
/*
SELECT*
FROM@stats
*/
SELECT@COUNT= COUNT(*) FROM @stats WHERE [DESC]='WONFAV'
SELECT@win= COUNT(*) FROM @stats WHERE First4 = 1 AND [DESC]='WONFAV'
INSERT INTO @REPORT
(Perc,Details,TotalStaked,TotalWinnings)
SELECTCAST(@WIN as float) / @COUNT * 100 , 'First Four Races Won By Fav %'
,@Stake * @COUNT, @WinFavFirst4TotalWinnings
SELECT@win= COUNT(*) FROM @stats WHERE First3 = 1 AND [DESC]='WONFAV'
INSERT INTO @REPORT
(Perc,Details,TotalStaked,TotalWinnings)
SELECTCAST(@WIN as float) / @COUNT * 100 , 'First Three Races Won By Fav %'
,@Stake * @COUNT, @WinFavFirst3TotalWinnings
SELECT@win= COUNT(*) FROM @stats WHERE First2 = 1 AND [DESC]='WONFAV'
INSERT INTO @REPORT
(Perc,Details,TotalStaked,TotalWinnings)
SELECTCAST(@WIN as float) / @COUNT * 100 , 'First Two Races Won By Fav %'
,@Stake * @COUNT, @WinFavFirst2TotalWinnings
SELECT@win= COUNT(*) FROM @stats WHERE Last4 = 1 AND [DESC]='WONFAV'
INSERT INTO @REPORT
(Perc,Details,TotalStaked,TotalWinnings)
SELECTCAST(@WIN as float) / @COUNT * 100 , 'Last Four Races Won By Fav %'
,@Stake * @COUNT, @WinFavLast4TotalWinnings
SELECT@win= COUNT(*) FROM @stats WHERE Last3 = 1 AND [DESC]='WONFAV'
INSERT INTO @REPORT
(Perc,Details,TotalStaked,TotalWinnings)
SELECTCAST(@WIN as float) / @COUNT * 100 , 'Last Three Races Won By Fav %'
,@Stake * @COUNT, @WinFavLast3TotalWinnings
SELECT@win= COUNT(*) FROM @stats WHERE Last2 = 1 AND [DESC]='WONFAV'
INSERT INTO @REPORT
(Perc,Details,TotalStaked,TotalWinnings)
SELECTCAST(@WIN as float) / @COUNT * 100 , 'Last Two Races Won By Fav %'
,@Stake * @COUNT, @WinFavLast2TotalWinnings
-- another 150 or so reports for all the other kinds of ranking
-- an overview of patterns
SELECTDetails,Perc,COALESCE(TotalStaked,0) as TotalStaked,
COALESCE(TotalWinnings,0) as TotalWinnings,
COALESCE(TotalWinnings,0)-COALESCE(TotalStaked,0) as Profit
FROM@REPORT
ORDER BY Row
END
This report is one of many that look for patterns in historical races which can be used for future bets.
This particular report is looking for patterns which can be used for accumulator bets and looks for days where the first 4, 3, 2 races OR last 4,3,2
races of the day were won by a certain ranking factor. In this report I have removed most of the code so that only one ranking factor is shown. In this case its the favourite status of the horse (favourites win 30-40% of all races).
I have many different scoring factors.
The report loops through the date range and for each day checks whether the first and last 4,3,2 races were won by the favourite. If so it takes the final prices of each runner and passes them to another procedure which returns the total winnings. The details of that days outcome are added to a report which is outputted at the end.
As you can see there are lots of SELECT statements already being used to determine whether the first X races of the day were won.
These are the statements I need to change to reflect the issue in hand, namely that each race needs to be at least ten minutes apart from the subsequent race.
This is why I wanted a SELECT style solution or another way to simply update these statements if possible
-- were the first 4 races of the day won by favourites
SELECT@COUNT = SUM(WonbyFavourite)
FROM(
SELECTTOP 4 CAST(WonByFavourite as tinyint) as WonbyFavourite
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime
) as t
PRINT 'COUNT FROM FIRST FOUR THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)
/* if the first 4 were all won by the favourite then we create a list of the final prices in 7/4,2/1,3/1,11/10
order and then pass them to our function that will calculate total winnings when an initial amount is
staked on the first runner and the winnings moved to the next. */
IF @COUNT = 4
BEGIN
SELECT @First4 = 1, @Odds= NULL
-- get price list as we know all 4 won
SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds
FROM(
SELECTTOP 4 WinningOdds
FROMRACE_REPORT
WHEREDateDiff(day,Racedatetime,@Stamp)=0
ORDER BY RaceDateTime
) as t
PRINT @ODDS
SELECT @Winnings = 0
-- get total winnings
EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT
SELECT @WinFavFirst4TotalWinnings = @WinFavFirst4TotalWinnings + @Winnings
END
ELSE
SELECT @First4 = 0
Now I am sure that all this code can be written in a myriad of different ways but the fact is the code is already there so I would like to be able to quickly update it all if possible rather than rewrite it from scratch.
If you have any tips then please share.
Thanks for your help
February 25, 2011 at 7:28 am
Rob Reid-246754 (2/25/2011)
These are the statements I need to change to reflect the issue in hand, namely that each race needs to be at least ten minutes apart from the subsequent race.
You don't need to change any of code. Change the name of the Race_Report table to something else. Use the Quirky Update to create a "new" table from that "something else" table called "Race_Report" with your 10 minute spaced races and you're done.
--Jeff Moden
Change is inevitable... Change for the better is not.
February 25, 2011 at 6:33 pm
Couldn't you create a table valued function and then do a global find and replace, replacing the table name with the name of your table valued function, e.g., "Race_Report" => "Race_Report_Table_Function()"
February 25, 2011 at 7:57 pm
bobhines (2/25/2011)
Couldn't you create a table valued function and then do a global find and replace, replacing the table name with the name of your table valued function, e.g., "Race_Report" => "Race_Report_Table_Function()"
What would be in that table valued function? A While Loop?
--Jeff Moden
Change is inevitable... Change for the better is not.
February 25, 2011 at 8:07 pm
Could be anything that generates the data in the form desired. Once the table proper has been replaced with the table valued function, he can be free to change it's definition without having to change stored procedures, etc.
March 1, 2011 at 3:12 am
Cheers Jeff
I now get what you mean whereas I must admit before I must have misread your previous comment.
That would be a very good solution to my problem 🙂
Thanks
March 1, 2011 at 6:19 am
Very cool. Thanks for the feedback, Rob.
--Jeff Moden
Change is inevitable... Change for the better is not.
March 2, 2011 at 7:44 am
Jeff seeing you are an RBAR expert how would you approach the following;
It's related to the same database and basically for each runner in a race I have a number of score/rating columns that rank each runner.
To calculate each score/rating I have to carry out numerous lookups and run a number of calculations e.g how many times has this horse won on this course with these conditions over this time period and so on. I weight different factors and then use those weightings in comparison with other runners.
At the moment I am using User Defined Functions and then utilizing them in my UPDATE statements whenever I need to update them e.g if the conditions change, the price goes right up or down or another runner drops out.
This hasn't been a problem up until now but in the last week I have noticed significant slow downs when running reports and I am currently investigating whether the problems are related to this code or not. I am using a shared server with lots of other busy sites however queries that were taking 2 seconds max are sometimes taking up to 12 minutes to run (intermittently).
The tables I use are heavily indexed and most of the actual reports use with (nolock) on the joins anyway (unless the reports are being used for pricing decisions) so I wouldn't have thought the UPDATES would have blocked my reporting anyhow but I am experiencing major slowdowns which I am looking into at the moment.
I have been using the new 2008 Activity monitor and some DMV reports as well as automatic logging of the sys.processes system view at various intervals but so far I have not seen any blocking or deadlocks during these long query times.
When I run each UDF on it's own with a hardcoded value it takes between <1 second and 2 seconds to return.
When I run all 3 UDF's for a single runner it is taking around 2-3 seconds to return all 3 values.
Each UDF probably carries out 30 -60 SELECTS as well as making numerous calls to other UDF's to get various related statistics to be used in the calculations.
SELECT FormRating = dbo.udf_GET_FORM_RATING( 7585 ),
FormRating2 = dbo.udf_GET_FORM_RATING2( 7585) ,
FormRating3 = dbo.udf_GET_FORM_RATING3( 7585 )
However when I run an UPDATE statement for all the runners in a race (7 runners) the statement is taking
to return whereas it was taking 21 seconds (7 runners * 3 seconds duration) up until a week ago it is now taking anywhere between 5-12 minutes (depending on ??)
UPDATERACE_RUNNERS
SETFormRating = dbo.udf_GET_FORM_RATING( RunnerPK ),
FormRating2 = dbo.udf_GET_FORM_RATING2( RunnerPK ) ,
FormRating3 = dbo.udf_GET_FORM_RATING3( RunnerPK )
WHERERaceFk = 23503 -- @RacePK
AND NonRunner = 0 -- ignore non runners
Whenever possible I will always target my UPDATE to one particular runner but sometimes when the conditions change e.g weather turns the ground from firm to heavy all the runners ratings will be affected and need to change.
Therefore in your opinion what would be the best way to achieve the UPDATE of multiple rows without resorting to a CURSOR/WHILE loop which I am currently considering as it seems the table lock is slowing things right down whereas the row locks are manageable.
Saying that however I have still have not ruled out something else causing the problem as we have some other I/O, temp table intensive sites on the same server so some other process could be causing the strange varying response times. However even if that is not the cause it would be good to hear your thoughts on the matter considering the RBAR nature of the problem.
Thanks
March 2, 2011 at 10:19 am
Rob Reid-246754 (3/2/2011)
<Snip a worthy explanation>Therefore in your opinion what would be the best way to achieve the UPDATE of multiple rows without resorting to a CURSOR/WHILE loop which I am currently considering as it seems the table lock is slowing things right down whereas the row locks are manageable.
Saying that however I have still have not ruled out something else causing the problem as we have some other I/O, temp table intensive sites on the same server so some other process could be causing the strange varying response times. However even if that is not the cause it would be good to hear your thoughts on the matter considering the RBAR nature of the problem.
Thanks
I'm not Jeff but there's going to be a few problems with this. You mention the number of functions and the like that you're performing. That would require some heavy sifting through your code to eventually try to remove all your UDFs and turn them into rowset methods, and determining which of them are candidates for that task. It's pretty much an overhaul and most folks around here would get paid for that level of work.
However, I don't want to discourage you from specific questions, so perhaps we can offer you some strategies to help find the unique culprit(s) and help you on those.
You mention that a single UDF takes about 3 seconds to run, and for 7 rows 21 seconds to run, this is not unexpected. When the same UDF takes 5 minutes to run for 7 rows, something's gone wrong, as you'd expect.
The first thing I'd do is start checking my wait_types, seeing if you can figure out where the bottleneck is. It might be contention for the log file, it might be simply reading data, it might be blocking itself. You won't know without checking the wait_types on the SPID you're running the process from while it's taking a while... well, or without resetting the wait_type information and checking it again afterwards.
The next task I'd do to start figuring out where the problems lie is benchmarking each unique step of the functions in a test environment, and see if the same problems exist there as they do in production. If they do, you've got a nice place to work out issues on. If not, it's most likely got to do with multi-user concurrency on the production box, which means you'll need to explore locking and blocking there. Then again, cleaning up the code will reduce the locking and blocking, so it's kind of the good version of a catch-22.
Once you know the couple of hot spots and/or troubled queries, we can review the code with you and help you untangle whatever it is that ails it.
Never stop learning, even if it hurts. Ego bruises are practically mandatory as you learn unless you've never risked enough to make a mistake.
For better assistance in answering your questions[/url] | Forum Netiquette
For index/tuning help, follow these directions.[/url] |Tally Tables[/url]
Twitter: @AnyWayDBA
March 3, 2011 at 9:48 am
I have narrowed down the code and found the culprit which was a UDF which was being used a lot within the various ranking functions and reports.
The functions aim is to convert distances inputted as Miles, Yards and Furlongs into Yards which can then be used in range searches e.g to find a history of distances a runner performs well at.
So it was converting all the various distance formats e.g
2m 144yd 7f
1m
900 yd 6f
7f
into just yards.
The function was making use of @table variables to split up the distance into its 3 parts, carrying out calculations to convert each measurement into yards and then returning a SUM of the yards.
CREATE FUNCTION [dbo].[udf_GET_YARD_DISTANCE]
(
@Distance varchar(50)
)
RETURNS INT
AS
BEGIN
DECLARE @Yards INT
DECLARE @dist TABLE(val varchar(20))
IF @Distance = 'NA'
RETURN 0
-- split 3m 220yds 7f into 3 rows
INSERT INTO @dist
SELECT VALUE FROM dbo.udf_SPLIT(@Distance, ' ')
-- clean up each record and convert into yards
UPDATE@dist
SETVAL = CAST(REPLACE(VAL,'m','') as int) * 1760
WHEREVAL LIKE '%m'
UPDATE @dist
SETVAL = CAST(REPLACE(VAL,'f','') as int) * 220
WHEREVAL LIKE '%f'
UPDATE @dist
SETVAL = CAST(REPLACE(REPLACE(VAL,'yds',''),'y','') as int)
WHEREVAL LIKE '%y%'
-- return
SELECT @YARDS = SUM(CAST(VAL as int)) FROM @dist
RETURN @YARDS
END
This function was being used a lot like so:
-- get info about current race
SELECT @RaceFK = RacePK,
@Racedatetime = Racedatetime ,
@HorseName = HorseName,
@Distance= Distance
@Jockey = Jockey,
@CourseFk = CourseFk,
@ThreeMonths = DATEADD(MONTH,-3,Racedatetime)
FROMRACE_RUNNERS as run
JOINRACES as ra
ONra.RacePK = run.RaceFK
WHERERunnerPK = @RunnerPK
-- lots of other similar reports to increment @RATING
SELECT@Rating = @Rating + CASE WHEN COUNT(*) > 0 AND Racedatetime > @ThreeMonths THEN 4 WHEN COUNT(*) > 0 THEN 2 ELSE 0 END
FROMRACE_RUNNERS as rr
JOINRACES as ra ON rr.racefk = ra.racePK
WHEREHorseName=@HorseName
AND Jockey = @Jockey
AND dbo.udf_GET_YARD_DISTANCE(@Distance) BETWEEN dbo.udf_GET_YARD_DISTANCE(ra.Distance)-220 AND dbo.udf_GET_YARD_DISTANCE(ra.Distance) + 220
AND finishposition = 1
AND CourseFK = @CourseFK
AND ra.RacePk <> @RaceFK
AND ra.Racedatetime < @Racedatetime
GROUP BY RaceDatetime
So it was calculating the Distance in yards for every record it looked at which is obviously bad design anyway but it didn't cause any noticeable performance effect when the dataset was quite small. I guess I must have recently gone over some kind of threshold as the way the function works combined with the size of the data it was parsing has decreased overall performance dramatically in the last week or so.
I was going to build a numbers/lookup table to replace the function but instead I have added a new column called Yards to the RACES table and calculated the value once on data input and added an index to it. I have also replaced all the instances of
dbo.udf_GET_YARD_DISTANCE(@Distance) BETWEEN dbo.udf_GET_YARD_DISTANCE(ra.Distance)-220 AND dbo.udf_GET_YARD_DISTANCE(ra.Distance) + 220
with a slight re-jiggle of the logic that accomplishes the same end in a different way
DECLARE @YardsStart INT,
@YardsEnd INT
SELECT@YardsStart = @Yards - 220,
@YardsEnd = @Yards + 220
-- then in WHERE filters reverse the logic to ensure previous races were in the range of the current race + or - 220 yards
ra.Yards BETWEEEN @YardsStart AND @YardsEnd
and the performance has increased dramatically.
The previous UPDATE that was taking upwards of 5+ minutes is now returning in <7 seconds for 11 runners.
Thanks for the help
March 8, 2011 at 6:54 pm
Rob Reid-246754 (3/3/2011)
and the performance has increased dramatically.The previous UPDATE that was taking upwards of 5+ minutes is now returning in <7 seconds for 11 runners.
I'm glad you found the problem and I'm glad you're happy with 7 seconds to process the stats for 11 runners.
I am concerned for the future welfare of the code, though. In the few examples of code you posted, I see SELECT @this and SELECT @that all over the place. My cursory observation is that you're probably running all the states for one runner at a time.
If you really want to make this code fly, stop thinking about what you want to process for one runner at a time. Instead, please consider what I have in my signature line below...
First step towards the paradigm shift of writing Set Based code:
Stop thinking about what you want to do to a row... think, instead, of what you want to do to a column."
... or, in your terms, stop thinking about what you want to do for a single runner. Instead, think about how to calculate/update a stat for ALL runners. 🙂
--Jeff Moden
Change is inevitable... Change for the better is not.
March 8, 2011 at 9:05 pm
I totally get what you mean but its pretty hard to update say the FormRating for all runners in a race at one time as there are so many different factors (50+ that go into creating a rating)
Just for example there are factors such as
-How many races has this horse won, placed, lost in last X days
-How many races has this horse won, placed, lost in total
-Combine those 2 with different filters such as
-performance on current track, current going conditions, jockey, weight
-Working out a draw bias (starting position of horse) to see if its starting position is beneficial
-Working out details of its last X races, compairing conditions and seeing if its on an upwards or downwards trend (moving down or up the rankings)
-Checking whether this runner has run against any of the other runners, has it beaten or lost to any of them and what were the conditions like
-Checking out the jockey, is it a champion jockey trying riding for a top stable, what are there ratings like at the current course
-Taking averages of stats from other winners over similar conditions and checking whether the current runner falls into this boundaries
-What is its overall win, place, lose ratios. How do these compare with the other runners, does this runner stand out amongst them or does it look like an average runner.
-Has the price changed, is it going up or down. is it the current favourite.
and so on and so on
Each factor is given a weighted score that is added up as I go along. A horse with no history will have a very low score (based around jockey , trainer and draw bias stats alone) whereas a horse with lots of history will have a higher rating.
if it were simple select statements that could be applied in a set like manner I would already be doing it but each runner has to be analysed individually as well as in relation to the other runners and there are many factors that are involved in coming up with a rating score.
I already build up my lookup tables once e.g tables of course stats, jockey and horse stats etc so that they are re-used but at present I cannot see a way of rebuilding the UDF's any other way although if you have any ideas please let me know.
March 10, 2011 at 8:48 am
Rob Reid-246754 (3/8/2011)
I totally get what you mean but its pretty hard to update say the FormRating for all runners in a race at one time as there are so many different factors (50+ that go into creating a rating)
Understood but didn't suggest updating all runners for all factors in a single go. Isn't there an "order" to the factors which must be considered? Even one full table update per factor will outstrip a cursor and other forms of RBAR.
--Jeff Moden
Change is inevitable... Change for the better is not.
March 10, 2011 at 3:00 pm
I know that you guys have moved on to deeper issues but I just wanted to post an alternative query I developed for returning races at a minimum specified time interval (5 minutes or 300 seconds in my example below). It may or may not help solve/improve your particular application at this point but it was an interesting exercise for me, and maybe it will be of interest to someone else. Thanks for the indulgence.
This query is based on a recursive CTE and is very similar to queries used in hierarchical dataset queries. You can think of the races in a hierarchical way where the next race is the earliest child of the preceding (i.e., parent) race that starts after the prescribed interval.
DECLARE @Races TABLE(raceno int identity(1,1),racedatetime datetime)
INSERT INTO @Races(racedatetime)
SELECT '2011-02-20 13:50:00'
UNION ALL
SELECT '2011-02-20 13:51:30'
UNION ALL
SELECT '2011-02-20 13:52:00'
UNION ALL
SELECT '2011-02-20 13:55:00'
UNION ALL
SELECT '2011-02-20 13:56:21'
UNION ALL
SELECT '2011-02-20 13:57:45'
UNION ALL
SELECT '2011-02-20 13:59:18'
UNION ALL
SELECT '2011-02-20 14:00:00'
UNION ALL
SELECT '2011-02-20 14:03:41'
UNION ALL
SELECT '2011-02-20 14:07:48'
UNION ALL
SELECT '2011-02-20 14:10:00'
UNION ALL
SELECT '2011-02-20 14:20:00'
UNION ALL
SELECT '2011-02-20 14:25:00'
UNION ALL
SELECT '2011-02-20 14:27:00'
UNION ALL
SELECT '2011-02-20 14:32:00'
UNION ALL
SELECT '2011-02-20 14:38:00'
UNION ALL
SELECT '2011-02-20 14:41:00'
UNION ALL
SELECT '2011-02-20 14:42:00'
UNION ALL
SELECT '2011-02-20 14:44:00'
UNION ALL
SELECT '2011-02-20 14:45:00'
DECLARE @Intervals TABLE(raceno1 int, racedatetime1 datetime, raceno2 int, racedatetime2 datetime, interval int)
INSERT INTO @Intervals
SELECT
raceno1,
racedatetime1,
MIN(raceno2) AS raceno2,
MIN(racedatetime2) AS racedatetime2,
MIN(interval) AS interval
FROM
(
SELECT
r1.raceno AS raceno1,
r1.racedatetime AS racedatetime1,
r2.raceno AS raceno2,
r2.racedatetime AS racedatetime2,
DATEDIFF(ss, r1.racedatetime, r2.racedatetime) as interval
FROM
@Races r1
CROSS JOIN
@Races r2
WHERE
DATEDIFF(ss, r1.racedatetime, r2.racedatetime) >= 300
) AS intervals
GROUP BY raceno1, racedatetime1
;WITH RecursiveIntervals AS
(
SELECT TOP 1
raceno1, racedatetime1, raceno2, racedatetime2, interval
FROM
@Intervals
ORDER BY
racedatetime1
UNION ALL
SELECT
i.raceno1, i.racedatetime1, i.raceno2, i.racedatetime2, i.interval
FROM
RecursiveIntervals r
INNER JOIN
@Intervals i
ON r.raceno2 = i.raceno1
)
SELECT
*
FROM
RecursiveIntervals
Viewing 14 posts - 16 through 28 (of 28 total)
You must be logged in to reply to this topic. Login to reply