Return records at least 10 minutes apart from last valid record without CURSOR

  • Hi Jeff

    I could do a quirky UPDATE solution in the following way.

    I have added an extra column to the @RACES table called usable (BIT) and then just filtered by that after the UPDATE has been done

    You might have another way of doing it

    SET DATEFORMAT YMD

    DECLARE @LastDateTime datetime, @usable bit, @diff int, @record varchar(1000)

    DECLARE @races table(raceno int identity(1,1),racedatetime datetime,diff int,usable int default(0))

    insert into @races

    (racedatetime)

    SELECT '2011-02-20 13:50:00'

    UNION ALL

    SELECT '2011-02-20 13:55:00'

    UNION ALL

    SELECT '2011-02-20 14:00:00'

    UNION ALL

    SELECT '2011-02-20 14:10:00'

    UNION ALL

    SELECT '2011-02-20 14:20:00'

    UNION ALL

    SELECT '2011-02-20 14:25:00'

    UNION ALL

    SELECT '2011-02-20 14:26:00'

    UNION ALL

    SELECT '2011-02-20 14:27:00'

    UNION ALL

    SELECT '2011-02-20 14:30:00'

    UNION ALL

    SELECT '2011-02-20 14:33:00'

    UNION ALL

    SELECT '2011-02-20 14:36:00'

    UNION ALL

    SELECT '2011-02-20 14:45:00'

    UNION ALL

    SELECT '2011-02-20 14:46:00'

    UNION ALL

    SELECT '2011-02-20 14:56:01'

    SELECT*

    FROM@RACES

    UPDATER

    SET@diff = DateDiff(mi,@LastDateTime,Racedatetime),

    @usable = CASE WHEN @usable IS NULL THEN 1

    WHEN @LastDateTime IS NOT NULL AND @diff >= 10 THEN 1

    ELSE 0 END,

    @LastDateTime = CASE @Usable WHEN 1 THEN RacedateTime ELSE @LastDateTime END,

    Usable = @Usable,

    Diff = @Diff

    FROM@RACES as R

    SELECT*

    FROM@RACES

    WHEREusable = 1

    But as I said before the problem is not that I don't know how do it in another way (cursor, quirky update) but that I have a lot of code currently in SELECT format (I will provide an example) that I want to automate the update to the new version. As far as I can see using a quirky update or cursor is going to involve a major rewrite for all my historical reports which I use for analysis to help predict future behavior.

    for example here is one such report which has been cut down by 90% to provide an example.

    /****** Object: StoredProcedure [dbo].[usp_sql_fav_in_a_row_report] Script Date: 02/25/2011 11:14:27 ******/

    SET ANSI_NULLS ON

    GO

    SET QUOTED_IDENTIFIER ON

    GO

    -- =============================================================================

    -- Author:Rob Reid

    -- Create date: 02-FEB-2011

    -- Description:Look for patterns in the the first X or last X races of a day

    -- using various scoring factors to see whether patterns exist for accumulators

    -- on the first X races of the day. A percentage of 1-5% for 3/4 way bets

    -- can return a nice profit.

    -- =============================================================================

    CREATE PROCEDURE [dbo].[usp_sql_runner_in_a_row_report]

    AS

    BEGIN

    SET DATEFORMAT YMD

    SET NOCOUNT ON

    DECLARE @stats TABLE([Desc] varchar(50),Racedatetime datetime,First4 bit,First3 bit,First2 bit,Last4 bit,Last3 bit,Last2 bit)

    DECLARE @REPORT TABLE(row int identity(1,1),Details varchar(100),perc float, TotalStaked int NULL, TotalWinnings int NULL)

    DECLARE @MinDate datetime, @MaxDate datetime, @Stamp datetime, @COUNT INT, @win INT, @PERC FLOAT

    DECLARE @First4 bit,@Last4 bit,@First3 bit,@Last3 bit,@First2 bit, @Last2 bit

    DECLARE @Odds varchar(100),@Winnings float,@Stake int

    DECLARE @WinFavFirst4TotalWinnings float, @WinFavFirst3TotalWinnings float,@WinFavFirst2TotalWinnings float

    DECLARE @WinFavLast3TotalWinnings float,@WinFavLast2TotalWinnings float,@WinFavLast4TotalWinnings float

    DECLARE @DEBUG BIT,

    @SkipBadCourses BIT,

    @Skip BIT

    SELECT@SkipBadCourses = 0,

    @Stake = 100,

    @WinFavFirst4TotalWinnings = 0,

    @WinFavFirst3TotalWinnings = 0,

    @WinFavFirst2TotalWinnings = 0,

    @WinFavLast4TotalWinnings = 0,

    @WinFavLast3TotalWinnings = 0,

    @WinFavLast2TotalWinnings = 0,

    @Winnings = 0

    -- I use my RACE_REPORT table which holds a record for each race ever run with columns for numerous factors

    -- such as whether the race was won by the favourite, the winners final price, whether the favourite was placed

    -- whether any of my myriad of other ranking scores won, placed and so on. Very wide table full of BIT columns

    -- which is rebuilt at the end of each day

    -- get the date range for the report

    SELECT@MinDate = MIN(Racedatetime),

    @MaxDate = MAX(RaceDatetime)

    FROMRACE_REPORT

    -- override min date to current month for quicker report

    SELECT @MinDate = '2011-FEB-01'

    PRINT 'FROM ' + CAST(@MinDate as varchar) + ' TO ' + CAST(@MaxDate as varchar)

    SELECT @Stamp = @MinDate

    WHILE @Stamp <= @MaxDate

    BEGIN

    PRINT 'ON DATE = ' + CAST(@Stamp as varchar)

    SELECT@Winnings = 0,

    @Odds = NULL

    -- CODE REMOVED FOR EASE OF DEMONSTRATIOn

    BEGIN

    PRINT 'Todays races are ok'

    -- were the first 4 of the day winners

    SELECT@COUNT = SUM(WonbyFavourite)

    FROM(

    SELECTTOP 4 CAST(WonByFavourite as tinyint) as WonbyFavourite

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime

    ) as t

    PRINT 'COUNT FROM FIRST FOUR THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)

    -- if the first 4 were all won by the favourite then we create a list of the final prices in 7/4,2/1,3/1,11/10

    -- order and then pass them to our function that will calculate total winnings when an initial amount is

    -- staked on the first runner and the winnings moved to the next.

    IF @COUNT = 4

    BEGIN

    SELECT @First4 = 1, @Odds= NULL

    -- get price list as we know all X won

    SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds

    FROM(

    SELECTTOP 4 WinningOdds

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime

    ) as t

    PRINT @ODDS

    SELECT @Winnings = 0

    -- get total winnings

    EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT

    SELECT @WinFavFirst4TotalWinnings = @WinFavFirst4TotalWinnings + @Winnings

    END

    ELSE

    SELECT @First4 = 0

    PRINT 'WERE FIRST FOUR OF THE DAY WON BY FAV = '+CAST(@First4 as varchar)

    SELECT@COUNT = SUM(WonbyFavourite)

    FROM(

    SELECTTOP 3 CAST(WonByFavourite as tinyint) as WonbyFavourite

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime

    ) as t

    PRINT 'COUNT FROM FIRST THREE THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)

    IF @COUNT = 3

    BEGIN

    SELECT @First3 = 1, @Odds= NULL

    SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds

    FROM(

    SELECTTOP 3 WinningOdds

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime

    ) as t

    PRINT @ODDS

    SELECT @Winnings = 0

    EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT

    SELECT @WinFavFirst3TotalWinnings = @WinFavFirst3TotalWinnings + @Winnings

    END

    ELSE

    SELECT @First3 = 0

    PRINT 'WERE FIRST THREE OF THE DAY WON BY FAV = '+CAST(@First3 as varchar)

    SELECT@COUNT = SUM(WonbyFavourite)

    FROM(

    SELECTTOP 2 CAST(WonByFavourite as tinyint) as WonbyFavourite

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime

    ) as t

    PRINT 'COUNT FROM FIRST TWO THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)

    IF @COUNT = 2

    BEGIN

    SELECT @First2 = 1, @Odds= NULL

    SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds

    FROM(

    SELECTTOP 2 WinningOdds

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime

    ) as t

    PRINT @ODDS

    SELECT @Winnings = 0

    EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT

    SELECT @WinFavFirst2TotalWinnings = @WinFavFirst2TotalWinnings + @Winnings

    END

    ELSE

    SELECT @First2 = 0

    PRINT 'WERE FIRST TWO OF THE DAY WON BY FAV = '+CAST(@First2 as varchar)

    SELECT@COUNT = SUM(WonbyFavourite)

    FROM(

    SELECTTOP 4 CAST(WonByFavourite as tinyint) as WonbyFavourite

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime DESC

    ) as t

    PRINT 'COUNT FROM LAST FOUR THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)

    IF @COUNT = 4

    BEGIN

    SELECT @Last4 = 1, @Odds= NULL

    SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds

    FROM(

    SELECTTOP 4 WinningOdds

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime DESC

    ) as t

    PRINT @ODDS

    SELECT @Winnings = 0

    EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT

    SELECT @WinFavLast4TotalWinnings = @WinFavLast4TotalWinnings + @Winnings

    END

    ELSE

    SELECT @Last4 = 0

    PRINT 'WERE LAST FOUR OF THE DAY WON BY FAV = '+CAST(@Last4 as varchar)

    SELECT@COUNT = SUM(WonbyFavourite)

    FROM(

    SELECTTOP 3 CAST(WonByFavourite as tinyint) as WonbyFavourite

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime DESC

    ) as t

    PRINT 'COUNT FROM LAST THREE THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)

    IF @COUNT = 3

    BEGIN

    SELECT @Last3 = 1, @Odds= NULL

    SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds

    FROM(

    SELECTTOP 3 WinningOdds

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime DESC

    ) as t

    PRINT @ODDS

    SELECT @Winnings = 0

    EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT

    SELECT @WinFavLast3TotalWinnings = @WinFavLast3TotalWinnings + @Winnings

    END

    ELSE

    SELECT @Last3 = 0

    PRINT 'WERE LAST THREE OF THE DAY WON BY FAV = '+CAST(@Last3 as varchar)

    SELECT@COUNT = SUM(WonbyFavourite)

    FROM(

    SELECTTOP 2 CAST(WonByFavourite as tinyint) as WonbyFavourite

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime DESC

    ) as t

    PRINT 'COUNT FROM LAST TWO THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)

    IF @COUNT = 2

    BEGIN

    SELECT @Last2 = 1,@Odds= NULL

    SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds

    FROM(

    SELECTTOP 2 WinningOdds

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime DESC

    ) as t

    PRINT @ODDS

    SELECT @Winnings = 0

    EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT

    SELECT @WinFavLast2TotalWinnings = @WinFavLast2TotalWinnings + @Winnings

    END

    ELSE

    SELECT @Last2 = 0

    PRINT 'WERE LAST TWO OF THE DAY WON BY FAV = '+CAST(@Last2 as varchar)

    INSERT INTO @stats

    ([Desc],Racedatetime,First4,First3,First2,Last4,Last3,Last2)

    VALUES

    ('WONFAV',@Stamp,@First4,@First3,@First2,@Last4,@Last3,@Last2)

    -- now another 150 SELECT statements using other rankings apart from the favourite status

    END

    SELECT @Stamp = DATEADD(day,1,@Stamp)

    -- CODE REMOVED

    IF @Stamp > @MaxDate

    BEGIN

    PRINT 'QUIT'

    END

    END

    -- now some reports to look for patterns

    /*

    SELECT*

    FROM@stats

    */

    SELECT@COUNT= COUNT(*) FROM @stats WHERE [DESC]='WONFAV'

    SELECT@win= COUNT(*) FROM @stats WHERE First4 = 1 AND [DESC]='WONFAV'

    INSERT INTO @REPORT

    (Perc,Details,TotalStaked,TotalWinnings)

    SELECTCAST(@WIN as float) / @COUNT * 100 , 'First Four Races Won By Fav %'

    ,@Stake * @COUNT, @WinFavFirst4TotalWinnings

    SELECT@win= COUNT(*) FROM @stats WHERE First3 = 1 AND [DESC]='WONFAV'

    INSERT INTO @REPORT

    (Perc,Details,TotalStaked,TotalWinnings)

    SELECTCAST(@WIN as float) / @COUNT * 100 , 'First Three Races Won By Fav %'

    ,@Stake * @COUNT, @WinFavFirst3TotalWinnings

    SELECT@win= COUNT(*) FROM @stats WHERE First2 = 1 AND [DESC]='WONFAV'

    INSERT INTO @REPORT

    (Perc,Details,TotalStaked,TotalWinnings)

    SELECTCAST(@WIN as float) / @COUNT * 100 , 'First Two Races Won By Fav %'

    ,@Stake * @COUNT, @WinFavFirst2TotalWinnings

    SELECT@win= COUNT(*) FROM @stats WHERE Last4 = 1 AND [DESC]='WONFAV'

    INSERT INTO @REPORT

    (Perc,Details,TotalStaked,TotalWinnings)

    SELECTCAST(@WIN as float) / @COUNT * 100 , 'Last Four Races Won By Fav %'

    ,@Stake * @COUNT, @WinFavLast4TotalWinnings

    SELECT@win= COUNT(*) FROM @stats WHERE Last3 = 1 AND [DESC]='WONFAV'

    INSERT INTO @REPORT

    (Perc,Details,TotalStaked,TotalWinnings)

    SELECTCAST(@WIN as float) / @COUNT * 100 , 'Last Three Races Won By Fav %'

    ,@Stake * @COUNT, @WinFavLast3TotalWinnings

    SELECT@win= COUNT(*) FROM @stats WHERE Last2 = 1 AND [DESC]='WONFAV'

    INSERT INTO @REPORT

    (Perc,Details,TotalStaked,TotalWinnings)

    SELECTCAST(@WIN as float) / @COUNT * 100 , 'Last Two Races Won By Fav %'

    ,@Stake * @COUNT, @WinFavLast2TotalWinnings

    -- another 150 or so reports for all the other kinds of ranking

    -- an overview of patterns

    SELECTDetails,Perc,COALESCE(TotalStaked,0) as TotalStaked,

    COALESCE(TotalWinnings,0) as TotalWinnings,

    COALESCE(TotalWinnings,0)-COALESCE(TotalStaked,0) as Profit

    FROM@REPORT

    ORDER BY Row

    END

    This report is one of many that look for patterns in historical races which can be used for future bets.

    This particular report is looking for patterns which can be used for accumulator bets and looks for days where the first 4, 3, 2 races OR last 4,3,2

    races of the day were won by a certain ranking factor. In this report I have removed most of the code so that only one ranking factor is shown. In this case its the favourite status of the horse (favourites win 30-40% of all races).

    I have many different scoring factors.

    The report loops through the date range and for each day checks whether the first and last 4,3,2 races were won by the favourite. If so it takes the final prices of each runner and passes them to another procedure which returns the total winnings. The details of that days outcome are added to a report which is outputted at the end.

    As you can see there are lots of SELECT statements already being used to determine whether the first X races of the day were won.

    These are the statements I need to change to reflect the issue in hand, namely that each race needs to be at least ten minutes apart from the subsequent race.

    This is why I wanted a SELECT style solution or another way to simply update these statements if possible

    -- were the first 4 races of the day won by favourites

    SELECT@COUNT = SUM(WonbyFavourite)

    FROM(

    SELECTTOP 4 CAST(WonByFavourite as tinyint) as WonbyFavourite

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime

    ) as t

    PRINT 'COUNT FROM FIRST FOUR THAT WERE WON BY FAV = '+CAST(@COUNT as varchar)

    /* if the first 4 were all won by the favourite then we create a list of the final prices in 7/4,2/1,3/1,11/10

    order and then pass them to our function that will calculate total winnings when an initial amount is

    staked on the first runner and the winnings moved to the next. */

    IF @COUNT = 4

    BEGIN

    SELECT @First4 = 1, @Odds= NULL

    -- get price list as we know all 4 won

    SELECT@Odds=COALESCE(@Odds+',','')+WinningOdds

    FROM(

    SELECTTOP 4 WinningOdds

    FROMRACE_REPORT

    WHEREDateDiff(day,Racedatetime,@Stamp)=0

    ORDER BY RaceDateTime

    ) as t

    PRINT @ODDS

    SELECT @Winnings = 0

    -- get total winnings

    EXEC dbo.usp_sql_calculate_accumulator_winnings @ODDS,@Stake,0,@Winnings OUTPUT

    SELECT @WinFavFirst4TotalWinnings = @WinFavFirst4TotalWinnings + @Winnings

    END

    ELSE

    SELECT @First4 = 0

    Now I am sure that all this code can be written in a myriad of different ways but the fact is the code is already there so I would like to be able to quickly update it all if possible rather than rewrite it from scratch.

    If you have any tips then please share.

    Thanks for your help

  • Rob Reid-246754 (2/25/2011)


    These are the statements I need to change to reflect the issue in hand, namely that each race needs to be at least ten minutes apart from the subsequent race.

    You don't need to change any of code. Change the name of the Race_Report table to something else. Use the Quirky Update to create a "new" table from that "something else" table called "Race_Report" with your 10 minute spaced races and you're done.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Couldn't you create a table valued function and then do a global find and replace, replacing the table name with the name of your table valued function, e.g., "Race_Report" => "Race_Report_Table_Function()"

  • bobhines (2/25/2011)


    Couldn't you create a table valued function and then do a global find and replace, replacing the table name with the name of your table valued function, e.g., "Race_Report" => "Race_Report_Table_Function()"

    What would be in that table valued function? A While Loop?

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Could be anything that generates the data in the form desired. Once the table proper has been replaced with the table valued function, he can be free to change it's definition without having to change stored procedures, etc.

  • Cheers Jeff

    I now get what you mean whereas I must admit before I must have misread your previous comment.

    That would be a very good solution to my problem 🙂

    Thanks

  • Very cool. Thanks for the feedback, Rob.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Jeff seeing you are an RBAR expert how would you approach the following;

    It's related to the same database and basically for each runner in a race I have a number of score/rating columns that rank each runner.

    To calculate each score/rating I have to carry out numerous lookups and run a number of calculations e.g how many times has this horse won on this course with these conditions over this time period and so on. I weight different factors and then use those weightings in comparison with other runners.

    At the moment I am using User Defined Functions and then utilizing them in my UPDATE statements whenever I need to update them e.g if the conditions change, the price goes right up or down or another runner drops out.

    This hasn't been a problem up until now but in the last week I have noticed significant slow downs when running reports and I am currently investigating whether the problems are related to this code or not. I am using a shared server with lots of other busy sites however queries that were taking 2 seconds max are sometimes taking up to 12 minutes to run (intermittently).

    The tables I use are heavily indexed and most of the actual reports use with (nolock) on the joins anyway (unless the reports are being used for pricing decisions) so I wouldn't have thought the UPDATES would have blocked my reporting anyhow but I am experiencing major slowdowns which I am looking into at the moment.

    I have been using the new 2008 Activity monitor and some DMV reports as well as automatic logging of the sys.processes system view at various intervals but so far I have not seen any blocking or deadlocks during these long query times.

    When I run each UDF on it's own with a hardcoded value it takes between <1 second and 2 seconds to return.

    When I run all 3 UDF's for a single runner it is taking around 2-3 seconds to return all 3 values.

    Each UDF probably carries out 30 -60 SELECTS as well as making numerous calls to other UDF's to get various related statistics to be used in the calculations.

    SELECT FormRating = dbo.udf_GET_FORM_RATING( 7585 ),

    FormRating2 = dbo.udf_GET_FORM_RATING2( 7585) ,

    FormRating3 = dbo.udf_GET_FORM_RATING3( 7585 )

    However when I run an UPDATE statement for all the runners in a race (7 runners) the statement is taking

    to return whereas it was taking 21 seconds (7 runners * 3 seconds duration) up until a week ago it is now taking anywhere between 5-12 minutes (depending on ??)

    UPDATERACE_RUNNERS

    SETFormRating = dbo.udf_GET_FORM_RATING( RunnerPK ),

    FormRating2 = dbo.udf_GET_FORM_RATING2( RunnerPK ) ,

    FormRating3 = dbo.udf_GET_FORM_RATING3( RunnerPK )

    WHERERaceFk = 23503 -- @RacePK

    AND NonRunner = 0 -- ignore non runners

    Whenever possible I will always target my UPDATE to one particular runner but sometimes when the conditions change e.g weather turns the ground from firm to heavy all the runners ratings will be affected and need to change.

    Therefore in your opinion what would be the best way to achieve the UPDATE of multiple rows without resorting to a CURSOR/WHILE loop which I am currently considering as it seems the table lock is slowing things right down whereas the row locks are manageable.

    Saying that however I have still have not ruled out something else causing the problem as we have some other I/O, temp table intensive sites on the same server so some other process could be causing the strange varying response times. However even if that is not the cause it would be good to hear your thoughts on the matter considering the RBAR nature of the problem.

    Thanks

  • Rob Reid-246754 (3/2/2011)


    <Snip a worthy explanation>

    Therefore in your opinion what would be the best way to achieve the UPDATE of multiple rows without resorting to a CURSOR/WHILE loop which I am currently considering as it seems the table lock is slowing things right down whereas the row locks are manageable.

    Saying that however I have still have not ruled out something else causing the problem as we have some other I/O, temp table intensive sites on the same server so some other process could be causing the strange varying response times. However even if that is not the cause it would be good to hear your thoughts on the matter considering the RBAR nature of the problem.

    Thanks

    I'm not Jeff but there's going to be a few problems with this. You mention the number of functions and the like that you're performing. That would require some heavy sifting through your code to eventually try to remove all your UDFs and turn them into rowset methods, and determining which of them are candidates for that task. It's pretty much an overhaul and most folks around here would get paid for that level of work.

    However, I don't want to discourage you from specific questions, so perhaps we can offer you some strategies to help find the unique culprit(s) and help you on those.

    You mention that a single UDF takes about 3 seconds to run, and for 7 rows 21 seconds to run, this is not unexpected. When the same UDF takes 5 minutes to run for 7 rows, something's gone wrong, as you'd expect.

    The first thing I'd do is start checking my wait_types, seeing if you can figure out where the bottleneck is. It might be contention for the log file, it might be simply reading data, it might be blocking itself. You won't know without checking the wait_types on the SPID you're running the process from while it's taking a while... well, or without resetting the wait_type information and checking it again afterwards.

    The next task I'd do to start figuring out where the problems lie is benchmarking each unique step of the functions in a test environment, and see if the same problems exist there as they do in production. If they do, you've got a nice place to work out issues on. If not, it's most likely got to do with multi-user concurrency on the production box, which means you'll need to explore locking and blocking there. Then again, cleaning up the code will reduce the locking and blocking, so it's kind of the good version of a catch-22.

    Once you know the couple of hot spots and/or troubled queries, we can review the code with you and help you untangle whatever it is that ails it.


    - Craig Farrell

    Never stop learning, even if it hurts. Ego bruises are practically mandatory as you learn unless you've never risked enough to make a mistake.

    For better assistance in answering your questions[/url] | Forum Netiquette
    For index/tuning help, follow these directions.[/url] |Tally Tables[/url]

    Twitter: @AnyWayDBA

  • I have narrowed down the code and found the culprit which was a UDF which was being used a lot within the various ranking functions and reports.

    The functions aim is to convert distances inputted as Miles, Yards and Furlongs into Yards which can then be used in range searches e.g to find a history of distances a runner performs well at.

    So it was converting all the various distance formats e.g

    2m 144yd 7f

    1m

    900 yd 6f

    7f

    into just yards.

    The function was making use of @table variables to split up the distance into its 3 parts, carrying out calculations to convert each measurement into yards and then returning a SUM of the yards.

    CREATE FUNCTION [dbo].[udf_GET_YARD_DISTANCE]

    (

    @Distance varchar(50)

    )

    RETURNS INT

    AS

    BEGIN

    DECLARE @Yards INT

    DECLARE @dist TABLE(val varchar(20))

    IF @Distance = 'NA'

    RETURN 0

    -- split 3m 220yds 7f into 3 rows

    INSERT INTO @dist

    SELECT VALUE FROM dbo.udf_SPLIT(@Distance, ' ')

    -- clean up each record and convert into yards

    UPDATE@dist

    SETVAL = CAST(REPLACE(VAL,'m','') as int) * 1760

    WHEREVAL LIKE '%m'

    UPDATE @dist

    SETVAL = CAST(REPLACE(VAL,'f','') as int) * 220

    WHEREVAL LIKE '%f'

    UPDATE @dist

    SETVAL = CAST(REPLACE(REPLACE(VAL,'yds',''),'y','') as int)

    WHEREVAL LIKE '%y%'

    -- return

    SELECT @YARDS = SUM(CAST(VAL as int)) FROM @dist

    RETURN @YARDS

    END

    This function was being used a lot like so:

    -- get info about current race

    SELECT @RaceFK = RacePK,

    @Racedatetime = Racedatetime ,

    @HorseName = HorseName,

    @Distance= Distance

    @Jockey = Jockey,

    @CourseFk = CourseFk,

    @ThreeMonths = DATEADD(MONTH,-3,Racedatetime)

    FROMRACE_RUNNERS as run

    JOINRACES as ra

    ONra.RacePK = run.RaceFK

    WHERERunnerPK = @RunnerPK

    -- lots of other similar reports to increment @RATING

    SELECT@Rating = @Rating + CASE WHEN COUNT(*) > 0 AND Racedatetime > @ThreeMonths THEN 4 WHEN COUNT(*) > 0 THEN 2 ELSE 0 END

    FROMRACE_RUNNERS as rr

    JOINRACES as ra ON rr.racefk = ra.racePK

    WHEREHorseName=@HorseName

    AND Jockey = @Jockey

    AND dbo.udf_GET_YARD_DISTANCE(@Distance) BETWEEN dbo.udf_GET_YARD_DISTANCE(ra.Distance)-220 AND dbo.udf_GET_YARD_DISTANCE(ra.Distance) + 220

    AND finishposition = 1

    AND CourseFK = @CourseFK

    AND ra.RacePk <> @RaceFK

    AND ra.Racedatetime < @Racedatetime

    GROUP BY RaceDatetime

    So it was calculating the Distance in yards for every record it looked at which is obviously bad design anyway but it didn't cause any noticeable performance effect when the dataset was quite small. I guess I must have recently gone over some kind of threshold as the way the function works combined with the size of the data it was parsing has decreased overall performance dramatically in the last week or so.

    I was going to build a numbers/lookup table to replace the function but instead I have added a new column called Yards to the RACES table and calculated the value once on data input and added an index to it. I have also replaced all the instances of

    dbo.udf_GET_YARD_DISTANCE(@Distance) BETWEEN dbo.udf_GET_YARD_DISTANCE(ra.Distance)-220 AND dbo.udf_GET_YARD_DISTANCE(ra.Distance) + 220

    with a slight re-jiggle of the logic that accomplishes the same end in a different way

    DECLARE @YardsStart INT,

    @YardsEnd INT

    SELECT@YardsStart = @Yards - 220,

    @YardsEnd = @Yards + 220

    -- then in WHERE filters reverse the logic to ensure previous races were in the range of the current race + or - 220 yards

    ra.Yards BETWEEEN @YardsStart AND @YardsEnd

    and the performance has increased dramatically.

    The previous UPDATE that was taking upwards of 5+ minutes is now returning in <7 seconds for 11 runners.

    Thanks for the help

  • Rob Reid-246754 (3/3/2011)


    and the performance has increased dramatically.

    The previous UPDATE that was taking upwards of 5+ minutes is now returning in <7 seconds for 11 runners.

    I'm glad you found the problem and I'm glad you're happy with 7 seconds to process the stats for 11 runners.

    I am concerned for the future welfare of the code, though. In the few examples of code you posted, I see SELECT @this and SELECT @that all over the place. My cursory observation is that you're probably running all the states for one runner at a time.

    If you really want to make this code fly, stop thinking about what you want to process for one runner at a time. Instead, please consider what I have in my signature line below...

    First step towards the paradigm shift of writing Set Based code:

    Stop thinking about what you want to do to a row... think, instead, of what you want to do to a column."

    ... or, in your terms, stop thinking about what you want to do for a single runner. Instead, think about how to calculate/update a stat for ALL runners. 🙂

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • I totally get what you mean but its pretty hard to update say the FormRating for all runners in a race at one time as there are so many different factors (50+ that go into creating a rating)

    Just for example there are factors such as

    -How many races has this horse won, placed, lost in last X days

    -How many races has this horse won, placed, lost in total

    -Combine those 2 with different filters such as

    -performance on current track, current going conditions, jockey, weight

    -Working out a draw bias (starting position of horse) to see if its starting position is beneficial

    -Working out details of its last X races, compairing conditions and seeing if its on an upwards or downwards trend (moving down or up the rankings)

    -Checking whether this runner has run against any of the other runners, has it beaten or lost to any of them and what were the conditions like

    -Checking out the jockey, is it a champion jockey trying riding for a top stable, what are there ratings like at the current course

    -Taking averages of stats from other winners over similar conditions and checking whether the current runner falls into this boundaries

    -What is its overall win, place, lose ratios. How do these compare with the other runners, does this runner stand out amongst them or does it look like an average runner.

    -Has the price changed, is it going up or down. is it the current favourite.

    and so on and so on

    Each factor is given a weighted score that is added up as I go along. A horse with no history will have a very low score (based around jockey , trainer and draw bias stats alone) whereas a horse with lots of history will have a higher rating.

    if it were simple select statements that could be applied in a set like manner I would already be doing it but each runner has to be analysed individually as well as in relation to the other runners and there are many factors that are involved in coming up with a rating score.

    I already build up my lookup tables once e.g tables of course stats, jockey and horse stats etc so that they are re-used but at present I cannot see a way of rebuilding the UDF's any other way although if you have any ideas please let me know.

  • Rob Reid-246754 (3/8/2011)


    I totally get what you mean but its pretty hard to update say the FormRating for all runners in a race at one time as there are so many different factors (50+ that go into creating a rating)

    Understood but didn't suggest updating all runners for all factors in a single go. Isn't there an "order" to the factors which must be considered? Even one full table update per factor will outstrip a cursor and other forms of RBAR.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • I know that you guys have moved on to deeper issues but I just wanted to post an alternative query I developed for returning races at a minimum specified time interval (5 minutes or 300 seconds in my example below). It may or may not help solve/improve your particular application at this point but it was an interesting exercise for me, and maybe it will be of interest to someone else. Thanks for the indulgence.

    This query is based on a recursive CTE and is very similar to queries used in hierarchical dataset queries. You can think of the races in a hierarchical way where the next race is the earliest child of the preceding (i.e., parent) race that starts after the prescribed interval.

    DECLARE @Races TABLE(raceno int identity(1,1),racedatetime datetime)

    INSERT INTO @Races(racedatetime)

    SELECT '2011-02-20 13:50:00'

    UNION ALL

    SELECT '2011-02-20 13:51:30'

    UNION ALL

    SELECT '2011-02-20 13:52:00'

    UNION ALL

    SELECT '2011-02-20 13:55:00'

    UNION ALL

    SELECT '2011-02-20 13:56:21'

    UNION ALL

    SELECT '2011-02-20 13:57:45'

    UNION ALL

    SELECT '2011-02-20 13:59:18'

    UNION ALL

    SELECT '2011-02-20 14:00:00'

    UNION ALL

    SELECT '2011-02-20 14:03:41'

    UNION ALL

    SELECT '2011-02-20 14:07:48'

    UNION ALL

    SELECT '2011-02-20 14:10:00'

    UNION ALL

    SELECT '2011-02-20 14:20:00'

    UNION ALL

    SELECT '2011-02-20 14:25:00'

    UNION ALL

    SELECT '2011-02-20 14:27:00'

    UNION ALL

    SELECT '2011-02-20 14:32:00'

    UNION ALL

    SELECT '2011-02-20 14:38:00'

    UNION ALL

    SELECT '2011-02-20 14:41:00'

    UNION ALL

    SELECT '2011-02-20 14:42:00'

    UNION ALL

    SELECT '2011-02-20 14:44:00'

    UNION ALL

    SELECT '2011-02-20 14:45:00'

    DECLARE @Intervals TABLE(raceno1 int, racedatetime1 datetime, raceno2 int, racedatetime2 datetime, interval int)

    INSERT INTO @Intervals

    SELECT

    raceno1,

    racedatetime1,

    MIN(raceno2) AS raceno2,

    MIN(racedatetime2) AS racedatetime2,

    MIN(interval) AS interval

    FROM

    (

    SELECT

    r1.raceno AS raceno1,

    r1.racedatetime AS racedatetime1,

    r2.raceno AS raceno2,

    r2.racedatetime AS racedatetime2,

    DATEDIFF(ss, r1.racedatetime, r2.racedatetime) as interval

    FROM

    @Races r1

    CROSS JOIN

    @Races r2

    WHERE

    DATEDIFF(ss, r1.racedatetime, r2.racedatetime) >= 300

    ) AS intervals

    GROUP BY raceno1, racedatetime1

    ;WITH RecursiveIntervals AS

    (

    SELECT TOP 1

    raceno1, racedatetime1, raceno2, racedatetime2, interval

    FROM

    @Intervals

    ORDER BY

    racedatetime1

    UNION ALL

    SELECT

    i.raceno1, i.racedatetime1, i.raceno2, i.racedatetime2, i.interval

    FROM

    RecursiveIntervals r

    INNER JOIN

    @Intervals i

    ON r.raceno2 = i.raceno1

    )

    SELECT

    *

    FROM

    RecursiveIntervals

Viewing 14 posts - 16 through 28 (of 28 total)

You must be logged in to reply to this topic. Login to reply