Technical Article

Intervention system for runaway jobs

,

Some of my developers had created jobs that executed VB applications that never ended.
Aparently they could not figure out when, why and what was going wrong. They could not repro the situation.
Now I've created this litle system so the users can assign max elaps times for their jobs. These jobs will then be intercepted and interrupted with brute force sp_stopjob.

As always with this kind of stuff : don't use it if there's no explicit reason to use it. If you have to use it, always test it before you put it in a production environment.

-- JOBI dd 24/02/2003
-- SQLServer Interventionsystem for runaway jobs
-- SQL2000 sp2 tested (SQL7.0 does not support functions fn_**)
 use Master
GO 
 

/****** Object:  Database DJOBIJobMonitor    Script Date: 24/02/2003 13:04:42 ******/CREATE DATABASE [DJOBIJobMonitor]  
ON (NAME = N'DJOBIJobMonitor_Data', FILENAME = N'U:\MSSQL$Instance78\Data\DJOBIJobMonitor_Data.MDF' , SIZE = 1, FILEGROWTH = 10%) LOG ON (NAME = N'DJOBIJobMonitor_Log', FILENAME = N'S:\MSSQL$Instance78\Log\DJOBIJobMonitor_Log.LDF' , SIZE = 1, FILEGROWTH = 10%)
GO
 
Print 'Database [DJOBIJobMonitor] has been created'
go
 
 use DJOBIJobMonitor -- gebruik userDB
GO 
 
-- JOBISQLScripts: DBRoles 
 
if not exists (select * from dbo.sysusers where name = N'EndUsers' and uid > 16399)
 EXEC sp_addrole N'EndUsers'
GO
 
-- don't use the sysjobs.job_id in this persistent table because of loose coupling. 
-- JobName is the join predicate because drop-create of the job wil give it a new job_id
/****** Object:  Table [dbo].[T_JobRuntimes]    Script Date: 24/02/2003 13:04:43 ******/CREATE TABLE [dbo].[T_JobRuntimes] (
 [id_nr] [int] IDENTITY (1, 1) NOT NULL ,
 [JobName] [varchar] (128) NOT NULL ,
 [MaxRunMins] [smallint] NOT NULL ,
 [Anotation] [varchar] (250) NOT NULL ,
 [userCrea] [varchar] (30) NOT NULL ,
 [tsCrea] [datetime] NOT NULL ,
 [UserUpdate] [varchar] (30) NOT NULL ,
 [tsUpdate] [datetime] NOT NULL ,
 [CounterCanceled] [int] NOT NULL ,
 [tsLastCanceled] [datetime] NOT NULL ,
 [tsResetCounterCanceled] [datetime] NOT NULL 
) ON [PRIMARY]
GO
 
ALTER TABLE [dbo].[T_JobRuntimes] WITH NOCHECK ADD 
 CONSTRAINT [PK_T_JobRuntimes] PRIMARY KEY  CLUSTERED 
 (
  [id_nr]
 )  ON [PRIMARY] 
GO
 
ALTER TABLE [dbo].[T_JobRuntimes] WITH NOCHECK ADD 
 CONSTRAINT [DF_T_JobRuntimes_MaxRuntime] DEFAULT (60) FOR [MaxRunMins],
 CONSTRAINT [DF_T_JobRuntimes_Anotation] DEFAULT ('') FOR [Anotation],
 CONSTRAINT [DF_T_JobRuntimes_userCrea] DEFAULT (suser_sname()) FOR [userCrea],
 CONSTRAINT [DF_T_JobRuntimes_tsCrea] DEFAULT (getdate()) FOR [tsCrea],
 CONSTRAINT [DF_T_JobRuntimes_UserUpdate] DEFAULT (suser_sname()) FOR [UserUpdate],
 CONSTRAINT [DF_T_JobRuntimes_tsUpdate] DEFAULT (getdate()) FOR [tsUpdate],
 CONSTRAINT [DF_T_JobRuntimes_CancelCounter] DEFAULT (0) FOR [CounterCanceled],
 CONSTRAINT [DF_T_JobRuntimes_tsLastCanceled] DEFAULT (getdate()) FOR [tsLastCanceled],
 CONSTRAINT [DF_T_JobRuntimes_tsResetCounterCanceled] DEFAULT (getdate()) FOR [tsResetCounterCanceled],
 CONSTRAINT [IX_T_JobRuntimes] UNIQUE  NONCLUSTERED 
 (
  [JobName]
 )  ON [PRIMARY] ,
 CONSTRAINT [IX_T_JobRuntimes_JobName] UNIQUE  NONCLUSTERED 
 (
  [JobName]
 )  ON [PRIMARY] 
GO
 

Print 'Table [T_JobRuntimes] has been created'
go
 
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Spc_JOBI_GetAllJobs]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[Spc_JOBI_GetAllJobs]
GO
 
Create procedure Spc_JOBI_GetAllJobs
-- with encryption
as 
-- JOBI dd 24/02/2003
-- Get all jobs of this server exept those starting with the servername(these are our systemjobs)
 
INSERT INTO T_JobRuntimes ([JobName])
 select  name 
 from msdb..sysjobs sj
    left join t_JobRuntimes JRT 
 on sj.name = JRT.JobName
 where JRT.JobName is null
 and sj.name not like @@servername+'%'
        and sj.name not like 'Backup %'
 and sj.name not like '%DB Maintenance Plan%'
 and sj.name not like 'JOBI_JobRuntime_Checker%'
 
go
 
Grant execute on Spc_JOBI_GetAllJobs to EndUsers
go
 

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Spc_JOBI_ResetCancelCounters]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[Spc_JOBI_ResetCancelCounters]
GO
 
Create procedure Spc_JOBI_ResetCancelCounters
-- with encryption
as 
-- JOBI dd 24/02/2003
-- Reset CounterCanceled
update T_JobRuntimes 
 set CounterCanceled = 0
          , tsResetCounterCanceled = getdate()
 
declare @Boodschap as varchar(128)
select @Boodschap = 'Spc_JOBI_ResetCancelCounters executed by [' + Suser_Sname() +'].'
-- Log this operation
RAISERROR ( @Boodschap  , 10, 1) With log
 
go
 
Grant execute on Spc_JOBI_ResetCancelCounters to EndUsers
go
    
-- JOBI dd 24/02/2003
-- Systeem om LONG RUNNING JOBS te cancelen
-- 
 

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Spc_JOBI_CheckLongRunningJobs]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
 drop procedure [dbo].[Spc_JOBI_CheckLongRunningJobs]
GO
 
Create procedure Spc_JOBI_CheckLongRunningJobs 
 @intCounterCanceled as integer OUTPUT
 
-- with encryption
as
 
-- JOBI dd 24/02/2003
-- Cancel LONG RUNNING JOBS
-- 
-- Important notice : The way to convert the Job_Process_Id to sysprocesses.program_name will have to be checked with every servicepack and every version of SQLServer 
-- *******   
--  Only to be used when there are no other options left 
-- 
 
set nocount on
 
create table #TMP_ALLJobs ( 
 [Job_Name] [varchar] (128)  NOT NULL ,
 [Job_Process_Id] [char] (34)  NOT NULL 
) ON [PRIMARY]
 
insert into #TMP_ALLJobs
select name, master.dbo.fn_varbintohexstr(CAST(job_id AS BINARY(16)))
from  msdb..sysjobs

create table #TMP_Runnig_Processes ( 
 [program_name] [varchar] (128)  NOT NULL ,
 [NinutesRunning] [int] not null 
) ON [PRIMARY]
 
-- convert to job_process_id (Opgelet : Na te kijken bij ieder versie of SP ) !!!!
-- as long as MS keep the process name  'SQLAgent - TSQL JobStep (Job ' +  Job_Process_Id this will work
-- Get running processes
insert into #TMP_Runnig_Processes
select program_name,  isnull( DATEDIFF(minute,last_batch, getdate()), 0)  as NinutesRunning
from master..sysprocesses
where program_name like 'SQLAgent - TSQL JobStep (Job %'
 
-- Heck yes let’s use a cursor
Declare @JobName as varchar(128),
 @intJob_Id_Nr as integer,
 @intMinutesRunning as integer,
 @intMaxRunMins as integer,
 @intCounter  as integer,
 @strMessage_text as varchar(1500)
 
set @intCounter  = 0
 
-- Only jobs that are documented in T_JobRuntimes can be handeled  ! (because of the innerjoins and because IMHO I cannot set a maximum runtime by default)

declare csrJobsOverTime cursor for 
 select JRT.JobName as JobName, JRT.Id_Nr, RP.NinutesRunning , JRT.MaxRunMins
 from #TMP_ALLJobs AJ
  inner join 
  dbo.T_JobRuntimes JRT
      on AJ.Job_Name = JRT.JobName
  inner join #TMP_Runnig_Processes RP
      on RP.program_name like 'SQLAgent - TSQL JobStep (Job ' + AJ.Job_Process_Id + '%'
       and RP.NinutesRunning > JRT.MaxRunMins
 order by JobName
 for read only
 
open csrJobsOverTime
FETCH NEXT FROM csrJobsOverTime 
 INTO @JobName, @intJob_Id_Nr, @intMinutesRunning, @intMaxRunMins
 
WHILE @@FETCH_STATUS = 0
BEGIN
    set @intCounter  = @intCounter  + 1
    select @strMessage_text = ('Spc_JOBI_CheckLongRunningJobs canceled Job [' 
 + rtrim(@JobName)
 + ']. It ran ['
 + SUBSTRING(CAST(@intMinutesRunning AS char(5)),1,LEN(CAST(@intMinutesRunning AS char(5))))
 + '] minutes, Max run time ['
 + SUBSTRING(CAST(@intMaxRunMins AS char(5)),1,LEN(CAST(@intMaxRunMins AS char(5)))) 
 + '] minutes.')
 
    --print @strMessage_text
    -- severity level : information and leave a trace
     RAISERROR (@strMessage_text, 10, 1) With log

    execute msdb..sp_stop_job @job_name = @JobName
 
    update T_JobRuntimes 
  set CounterCanceled = CounterCanceled + 1 
          , tsLastCanceled = getdate()
  where Id_Nr =  @intJob_Id_Nr
 
    -- get next row
    FETCH NEXT FROM csrJobsOverTime
  INTO @JobName, @intJob_Id_Nr, @intMinutesRunning, @intMaxRunMins
 
END
 
set @intCounterCanceled = @intCounter 
--print 'Aantal Jobs canceled : ' + cast(@intCounter  as varchar) + ' **' 
 
-- close and cleanup Cursor 
CLOSE csrJobsOverTime
DEALLOCATE csrJobsOverTime
 
-- Cleanup tmp-tables
drop table #TMP_ALLJobs
 
drop table #TMP_Runnig_Processes
 
/*
 Declare @NoJobsCanceled as integer
 Exec Spc_JOBI_CheckLongRunningJobs @intCounterCanceled = @ NoJobsCanceled OUTPUT
 Print 'Number of Jobs canceled : ' + cast(@NoJobsCanceled as varchar) + ' **' 
*/ 
go
 
Print 'Stored procedures have been instaled'
go
 
-- JOBISQLScripts: Jobsystem - Jobs --
 
begin transaction 
  DECLARE @JobID BINARY(16)
  DECLARE @ReturnCode INT
  SELECT @ReturnCode = 0
if (select count(*) from msdb.dbo.syscategories where name = N'Database Maintenance') < 1 
  execute msdb.dbo.sp_add_category N'Database Maintenance'
 
BEGIN
  execute @ReturnCode = msdb.dbo.sp_add_job @job_id = @JobID OUTPUT, @job_name = N'JOBI_JobRuntime_Checker', @enabled = 0, @start_step_id = 1, @notify_level_eventlog = 2, @notify_level_email = 0, @notify_level_netsend = 0, @notify_level_page = 0, @delete_level = 0, @description = N'No description available.', @category_name = N'Database Maintenance', @owner_login_name = N'sa' 
  if (@@ERROR <> 0 OR @ReturnCode <> 0) goto QuitWithRollback
 

  execute @ReturnCode = msdb.dbo.sp_add_jobstep @job_id = @JobID , @step_id = 1, @cmdexec_success_code = 0, @on_success_action = 1, @on_success_step_id = 0, @on_fail_action = 2, @on_fail_step_id = 0, @retry_attempts = 0, @retry_interval = 1, @os_run_priority = 0, @flags = 0, @step_name = N'SP_JobRuntimeChecker', @subsystem = N'TSQL', @command = N' Declare @ NoJobsCanceled as integer
 Exec Spc_JOBI_CheckLongRunningJobs @intCounterCanceled = @ NoJobsCanceled OUTPUT
 Print ''Number of Jobs canceled : '' + cast(@NoJobsCanceled as varchar) + '' **'' 
', @database_name = N'DJOBIJobMonitor'
  if (@@ERROR <> 0 OR @ReturnCode <> 0) goto QuitWithRollback
 
  execute @ReturnCode = msdb.dbo.sp_update_job @job_id = @JobID, @start_step_id = 1
  if (@@ERROR <> 0 OR @ReturnCode <> 0) goto QuitWithRollback
 
  -- Add the job schedules
  EXECUTE @ReturnCode = msdb.dbo.sp_add_jobschedule @job_id = @JobID, @name = N'Heel_Regelmatig', @enabled = 1, @freq_type = 4, @active_start_date = 20030224, @active_start_time = 0, @freq_interval = 1, @freq_subday_type = 4, @freq_subday_interval = 15, @freq_relative_interval = 0, @freq_recurrence_factor = 0, @active_end_date = 99991231, @active_end_time = 235959
  IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback 
 
  -- Add the Target Servers
  EXECUTE @ReturnCode = msdb.dbo.sp_add_jobserver @job_id = @JobID, @server_name = N'(local)' 
  IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback 
 
 
 
END
 
commit transaction 
goto EndSave 
QuitWithRollback: 
  if (@@TRANCOUNT > 0) rollback transaction 
EndSave:
 
GO
 
Print 'Job [JOBI_JobRuntime_Checker] installed and disabled.'
go
 
exec Spc_JOBI_GetAllJobs
 
Print 'Table has been populated with existing jobs. (initial 60 minutes runtime) !'
Print 'Cleanup and modify as needed'

go

Rate

You rated this post out of 5. Change rating

Share

Share

Rate

You rated this post out of 5. Change rating