Intervention system for runaway jobs
Some of my developers had created jobs that executed VB applications that never ended.
Aparently they could not figure out when, why and what was going wrong. They could not repro the situation.
Now I've created this litle system so the users can assign max elaps times for their jobs. These jobs will then be intercepted and interrupted with brute force sp_stopjob.
As always with this kind of stuff : don't use it if there's no explicit reason to use it. If you have to use it, always test it before you put it in a production environment.
-- JOBI dd 24/02/2003
-- SQLServer Interventionsystem for runaway jobs
-- SQL2000 sp2 tested (SQL7.0 does not support functions fn_**)
use Master
GO
/****** Object: Database DJOBIJobMonitor Script Date: 24/02/2003 13:04:42 ******/CREATE DATABASE [DJOBIJobMonitor]
ON (NAME = N'DJOBIJobMonitor_Data', FILENAME = N'U:\MSSQL$Instance78\Data\DJOBIJobMonitor_Data.MDF' , SIZE = 1, FILEGROWTH = 10%) LOG ON (NAME = N'DJOBIJobMonitor_Log', FILENAME = N'S:\MSSQL$Instance78\Log\DJOBIJobMonitor_Log.LDF' , SIZE = 1, FILEGROWTH = 10%)
GO
Print 'Database [DJOBIJobMonitor] has been created'
go
use DJOBIJobMonitor -- gebruik userDB
GO
-- JOBISQLScripts: DBRoles
if not exists (select * from dbo.sysusers where name = N'EndUsers' and uid > 16399)
EXEC sp_addrole N'EndUsers'
GO
-- don't use the sysjobs.job_id in this persistent table because of loose coupling.
-- JobName is the join predicate because drop-create of the job wil give it a new job_id
/****** Object: Table [dbo].[T_JobRuntimes] Script Date: 24/02/2003 13:04:43 ******/CREATE TABLE [dbo].[T_JobRuntimes] (
[id_nr] [int] IDENTITY (1, 1) NOT NULL ,
[JobName] [varchar] (128) NOT NULL ,
[MaxRunMins] [smallint] NOT NULL ,
[Anotation] [varchar] (250) NOT NULL ,
[userCrea] [varchar] (30) NOT NULL ,
[tsCrea] [datetime] NOT NULL ,
[UserUpdate] [varchar] (30) NOT NULL ,
[tsUpdate] [datetime] NOT NULL ,
[CounterCanceled] [int] NOT NULL ,
[tsLastCanceled] [datetime] NOT NULL ,
[tsResetCounterCanceled] [datetime] NOT NULL
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[T_JobRuntimes] WITH NOCHECK ADD
CONSTRAINT [PK_T_JobRuntimes] PRIMARY KEY CLUSTERED
(
[id_nr]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[T_JobRuntimes] WITH NOCHECK ADD
CONSTRAINT [DF_T_JobRuntimes_MaxRuntime] DEFAULT (60) FOR [MaxRunMins],
CONSTRAINT [DF_T_JobRuntimes_Anotation] DEFAULT ('') FOR [Anotation],
CONSTRAINT [DF_T_JobRuntimes_userCrea] DEFAULT (suser_sname()) FOR [userCrea],
CONSTRAINT [DF_T_JobRuntimes_tsCrea] DEFAULT (getdate()) FOR [tsCrea],
CONSTRAINT [DF_T_JobRuntimes_UserUpdate] DEFAULT (suser_sname()) FOR [UserUpdate],
CONSTRAINT [DF_T_JobRuntimes_tsUpdate] DEFAULT (getdate()) FOR [tsUpdate],
CONSTRAINT [DF_T_JobRuntimes_CancelCounter] DEFAULT (0) FOR [CounterCanceled],
CONSTRAINT [DF_T_JobRuntimes_tsLastCanceled] DEFAULT (getdate()) FOR [tsLastCanceled],
CONSTRAINT [DF_T_JobRuntimes_tsResetCounterCanceled] DEFAULT (getdate()) FOR [tsResetCounterCanceled],
CONSTRAINT [IX_T_JobRuntimes] UNIQUE NONCLUSTERED
(
[JobName]
) ON [PRIMARY] ,
CONSTRAINT [IX_T_JobRuntimes_JobName] UNIQUE NONCLUSTERED
(
[JobName]
) ON [PRIMARY]
GO
Print 'Table [T_JobRuntimes] has been created'
go
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Spc_JOBI_GetAllJobs]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[Spc_JOBI_GetAllJobs]
GO
Create procedure Spc_JOBI_GetAllJobs
-- with encryption
as
-- JOBI dd 24/02/2003
-- Get all jobs of this server exept those starting with the servername(these are our systemjobs)
INSERT INTO T_JobRuntimes ([JobName])
select name
from msdb..sysjobs sj
left join t_JobRuntimes JRT
on sj.name = JRT.JobName
where JRT.JobName is null
and sj.name not like @@servername+'%'
and sj.name not like 'Backup %'
and sj.name not like '%DB Maintenance Plan%'
and sj.name not like 'JOBI_JobRuntime_Checker%'
go
Grant execute on Spc_JOBI_GetAllJobs to EndUsers
go
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Spc_JOBI_ResetCancelCounters]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[Spc_JOBI_ResetCancelCounters]
GO
Create procedure Spc_JOBI_ResetCancelCounters
-- with encryption
as
-- JOBI dd 24/02/2003
-- Reset CounterCanceled
update T_JobRuntimes
set CounterCanceled = 0
, tsResetCounterCanceled = getdate()
declare @Boodschap as varchar(128)
select @Boodschap = 'Spc_JOBI_ResetCancelCounters executed by [' + Suser_Sname() +'].'
-- Log this operation
RAISERROR ( @Boodschap , 10, 1) With log
go
Grant execute on Spc_JOBI_ResetCancelCounters to EndUsers
go
-- JOBI dd 24/02/2003
-- Systeem om LONG RUNNING JOBS te cancelen
--
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Spc_JOBI_CheckLongRunningJobs]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[Spc_JOBI_CheckLongRunningJobs]
GO
Create procedure Spc_JOBI_CheckLongRunningJobs
@intCounterCanceled as integer OUTPUT
-- with encryption
as
-- JOBI dd 24/02/2003
-- Cancel LONG RUNNING JOBS
--
-- Important notice : The way to convert the Job_Process_Id to sysprocesses.program_name will have to be checked with every servicepack and every version of SQLServer
-- *******
-- Only to be used when there are no other options left
--
set nocount on
create table #TMP_ALLJobs (
[Job_Name] [varchar] (128) NOT NULL ,
[Job_Process_Id] [char] (34) NOT NULL
) ON [PRIMARY]
insert into #TMP_ALLJobs
select name, master.dbo.fn_varbintohexstr(CAST(job_id AS BINARY(16)))
from msdb..sysjobs
create table #TMP_Runnig_Processes (
[program_name] [varchar] (128) NOT NULL ,
[NinutesRunning] [int] not null
) ON [PRIMARY]
-- convert to job_process_id (Opgelet : Na te kijken bij ieder versie of SP ) !!!!
-- as long as MS keep the process name 'SQLAgent - TSQL JobStep (Job ' + Job_Process_Id this will work
-- Get running processes
insert into #TMP_Runnig_Processes
select program_name, isnull( DATEDIFF(minute,last_batch, getdate()), 0) as NinutesRunning
from master..sysprocesses
where program_name like 'SQLAgent - TSQL JobStep (Job %'
-- Heck yes let’s use a cursor
Declare @JobName as varchar(128),
@intJob_Id_Nr as integer,
@intMinutesRunning as integer,
@intMaxRunMins as integer,
@intCounter as integer,
@strMessage_text as varchar(1500)
set @intCounter = 0
-- Only jobs that are documented in T_JobRuntimes can be handeled ! (because of the innerjoins and because IMHO I cannot set a maximum runtime by default)
declare csrJobsOverTime cursor for
select JRT.JobName as JobName, JRT.Id_Nr, RP.NinutesRunning , JRT.MaxRunMins
from #TMP_ALLJobs AJ
inner join
dbo.T_JobRuntimes JRT
on AJ.Job_Name = JRT.JobName
inner join #TMP_Runnig_Processes RP
on RP.program_name like 'SQLAgent - TSQL JobStep (Job ' + AJ.Job_Process_Id + '%'
and RP.NinutesRunning > JRT.MaxRunMins
order by JobName
for read only
open csrJobsOverTime
FETCH NEXT FROM csrJobsOverTime
INTO @JobName, @intJob_Id_Nr, @intMinutesRunning, @intMaxRunMins
WHILE @@FETCH_STATUS = 0
BEGIN
set @intCounter = @intCounter + 1
select @strMessage_text = ('Spc_JOBI_CheckLongRunningJobs canceled Job ['
+ rtrim(@JobName)
+ ']. It ran ['
+ SUBSTRING(CAST(@intMinutesRunning AS char(5)),1,LEN(CAST(@intMinutesRunning AS char(5))))
+ '] minutes, Max run time ['
+ SUBSTRING(CAST(@intMaxRunMins AS char(5)),1,LEN(CAST(@intMaxRunMins AS char(5))))
+ '] minutes.')
--print @strMessage_text
-- severity level : information and leave a trace
RAISERROR (@strMessage_text, 10, 1) With log
execute msdb..sp_stop_job @job_name = @JobName
update T_JobRuntimes
set CounterCanceled = CounterCanceled + 1
, tsLastCanceled = getdate()
where Id_Nr = @intJob_Id_Nr
-- get next row
FETCH NEXT FROM csrJobsOverTime
INTO @JobName, @intJob_Id_Nr, @intMinutesRunning, @intMaxRunMins
END
set @intCounterCanceled = @intCounter
--print 'Aantal Jobs canceled : ' + cast(@intCounter as varchar) + ' **'
-- close and cleanup Cursor
CLOSE csrJobsOverTime
DEALLOCATE csrJobsOverTime
-- Cleanup tmp-tables
drop table #TMP_ALLJobs
drop table #TMP_Runnig_Processes
/*
Declare @NoJobsCanceled as integer
Exec Spc_JOBI_CheckLongRunningJobs @intCounterCanceled = @ NoJobsCanceled OUTPUT
Print 'Number of Jobs canceled : ' + cast(@NoJobsCanceled as varchar) + ' **'
*/
go
Print 'Stored procedures have been instaled'
go
-- JOBISQLScripts: Jobsystem - Jobs --
begin transaction
DECLARE @JobID BINARY(16)
DECLARE @ReturnCode INT
SELECT @ReturnCode = 0
if (select count(*) from msdb.dbo.syscategories where name = N'Database Maintenance') < 1
execute msdb.dbo.sp_add_category N'Database Maintenance'
BEGIN
execute @ReturnCode = msdb.dbo.sp_add_job @job_id = @JobID OUTPUT, @job_name = N'JOBI_JobRuntime_Checker', @enabled = 0, @start_step_id = 1, @notify_level_eventlog = 2, @notify_level_email = 0, @notify_level_netsend = 0, @notify_level_page = 0, @delete_level = 0, @description = N'No description available.', @category_name = N'Database Maintenance', @owner_login_name = N'sa'
if (@@ERROR <> 0 OR @ReturnCode <> 0) goto QuitWithRollback
execute @ReturnCode = msdb.dbo.sp_add_jobstep @job_id = @JobID , @step_id = 1, @cmdexec_success_code = 0, @on_success_action = 1, @on_success_step_id = 0, @on_fail_action = 2, @on_fail_step_id = 0, @retry_attempts = 0, @retry_interval = 1, @os_run_priority = 0, @flags = 0, @step_name = N'SP_JobRuntimeChecker', @subsystem = N'TSQL', @command = N' Declare @ NoJobsCanceled as integer
Exec Spc_JOBI_CheckLongRunningJobs @intCounterCanceled = @ NoJobsCanceled OUTPUT
Print ''Number of Jobs canceled : '' + cast(@NoJobsCanceled as varchar) + '' **''
', @database_name = N'DJOBIJobMonitor'
if (@@ERROR <> 0 OR @ReturnCode <> 0) goto QuitWithRollback
execute @ReturnCode = msdb.dbo.sp_update_job @job_id = @JobID, @start_step_id = 1
if (@@ERROR <> 0 OR @ReturnCode <> 0) goto QuitWithRollback
-- Add the job schedules
EXECUTE @ReturnCode = msdb.dbo.sp_add_jobschedule @job_id = @JobID, @name = N'Heel_Regelmatig', @enabled = 1, @freq_type = 4, @active_start_date = 20030224, @active_start_time = 0, @freq_interval = 1, @freq_subday_type = 4, @freq_subday_interval = 15, @freq_relative_interval = 0, @freq_recurrence_factor = 0, @active_end_date = 99991231, @active_end_time = 235959
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
-- Add the Target Servers
EXECUTE @ReturnCode = msdb.dbo.sp_add_jobserver @job_id = @JobID, @server_name = N'(local)'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
END
commit transaction
goto EndSave
QuitWithRollback:
if (@@TRANCOUNT > 0) rollback transaction
EndSave:
GO
Print 'Job [JOBI_JobRuntime_Checker] installed and disabled.'
go
exec Spc_JOBI_GetAllJobs
Print 'Table has been populated with existing jobs. (initial 60 minutes runtime) !'
Print 'Cleanup and modify as needed'
go