In case it helps anyone else, here is my completed script to check the status of the merge replication agent (that was started when the SQL Server Agent started - and should be running continuously). If the merge agent is found to be stopped a message is logged and it is restarted.
This can be run as a job on the server where the replication agent is running (in my case I was using push subscriptions so the script runs at the Distributor).
declare @TempJobId uniqueidentifier, @TempJobName sysname
set nocount on
use msdb
/* Search in MSDB for enabled replication merge jobs that are scheduled to run when SQL Server Agent starts */
declare TempJobCursor cursor local fast_forward for
select j.job_id, j.name from syscategories c, sysjobs j, sysjobschedules s where
j.job_id = s.job_id and
j.category_id = c.category_id and
c.name = N'REPL-Merge' and
s.freq_type = 64 and
j.enabled = 1 and
s.enabled = 1
open TempJobCursor
fetch next from TempJobCursor into @TempJobId, @TempJobName
while @@fetch_status = 0
/* Found a matching job - check its status */
begin
exec ('declare @TempStatus int
select @TempStatus = current_execution_status from openrowset(''sqloledb'',
''server=(local);trusted_connection=yes'',
''set fmtonly off execute msdb.dbo.sp_help_job @job_id=''''' + @TempJobId +
''''', @job_aspect=''''JOB'''', @execution_status=4'')')
if @@rowcount = 1
/* Job found to be in the idle (not running) state; log an event and restart it */
begin
exec ('execute sp_start_job @job_id=''' + @TempJobId + '''')
raiserror ('Watchdog: Restarted Merge Replication agent %s', 0, 1, @TempJobName) with log
end
fetch next from TempJobCursor into @TempJobId, @TempJobName
end
/* Clean up */
close TempJobCursor
deallocate TempJobCursor
set nocount off
HTH
Richard