There are many alerts you can set up via the replication monitor (I encourage you to set those up as well). To create a custom process to track (this of course isn't full proof, but works well enough)
First you should create a table on your publisher:
CREATE TABLE [dbo].[_ReplicationCheck](
[CurrentTime] [datetime] NOT NULL,
CONSTRAINT [PK__ReplicationCheck] PRIMARY KEY CLUSTERED
(
[CurrentTime] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 92) ON [PRIMARY]
) ON [PRIMARY]
GO
Then add this article to your publication. Verify it has been replicated over to your subscriber.
Then create the SQL Agent job on the Publisher to regularly update the table you created:
USE [msdb]
GO
/****** Object: Job [DAILY - Replication Checker :: Update Current Time] Script Date: 03/16/2013 11:25:58 ******/
BEGIN TRANSACTION
DECLARE @ReturnCode INT
SELECT @ReturnCode = 0
/****** Object: JobCategory [REPL-Checkup] Script Date: 03/16/2013 11:25:58 ******/
IF NOT EXISTS (SELECT name FROM msdb.dbo.syscategories WHERE name=N'REPL-Checkup' AND category_class=1)
BEGIN
EXEC @ReturnCode = msdb.dbo.sp_add_category @class=N'JOB', @type=N'LOCAL', @name=N'REPL-Checkup'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
END
DECLARE @jobId BINARY(16)
EXEC @ReturnCode = msdb.dbo.sp_add_job @job_name=N'DAILY - Replication Checker :: Update Current Time',
@enabled=1,
@notify_level_eventlog=0,
@notify_level_email=2,
@notify_level_netsend=0,
@notify_level_page=2,
@delete_level=0,
@description=N'SJM - 8/23/2012 - Job updates the current time in the _ReplicationCheck tables in all 4 CC databases. A counter-part job them checks the timestamps in the publication tables against the time in the replicated tables. If it''s off by 30 minutes, an alert is sent via text and email to dba_alerts@mycompany.com',
@category_name=N'REPL-Checkup',
@owner_login_name=N'MyUser',
@notify_email_operator_name=N'DBA',
@notify_page_operator_name=N'DBA', @job_id = @jobId OUTPUT
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
/****** Object: Step [Update CurrentTime in _ReplicationCheck tables] Script Date: 03/16/2013 11:25:59 ******/
EXEC @ReturnCode = msdb.dbo.sp_add_jobstep @job_id=@jobId, @step_name=N'Update CurrentTime in _ReplicationCheck tables',
@step_id=1,
@cmdexec_success_code=0,
@on_success_action=1,
@on_success_step_id=0,
@on_fail_action=2,
@on_fail_step_id=0,
@retry_attempts=0,
@retry_interval=0,
@os_run_priority=0, @subsystem=N'TSQL',
@command=N'UPDATE MyDatabase.dbo.[_ReplicationCheck]
SET CurrentTime = GETDATE()
',
@database_name=N'master',
@flags=4
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_update_job @job_id = @jobId, @start_step_id = 1
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobschedule @job_id=@jobId, @name=N'Every 30 minutes',
@enabled=1,
@freq_type=4,
@freq_interval=1,
@freq_subday_type=4,
@freq_subday_interval=24,
@freq_relative_interval=0,
@freq_recurrence_factor=0,
@active_start_date=20120823,
@active_end_date=99991231,
@active_start_time=0,
@active_end_time=235959,
@schedule_uid=N'88bb96d2-a2a1-4c1c-aa1a-8bee09b9f25f'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobserver @job_id = @jobId, @server_name = N'(local)'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
COMMIT TRANSACTION
GOTO EndSave
QuitWithRollback:
IF (@@TRANCOUNT > 0) ROLLBACK TRANSACTION
EndSave:
GO
And for the last Agent Job (which does the actual monitoring), also created on the Publisher:
USE [msdb]
GO
/****** Object: Job [DAILY - Replication Checker :: Monitor Status] Script Date: 03/16/2013 11:28:07 ******/
BEGIN TRANSACTION
DECLARE @ReturnCode INT
SELECT @ReturnCode = 0
/****** Object: JobCategory [REPL-Alert Response] Script Date: 03/16/2013 11:28:07 ******/
IF NOT EXISTS (SELECT name FROM msdb.dbo.syscategories WHERE name=N'REPL-Alert Response' AND category_class=1)
BEGIN
EXEC @ReturnCode = msdb.dbo.sp_add_category @class=N'JOB', @type=N'LOCAL', @name=N'REPL-Alert Response'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
END
DECLARE @jobId BINARY(16)
EXEC @ReturnCode = msdb.dbo.sp_add_job @job_name=N'DAILY - Replication Checker :: Monitor Status',
@enabled=1,
@notify_level_eventlog=0,
@notify_level_email=0,
@notify_level_netsend=0,
@notify_level_page=2,
@delete_level=0,
@description=N'SJM - 8/23/2012 - Created to monitor the time it takes to replicate a single table from all the CC databases to their respective subscription databases on the subscriber (currently MyServer) - The job does NOT have a notification set to email the dba_alerts team as this is already happening within the job step.',
@category_name=N'REPL-Alert Response',
@owner_login_name=N'MyUser',
@notify_page_operator_name=N'DBA', @job_id = @jobId OUTPUT
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
/****** Object: Step [Monitor Replication Threshold] Script Date: 03/16/2013 11:28:07 ******/
EXEC @ReturnCode = msdb.dbo.sp_add_jobstep @job_id=@jobId, @step_name=N'Monitor Replication Threshold',
@step_id=1,
@cmdexec_success_code=0,
@on_success_action=1,
@on_success_step_id=0,
@on_fail_action=2,
@on_fail_step_id=0,
@retry_attempts=0,
@retry_interval=0,
@os_run_priority=0, @subsystem=N'TSQL',
@command=N'SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED
GO
/* DECLARE VARIS NEEDED */
DECLARE
@CAProdDate datetime,
@CAReplDate datetime,
@SQL varchar(max),
@Subject varchar(175),
@Body varchar(max)
/* Assign defaults */
DECLARE
@CALate bit = 0,
@Threshold int = 7200 --> Time in seconds
/* Fetch current time stamps */
SET @CAProdDate = (SELECT CurrentTime FROM MyDatabase.dbo.[_ReplicationCheck])
SET @CAReplDate = (SELECT CurrentTime FROM [MySubscriberServer].MyDatabase.dbo.[_ReplicationCheck])
/* Determine which publications are late */
IF (ABS(DATEDIFF(second, @CAProdDate, @CAReplDate)) > @Threshold)
BEGIN
SET @CALate = 1
END
IF (@CALate = 1)
BEGIN
SET @SQL = ''
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED
GO
SELECT
a.publisher_db,
a.article,
ds.article_id,
ds.UndelivCmdsInDistDB,
ds.DelivCmdsInDistDB,
CASE WHEN md.[status] = 1 THEN ''''Started''''
WHEN md.[status] = 2 THEN ''''Succeeded''''
WHEN md.[status] = 3 THEN ''''In Progress''''
WHEN md.[status] = 4 THEN ''''Idle''''
WHEN md.[status] = 5 THEN ''''Retrying''''
WHEN md.[status] = 6 THEN ''''Failed''''
ELSE ''''Other''''
END [Status],
CASE WHEN md.warning = 0 THEN ''''OK''''
WHEN md.warning = 1 THEN ''''Expired''''
WHEN md.warning = 2 THEN ''''Latency''''
ELSE ''''OTHER''''
END [Warnings],
md.cur_latency / 60.0 / 60.0 [Latency (min.)]
FROM
[MyDistributionServer].Distribution.dbo.MSdistribution_status ds
JOIN
[MyDistributionServer].Distribution.dbo.MSarticles a
ON a.article_id = ds.article_id
JOIN
[MyDistributionServer].Distribution.dbo.MSreplication_monitordata md
ON md.agent_id = ds.agent_id
WHERE
UndelivCmdsInDistDB > 0
AND a.publisher_db = ''''MyDatabase''''
ORDER BY
a.publisher_db, UndelivCmdsInDistDB DESC''
SET @Subject = ''REPLICATION IS FALLING BEHIND ON '' + @@SERVERNAME
SET @Body = ''The threshold ('' + CAST(@Threshold AS varchar(4))
+ '' [sec.]) which has been allotted to replicate the timestamp in the _ReplicationCheck tables has been exceeded.''
+ CHAR(13) + ''======================================================================================''
+ CHAR(13) + '' REPLICATION HAS FALLEN BEHIND!!!''
+ CHAR(13) + ''======================================================================================''
+ CHAR(13) + CHAR(13)
+ ''Please check the replication monitor for errors/issues or run the QUERY BELOW ON THE DISTRIBUTOR :: THEN CONTACT THE ON CALL DBA IMMEDIATELY AT 000.000.0000''
+ CHAR(13) + CHAR(13) + @SQL
EXEC msdb.dbo.sp_send_dbmail @recipients = ''dba_alerts@mycompany.com'', @copy_recipients=''nagiosdb@mycompany.com'', @subject = @Subject, @body = @Body,
@body_format = ''TEXT'', @importance = ''High''
BEGIN TRY
SELECT
1 / 0 -- Force the job to fail so the page will get sent
END TRY
BEGIN CATCH
RAISERROR (''Replication Threshold EXCEEDED; Notifying DBA team'', 16, 1)
END CATCH
END
',
@database_name=N'master',
@flags=4
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_update_job @job_id = @jobId, @start_step_id = 1
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobschedule @job_id=@jobId, @name=N'Every minute',
@enabled=1,
@freq_type=4,
@freq_interval=1,
@freq_subday_type=4,
@freq_subday_interval=5,
@freq_relative_interval=0,
@freq_recurrence_factor=0,
@active_start_date=20120823,
@active_end_date=99991231,
@active_start_time=13000,
@active_end_time=75959,
@schedule_uid=N'3c2cd1eb-a616-422f-a5cd-e40b79e57629'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobserver @job_id = @jobId, @server_name = N'(local)'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
COMMIT TRANSACTION
GOTO EndSave
QuitWithRollback:
IF (@@TRANCOUNT > 0) ROLLBACK TRANSACTION
EndSave:
GO
You can also monitor using tracer tokens. I created a job that automatically inserts them as well (because I want to know if the latency is occurring between the publisher >> distributor or from the distributor to my subscribers)
Script to create a SQL Agent job to insert the tracer tokens (Job also created on the Publisher:
USE [msdb]
GO
/****** Object: Job [DBA - Monitor Replication Latency] Script Date: 03/16/2013 11:43:48 ******/
BEGIN TRANSACTION
DECLARE @ReturnCode INT
SELECT @ReturnCode = 0
/****** Object: JobCategory [REPL-Checkup] Script Date: 03/16/2013 11:43:48 ******/
IF NOT EXISTS (SELECT name FROM msdb.dbo.syscategories WHERE name=N'REPL-Checkup' AND category_class=1)
BEGIN
EXEC @ReturnCode = msdb.dbo.sp_add_category @class=N'JOB', @type=N'LOCAL', @name=N'REPL-Checkup'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
END
DECLARE @jobId BINARY(16)
EXEC @ReturnCode = msdb.dbo.sp_add_job @job_name=N'DAILY - Monitor Replication Latency',
@enabled=1,
@notify_level_eventlog=0,
@notify_level_email=2,
@notify_level_netsend=0,
@notify_level_page=0,
@delete_level=0,
@category_name=N'REPL-Checkup',
@owner_login_name=N'MyUser',
@notify_email_operator_name=N'DBA', @job_id = @jobId OUTPUT
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
/****** Object: Step [Tracer Token - CA] Script Date: 03/16/2013 11:43:48 ******/
EXEC @ReturnCode = msdb.dbo.sp_add_jobstep @job_id=@jobId, @step_name=N'Tracer Token - CA',
@step_id=1,
@cmdexec_success_code=0,
@on_success_action=3,
@on_success_step_id=0,
@on_fail_action=2,
@on_fail_step_id=0,
@retry_attempts=0,
@retry_interval=0,
@os_run_priority=0, @subsystem=N'TSQL',
@command=N'sp_posttracertoken ''PublicationName''',
@database_name=N'MyDatabase',
@flags=4
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_update_job @job_id = @jobId, @start_step_id = 1
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobschedule @job_id=@jobId, @name=N'Daily - Tracer Monitoring',
@enabled=1,
@freq_type=4,
@freq_interval=1,
@freq_subday_type=4,
@freq_subday_interval=5,
@freq_relative_interval=0,
@freq_recurrence_factor=0,
@active_start_date=20130228,
@active_end_date=99991231,
@active_start_time=230000,
@active_end_time=60000,
@schedule_uid=N'e1cc826e-0aa1-4eeb-9ef0-ad1729fc0789'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobserver @job_id = @jobId, @server_name = N'(local)'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
COMMIT TRANSACTION
GOTO EndSave
QuitWithRollback:
IF (@@TRANCOUNT > 0) ROLLBACK TRANSACTION
EndSave:
GO
You can then monitor the tracer tokens using this TSQL (this code must be run against the Distributor:
USE distribution;
SELECT
c.publication,
publisher_commit,
distributor_commit,
DATEDIFF(ss, publisher_commit, distributor_commit) 'Latency - Pub >> Dis',
subscriber_commit,
DATEDIFF(ss, distributor_commit, subscriber_commit) 'Latency - Dis >> Sub'
FROM
MSTracer_tokens a
INNER JOIN MSTracer_history b
ON tracer_id = parent_tracer_id
INNER JOIN MSPublications c WITH (READUNCOMMITTED)
ON a.publication_id = c.publication_id
WHERE DATEDIFF(ss, publisher_commit, distributor_commit) > 5
OR DATEDIFF(ss, distributor_commit, subscriber_commit) > 5
ORDER BY
a.publisher_commit DESC,
c.publication
You should be able to run these scripts as-is, just change the "MyServer", MyDatabase", and things like that to what works for you - Hope it helps!
______________________________________________________________________________Never argue with an idiot; Theyll drag you down to their level and beat you with experience