您的位置:首页 > 运维架构

复制监控存储过程及失败解决方法

2011-01-13 16:12 381 查看
USE [distribution] --分发数据库

GO

CREATE procedure [dbo].[sp_repl_syn]

@publication sysname='%' --模糊查询,发布名称

,@subscriber sysname='%' --模糊查询,发布名称

,@flag smallint=0 --default 0=not return cmd count,1=return cmd count,2=only show error item

as

/**********************

复制失败的内容查询
REMARK:

1. 解析代码: sp_replmonitorsubscriptionpendingcmds

2. 解析代码: sys.sp_MSget_repl_commands

3. 不考虑可更新订阅的情况,可更新订阅需要修改代码

4. 不返回未分发命令数可以加快查询

5. undelivered_transactions或undelivered_commands=-1 说明有未分发的事务或命令

6. if error agent_id<=5,then return cmd count

7. 注意:未启动或者停止的分发代理不在error之列
EXAMPLE:

exec sp_repl_syn

exec sp_repl_syn '%','%',2 --失败的代理

************************/

set nocount on
if object_id('tempdb.dbo.#agent') is not null drop table #agent

if object_id('tempdb.dbo.#his') is not null drop table #his

if object_id('tempdb.dbo.#max_xact') is not null drop table #max_xact

if object_id('tempdb.dbo.#last_max_xact') is not null drop table #last_max_xact

if object_id('tempdb.dbo.#repl') is not null drop table #repl

if object_id('tempdb.dbo.##out') is not null drop table ##out
create table #agent(agent_id int,publisher_database_id int,publisher_db sysname ,publisher_id smallint

,subscriber_id smallint,subscriber_db sysname,publication sysname,publisher sysname,subscriber sysname

)
--获得agent的基本信息

if @flag=2 --2=only show error item----找出同步不正常的代理

begin

insert into #agent

SELECT a.id as agent_id,a.publisher_database_id, a.publisher_db, a.publisher_id

,a.subscriber_id,a.subscriber_db, a.publication

,b.srvname as publisher,d.srvname as subscriber

from MSdistribution_agents a with(nolock)

inner join master.dbo.sysservers b with(nolock) on a.publisher_id =b.srvid

inner join master.dbo.sysservers d with(nolock) on a.subscriber_id =d.srvid

inner join dbo.MSpublications c with(nolock) on a.publisher_db=c.publisher_db and a.publication=c.publication

inner join dbo.MSsubscriptions s WITH(NOLOCK) on s.agent_id = a.id

inner join (select agent_id, max(timestamp) as timestamp

from dbo.MSdistribution_history

group by agent_id) cte on cte.agent_id=a.id

inner join dbo.MSdistribution_history dh on dh.agent_id=cte.agent_id and dh.timestamp=cte.TIMESTAMP

where a.subscriber_id>0 /*yf.排除subscriber_db=virtual*/

and a.subscription_type = 0 and c.publication_type=0

and (s.status in (0,1) /*订阅的状态 0 = 不活动 1 = 已订阅 2 = 活动*/

or dh.runstatus >=5 /*1 = Start. 2 = Succeed. 3 = In progress. 4 = Idle. 5 = Retry. 6 = Fail*/

or dh.comments like '%Error %'

)

and a.publication like @publication

and d.srvname like @subscriber

--and a.publisher_db like @publisher_db

if @@rowcount<=5 set @flag=1

end

else

begin

insert into #agent

SELECT a.id as agent_id,a.publisher_database_id, a.publisher_db, a.publisher_id

,a.subscriber_id,a.subscriber_db, a.publication

,b.srvname as publisher,d.srvname as subscriber

from MSdistribution_agents a with(nolock)

inner join master.dbo.sysservers b with(nolock) on a.publisher_id =b.srvid

inner join master.dbo.sysservers d with(nolock) on a.subscriber_id =d.srvid

inner join MSpublications c with(nolock) on a.publisher_db=c.publisher_db and a.publication=c.publication

where a.subscriber_id>0 /*yf.排除subscriber_db=virtual*/

and a.subscription_type = 0 and c.publication_type=0

and a.publication like @publication

and d.srvname like @subscriber

--and a.publisher_db like @publisher_db

end
--MSdistribution_history clustered index : agent_id, timestamp, runstatus, start_time, time ; 10000 rows

--add join MSrepl_errors 20090916 kening.hu(yf20091009: slow performance,add redundancy rows , cancel)

;with cte(agent_id,timestamp)

as(

select agent_id, max(timestamp)

from dbo.MSdistribution_history

group by agent_id

)

select dh.agent_id, dh.timestamp, runstatus, start_time, dh.time,dh.xact_seqno as last_xact_seqno,delivery_rate as avg_rate,duration

--case when dh.error_id=0 then comments else e.error_text end AS comments

,comments

,current_delivery_rate,current_delivery_latency,delivered_transactions,delivered_commands

,average_commands as avg_commands,delivery_latency as avg_latency,total_delivered_commands,error_id

into #his

from dbo.MSdistribution_history dh

join cte on dh.agent_id=cte.agent_id and dh.timestamp=cte.TIMESTAMP

--LEFT JOIN MSrepl_errors e ON dh.error_id = e.id

--注意不能被log_reader进程阻塞! with (READPAST)

/*用msrepl_transactions替代MSrepl_commands,可以加快查询速度,提升约1秒*/

select publisher_database_id ,max(xact_seqno) as max_xact_seqno

into #max_xact

from msrepl_transactions mc with (READPAST)

group by publisher_database_id
select ah2.*,mx.max_xact_seqno

into #last_max_xact

from #agent a

join #his ah2 on a.agent_id=ah2.agent_id

join #max_xact mx on a.publisher_database_id=mx.publisher_database_id
create table #repl(agent_id int,undelivered_transactions int,undelivered_commands int)
if not exists(select * from #last_max_xact where last_xact_seqno<>max_xact_seqno)

set @flag=0
if @flag=1 --return cmd count

begin

-- Performance impact

-- Issue the select for counts separately ???

insert into #repl

select l.agent_id, count(distinct xact_seqno) as undelivered_transactions,count(*) as undelivered_commands

from #last_max_xact l

join dbo.MSsubscriptions s WITH(NOLOCK)

on s.agent_id = l.agent_id and l.last_xact_seqno<>l.max_xact_seqno

left join MSrepl_commands rc WITH (NOLOCK)

ON (rc.article_id = s.article_id AND rc.publisher_database_id=s.publisher_database_id )

where s.status = 2 --订阅的状态 0 = 不活动 1 = 已订阅 2 = 活动

and rc.xact_seqno > l.last_xact_seqno

and rc.xact_seqno <= l.max_xact_seqno

group by l.agent_id

end

else --if @flag=0 --fast select,do not return cmd count

begin

insert into #repl

select l.agent_id, -1 as undelivered_transactions,-1 as undelivered_commands

from #last_max_xact l

join dbo.MSsubscriptions s WITH(NOLOCK)

on s.agent_id = l.agent_id and l.last_xact_seqno<>l.max_xact_seqno

--left join MSrepl_commands rc WITH (NOLOCK)

-- ON (rc.article_id = s.article_id AND rc.publisher_database_id=s.publisher_database_id )

where s.status = 2 --订阅的状态 0 = 不活动 1 = 已订阅 2 = 活动

--and rc.xact_seqno > l.last_xact_seqno

--and rc.xact_seqno <= l.max_xact_seqno

group by l.agent_id

end

-- return the resultset

select a.agent_id,a.publisher ,a.publisher_db ,a.publication ,a.subscriber ,a.subscriber_db

,case runstatus when 1 then 'Start' when 2 then 'Succeed' when 3 then 'In progress'

when 4 then 'Idle' when 5 then 'Retry' when 6 then 'Fail' end as runstatus

--,case when last_xact_seqno=max_xact_seqno then 0 else isnull(r.undelivered_transactions,0) end as undelivered_transactions

--,case when last_xact_seqno=max_xact_seqno then 0 else isnull(r.undelivered_commands,0) end as undelivered_commands

,isnull(r.undelivered_transactions,0) as undlv_trans--undelivered_trans

,isnull(r.undelivered_commands,0) as undlv_cmds -- undelivered_cmds

,case when r.undelivered_commands=-1 then ''

when (h.avg_rate<>0) then dbo.udf_Duration_IntToChar(cast(isnull((cast(r.undelivered_commands as float) / h.avg_rate),0) as int) )

else '0' end as prcs_time --EstimatedProcessTime-- compute the time to process

,comments,round(current_delivery_rate,0) as [cur_cmds/s],round(avg_rate,0) as [avg_cmds/s]

,round(convert(float,current_delivery_latency)/1000,0) as cur_latency

, round(convert(float,avg_latency)/1000,0) as avg_latency--, avg_latency

,convert(char(20),h.start_time,120) start_time,dbo.udf_Duration_IntToChar(duration) as run_duration

,delivered_transactions as dlv_trans,delivered_commands as dlv_cmds

,avg_commands as [cmds/tran],total_delivered_commands as total_dlv_cmds

,error_id, convert(char(20),time,120) as update_time

into ##out

from #agent a

left join #his h on a.agent_id=h.agent_id

left join #repl r on a.agent_id=r.agent_id

order by a.publication,a.subscriber
select * from ##out
print 'You can use the output data with:

select * from ##out where comments not like ''%transaction%'' --失败的代理

select * from ##out where publication like ''%'' '
GO

GO

CREATE procedure [dbo].[sp_repl_alert]

@ToMail varchar(1000), -- 收件人

@ToPhone varchar(1000) -- 接收手机号码,

as

/**********************

复制监控存储过程,错误邮件、短信提醒
EXAMPLE:

exec sp_repl_alert

@ToMail = 'xxxxx@xxxxx', -- 收件人

@ToPhone = '153........'

************************/
set nocount on

declare @return int,@message varchar(8000)

--create table

if object_id('dbo.dba_repl_alert') is null

begin

create table dbo.dba_repl_alert(

--id int identity ,

agent_id int primary key nonclustered,

updatetime datetime not null,

publisher sysname not null,

publication sysname not null,

subscriber sysname not null,

comments varchar(4000) null

)

create clustered index cx_dba_repl_alert on dba_repl_alert(updatetime)

end
--删除1天前的记录,因此相同的复制错误每天会报一次,防止一天多次报错

delete dbo.dba_repl_alert where updatetime<=getdate()-1
declare @out table (

publisher sysname not null,

publication sysname not null,

subscriber sysname not null,

comments varchar(4000) null

)
insert dbo.dba_repl_alert

output inserted.publisher,inserted.publication,inserted.subscriber,inserted.comments

into @out

SELECT a.id as agent_id,getdate()

,b.srvname as publisher, a.publication ,d.srvname as subscriber,dh.comments

from MSdistribution_agents a with(nolock)

inner join master.dbo.sysservers b with(nolock) on a.publisher_id =b.srvid

inner join master.dbo.sysservers d with(nolock) on a.subscriber_id =d.srvid

inner join dbo.MSpublications c with(nolock) on a.publisher_db=c.publisher_db and a.publication=c.publication

inner join dbo.MSsubscriptions s WITH(NOLOCK) on s.agent_id = a.id

inner join (select agent_id, max(timestamp) as timestamp

from dbo.MSdistribution_history

group by agent_id) cte on cte.agent_id=a.id

inner join dbo.MSdistribution_history dh on dh.agent_id=cte.agent_id and dh.timestamp=cte.TIMESTAMP

where a.subscriber_id>0 /*yf.排除subscriber_db=virtual*/

and a.subscription_type = 0 and c.publication_type=0

and (s.status in (0,1) /*订阅的状态 0 = 不活动 1 = 已订阅 2 = 活动*/

or dh.runstatus >=5 /*1 = Start. 2 = Succeed. 3 = In progress. 4 = Idle. 5 = Retry. 6 = Fail*/

or dh.comments like '%Error %'

)

and a.id not in (select agent_id from dbo.dba_repl_alert )
if @@rowcount=0 --no error

begin

Print 'Replication is OK or NO new error!'

return 0

end
--error info

declare @publisher sysname ,@publication sysname ,@subscriber sysname,@comments sysname,@time varchar(8)

set @publisher=''

set @publication=''

set @subscriber=''

set @comments=''

set @time=right(convert(char(19),getdate(),120),8)
SELECT @publisher=@publisher+publisher+'|' from (SELECT distinct publisher from @out ) a

if right(@publisher,1)='|' set @publisher=left(@publisher,len(@publisher)-1)
SELECT @publication=@publication+publication+'|' from (SELECT distinct publication from @out ) a

if right(@publication,1)='|' set @publication=left(@publication,len(@publication)-1)
SELECT @subscriber=@subscriber+subscriber+'|' from (SELECT distinct subscriber from @out ) a

if right(@subscriber,1)='|' set @subscriber=left(@subscriber,len(@subscriber)-1)
SELECT @comments=@comments+comments+'|' from (SELECT distinct convert(varchar(35),comments) as comments from @out ) a

if right(@comments,1)='|' set @comments=left(@comments,len(@comments)-1)
--合并相似名称

--SELECT subscriber,CHECKSUM(left(subscriber,len(subscriber)-1)) as CHECKSUM0 from @out group by subscriber
--email

DECLARE @Subject nvarchar(128),@Body nvarchar(4000),@sms nvarchar(132),@From varchar(1000)

set @From = 'xxxxx@xxxxxx'
SELECT @Subject = N'<REPL-ALERT> '+@publisher+'.' +@publication

select @Body='[Publisher Name]:'+@publisher+char(10)

+'[Publication Name]:'+@publication+char(10)

+'[Subscriber Name]:'+@subscriber+char(10)

+'[Time]:'+@time+char(10)

+'[Comments]:'+@comments+char(10)
EXEC [dbo].[sp_send_mail] @From, @ToMail, @Subject, @Body

--sms

set @sms='复制失败;-)'+@Time+' '+@publisher+'.'+@publication+':'+@subscriber+':'+@comments

-- +/*'[Run time]:'+*/ stuff(stuff(convert(char(8),@last_run_date),5,0,'-'),8,0,'-')+' '+stuff(stuff(stuff(replace(str(@last_run_time),' ','0'),1,4,''),3,0,':'),6,0,':')
----truncate to less than ? characters

set @sms=convert(nvarchar(136),@sms)

EXEC [system].dbo.up_send_SMS @phone=@ToPhone,@SMS_Msg =@sms

GO

/********************************复制失败解决方法******************************/

--1==在复制监视器上查看失败的分发代理的具体错误,找到导致失败的事务序列号:
/*例如:
尝试的命令:
if @@trancount > 0 rollback tran
(事务序列号: 0x0035182600001065000A00000000,命令ID: 1)
错误消息:
The row was not found at the Subscriber when applying the replicated command. (源: MSSQLServer,错误号: 20598)
*/
--这个错误消息表明可能有需要删除的行不存在!

--or 1==
--MSdistribution_history.xact_seqno不是出错事务的xact_seqno,精确的导致事务回滚的xact_seqno=MSrepl_errors.xact_seqno
--在分发数据库执行
select top 100 e.xact_seqno ,e.command_id,e.*
from MSdistribution_history h
join MSrepl_errors e on h.error_id=e.id
where comments not like '%transaction%' --失败的代理
order by id desc

--用上面查到的具体事务序列号,查看复制组件执行的具体命令
--在分发数据库上执行:sp_browsereplcmds,注意必须限定开始和结束xact_seqno
--在分发数据库执行
EXEC sp_browsereplcmds '0x0043AABB000002D7001A00000000','0x0043AABB000002D7001A00000000'

--找到这个具体的命令如下: (执行sp_browsereplcmds产生的command列)
--sp_MSupd为执行update操作  _dbo为架构  orderitem为对象名称 (内容)为更新的记录内容
{CALL [sp_MSupd_dboorderitem] (,,,,,,,'D',,,,,,,,,6915690,0x8000)}  
--sp_MSdel为执行delete操作  _dbo为架构  reserve为对象名称  (数字)为要删除的标识ID
{CALL [sp_MSdel_dboreserve] (16963864)}

--sp_MSins为执行insert操作  _dbo为架构  PriceInfo为对象名称  (内容)为要插入的新记录内容

{CALL [sp_MSins_dboPriceInfo] (1177152,'42004043','0001',90178,{ts '2010-06-14 00:00:00.000'},{ts '2010-06-16 00:00:00.000'},'',380.00000,430.00000,380.00000,430.00000,0,0.00000,'RMB',0,0,1,{ts '2010-01-28 10:58:30.780'},'赖文芳','202.104.115.139',0,0,0.0000,0.0000,0.0000,0.0000)}

--验证上述数据在故障订阅服务器上是否存在,如果不存在,则补上
--将找到的记录另存一个表,放到tempdb上,表名相同
drop table tempdb.dbo.customers
select * into tempdb.dbo.customers
from customers where customer_id=14520515

--在备份库上生成丢失数据的insert语句(sp_InsertSQL最好支持where条件)
use tempdb
exec sp_InsertSQL @TableName ='customers'

--在失败的订阅数据库上:
--编写脚本补上丢失的数据,注意identity列的影响
INSERT INTO [customers] ([customer_id],[card_no],[customer_type],[reserv_type],[hotel_booked],[air_booked],[tour_booked],[id_no],[passport_no],[officer_no],[reentry_no],[tctc_no],[eep_no],[other_no],[birthday],[name],[sex],[company_address],[company_postcode],[home_address],[home_postcode],[ticket_address],[ticket_postcode],[phone_inter_code],[phone_area_code],[phone],[phone_ext],[fax],[mobile_area_code],[mobile],[pas_inter_code],[pas_area_code],[pas],[email],[city],[country],[status],[operator],[op_date],[last_certificate_type],[guest_type],[LastContactMethod],[CertificatePeriod],[CertificateIssuingCountry]) values (14520509,1055067293,2,1,0,1,0,'','','','','','','',getdate(),'王辉',2,'','','','','北京市顺义区后沙峪裕祥花园-4-401','101318',0,0,0,0,'',0,13910262848,0,0,0,'','','',0,'机票',getdate(),0,0,2,getdate(),'')

--查看复制监视器,看问题是否解决.注意分发代理可能已经停止,需要启动.
--或者在分发服务器分发数据库上执行监控SQL:
sp_repl_syn 'all_inventory_product','%',1

exec sp_repl_syn '%','%',2
exec sp_repl_syn2

--在发布数据库上查看失败的发布涉及哪些表,以及表的大小和行数:
select * from v_replication
where publication='all_reser_1'

/* 将复制命令转化为SQL
{CALL [sp_MSupd_dbocustomers] (,,,,,,,,,,,,,,1950-12-23 00:00:00.000,,,,,,,,,,,,,,,,,,,,,,,,2009-09-17 10:00:55.267,,,,,,14520515,0x004000004000)}

exec [sp_MSupd_dbocustomers] null,null,null,null,null,null,null,null,null,null,null,null,null,null,'1950-12-23',null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,'2009-09-17',null,null,null,null,null,14520515,0x004000004000
*/
/*--用来做半自动处理的脚本
drop table tempdb.dbo.##replcmd
create table ##replcmd(
xact_seqno varbinary(16)
,originator_srvname sysname null
,originator_db sysname null
,article_id int null
,type int null
,partial_command bit null
,hashkey int null
,originator_publication_id int null
,originator_db_version int null
,originator_lsn varbinary(16) null
,command nvarchar(1024)
,command_id int
)
insert into ##replcmd EXEC sp_browsereplcmds '0x00354C0500000DAC000C00000000','0x00354C0500000DAC000C00000000'

select xact_seqno,article_id,command,command_id from ##replcmd
*/

/*SELECT
commandtext = CONVERT(nvarchar(1024), CASE WHEN type = 30 THEN SUBSTRING(command, 17, 1024) ELSE command END),
*
FROM dbo.MSRepl_commands with(nolock)
WHERE xact_seqno = 0x00354C0500000DAC000C00000000
-- come from step 5
会得到如下不完整的结果:
[sp_MSupd_dbocustomers] (,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,?,,,,,,?,0x000000004000)}àpar 替代办法:
EXEC sp_browsereplcmds '0x00354C0500000DAC000C00000000','0x00354C0500000DAC000C00000000'
*/
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  职场 数据库 error
相关文章推荐