您的位置:首页 > 理论基础 > 计算机网络

cis.wengfu.com:用正则表达式替换 href 路径中重复的 http 标记

2010-03-18 19:47 190 查看
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Data;
using cncic.kmp.DataBase;
using System.Data.SqlClient;

namespace ConsoleApp_WF_UpdateSinofiLink
{
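/*
Sample of the malformed DOC_TEXT markup this tool targets: each href below repeats the
"http://www.sinofi.com/" prefix (the last sample line is truncated).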
<p> </p><br><p><font style="font-size: 15px">    点击此处下载:
* <a href="http://www.sinofi.com/http://www.sinofi.com/file/20105825035813.pdf">2010年1月第一周复合肥周报</a>
* </font></p><br><p><a href="http://www.sinofi.com/http://www.sinofi.com/file/200933

*/
/// <summary>
/// One-off console tool that repairs anchor links stored in CMS_DOCTEXT.DOC_TEXT whose
/// href contains the "http" marker twice, e.g.
/// "http://www.sinofi.com/http://www.sinofi.com/file/x.pdf" becomes
/// "http://www.sinofi.com/file/x.pdf".
/// </summary>
class Program
{
    /// <summary>Connection string used for both the SELECT and the UPDATE statements.</summary>
    public const string strconn = @"Persist Security Info=False;User ID=xxx;Password=***;
Initial Catalog=imis2009;Data Source=192.168.6.88";

    // Opening <a ...> tag with a quoted href. Group 1 is the quote character, group 2 the URL.
    // The lazy, tag-bounded prefix ([^<>]*?) keeps the match inside a single tag so the FIRST
    // anchor is found (a greedy ".*" would skip ahead to the last href on the line), and the
    // lazy URL group stops at the first closing quote.
    private static readonly Regex AnchorHrefRegex = new Regex(
        @"<a\b[^<>]*?\s+href=(\x22|\x27)([^\x22<>]+?)\1[^<>]*>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Loads the candidate documents, repairs every doubled link in each document that
    /// mentions www.sinofi.com, and writes back only the documents that actually changed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string sqlcmd = @"SELECT [DOCID], [DOC_TEXT], [Doc_ZR] FROM [Imis2009].[dbo].[CMS_DOCTEXT] where docid in(
select doc_id from (
select d.doc_id, 'c'+(select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.audit_time is not null and r.docid=d.doc_id order by r.audit_time desc) + '/' + cast(datepart(yyyy, doc_dt_lastmodified) as varchar)+ Right('00' + cast(datepart(mm, doc_dt_lastmodified) as varchar),2) + '/' + Right('00' + cast(datepart(mm, doc_dt_lastmodified) as varchar),2) + Right('00' + cast(datepart(dd, doc_dt_lastmodified) as varchar),2) + '_' + (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc) + '_' + cast(d.doc_id as varchar)as doc_url , (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc)as ch_id, d.doc_title, doc_dt_new, doc_dt_lastmodified from cms_docmain d left join cms_channels ch on (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc)=ch.ch_id left join cms_doctext t on d.doc_id=t.docid where exists(select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status='ID_PAGE_STATUS_APPROVE' and r.chid='13' and r.docid=d.doc_id order by r.audit_time desc) AND d.doc_title like '%复合肥%' --ORDER BY-- d.doc_dt_new
)m
) ";
        DataTable dt = helper.SQL.ExecuteDataset(strconn, CommandType.Text, sqlcmd).Tables[0];
        foreach (DataRow dr in dt.Rows)
        {
            string stmp = dr["doc_text"].ToString();
            string docid = dr["docid"].ToString();
            if (!stmp.Contains("www.sinofi.com"))
            {
                continue;
            }

            string doc_text = FixDoubledLinks(stmp);
            if (doc_text == stmp)
            {
                // Nothing was repaired (no anchor matched, or no href was doubled):
                // skip the pointless UPDATE round-trip.
                continue;
            }

            Console.WriteLine(doc_text);
            string sqlupdate = @"UPDATE [Imis2009].[dbo].[CMS_DOCTEXT]
SET [DOC_TEXT]=@doc_text
WHERE [DOCID]=@docid";
            helper.SQL.ExecuteNonQuery(strconn, CommandType.Text, sqlupdate,
                new SqlParameter("@doc_text", doc_text),
                new SqlParameter("@docid", docid));
        }
        Console.WriteLine("--------更新完成----------");
        Console.Read();
    }

    /// <summary>
    /// Repairs every anchor href in <paramref name="html"/> that contains "http" exactly
    /// twice, replacing all occurrences of the broken URL text with the repaired URL.
    /// </summary>
    /// <param name="html">Document markup to scan.</param>
    /// <returns>The markup with doubled links repaired; the input unchanged when none are found.</returns>
    private static string FixDoubledLinks(string html)
    {
        string result = html;
        foreach (Match anchor in AnchorHrefRegex.Matches(html))
        {
            // Group 2 always holds at least one character, so Replace never receives
            // an empty search string (which would throw ArgumentException).
            string href = anchor.Groups[2].Value;
            Console.WriteLine(href);
            string repaired = IsHave2Http(href);
            if (repaired != href)
            {
                result = result.Replace(href, repaired);
            }
        }
        return result;
    }

    /// <summary>
    /// Extracts the href of the first anchor tag in <paramref name="retVal"/> and returns
    /// its repaired form (see <see cref="IsHave2Http"/>).
    /// </summary>
    /// <param name="retVal">Markup to search.</param>
    /// <param name="source">Receives the href exactly as found, or an empty string when no anchor matches.</param>
    /// <returns>The repaired href, or an empty string when no anchor matches.</returns>
    public static string GetAHrefLink(string retVal, out string source)
    {
        source = string.Empty;
        if (string.IsNullOrEmpty(retVal))
        {
            return string.Empty;
        }

        Match anchor = AnchorHrefRegex.Match(retVal);
        if (!anchor.Success)
        {
            return string.Empty;
        }

        source = anchor.Groups[2].Value;
        Console.WriteLine(source);
        return IsHave2Http(source);
    }

    /// <summary>
    /// If <paramref name="retVal"/> contains "http" (case-insensitive) exactly twice,
    /// returns the part starting at the second occurrence; otherwise returns the input unchanged.
    /// </summary>
    /// <param name="retVal">URL text to inspect; null or empty is returned as-is.</param>
    /// <returns>The URL with the leading duplicate prefix removed, or the original value.</returns>
    public static string IsHave2Http(string retVal)
    {
        if (string.IsNullOrEmpty(retVal))
        {
            return retVal;
        }

        // Count occurrences with an ordinal, case-insensitive scan instead of
        // allocating a lower-cased copy and a split array.
        int occurrences = 0;
        int lastIndex = -1;
        int index = retVal.IndexOf("http", StringComparison.OrdinalIgnoreCase);
        while (index >= 0)
        {
            occurrences++;
            lastIndex = index;
            index = retVal.IndexOf("http", index + 4, StringComparison.OrdinalIgnoreCase);
        }

        if (occurrences != 2)
        {
            return retVal;
        }

        string strtmp = retVal.Substring(lastIndex);
        Console.WriteLine("更新成:" + strtmp);
        return strtmp;
    }
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐