cis.wengfu.com:用正则表达式替换 href 路径中重复的 http 标记
2010-03-18 19:47
190 查看
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Data;
using cncic.kmp.DataBase;
using System.Data.SqlClient;
namespace ConsoleApp_WF_UpdateSinofiLink
{
/*
<p> </p><br><p><font style="font-size: 15px"> 点击此处下载:
* <a href="http://www.sinofi.com/http://www.sinofi.com/file/20105825035813.pdf">2010年1月第一周复合肥周报</a>
* </font></p><br><p><a href="http://www.sinofi.com/http://www.sinofi.com/file/200933
*/
/// <summary>
/// One-off maintenance tool: scans CMS document bodies for anchor links whose
/// href accidentally contains the site prefix twice
/// (e.g. "http://www.sinofi.com/http://www.sinofi.com/file/x.pdf") and writes
/// the repaired HTML back to the CMS_DOCTEXT table.
/// </summary>
class Program
{
    /// <summary>
    /// Connection string used for both the SELECT and the UPDATE.
    /// NOTE(review): credentials are embedded in source; consider moving them
    /// to configuration.
    /// </summary>
    public const string strconn = @"Persist Security Info=False;User ID=xxx;Password=***;
Initial Catalog=imis2009;Data Source=192.168.6.88";

    /// <summary>
    /// Matches a quoted href attribute inside a single &lt;a ...&gt; tag.
    /// Group 1 is the quote character, group 2 is the URL.
    /// The attribute scan uses [^&lt;&gt;] instead of a greedy ".*" so a match can
    /// never run across several tags, and "\s" right after "&lt;a" keeps tags such
    /// as &lt;abbr&gt; or &lt;area&gt; from matching.
    /// </summary>
    private static readonly Regex AnchorHrefPattern = new Regex(
        @"<a(?:\s[^<>]*?)?\shref=(\x22|\x27)([^<>]+?)\1[^<>]*>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Loads the candidate documents, repairs every doubled link in each one and
    /// updates only the rows whose text actually changed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string sqlcmd = @"SELECT [DOCID], [DOC_TEXT], [Doc_ZR] FROM [Imis2009].[dbo].[CMS_DOCTEXT] where docid in(
select doc_id from (
select d.doc_id, 'c'+(select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.audit_time is not null and r.docid=d.doc_id order by r.audit_time desc) + '/' + cast(datepart(yyyy, doc_dt_lastmodified) as varchar)+ Right('00' + cast(datepart(mm, doc_dt_lastmodified) as varchar),2) + '/' + Right('00' + cast(datepart(mm, doc_dt_lastmodified) as varchar),2) + Right('00' + cast(datepart(dd, doc_dt_lastmodified) as varchar),2) + '_' + (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc) + '_' + cast(d.doc_id as varchar)as doc_url , (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc)as ch_id, d.doc_title, doc_dt_new, doc_dt_lastmodified from cms_docmain d left join cms_channels ch on (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc)=ch.ch_id left join cms_doctext t on d.doc_id=t.docid where exists(select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status='ID_PAGE_STATUS_APPROVE' and r.chid='13' and r.docid=d.doc_id order by r.audit_time desc) AND d.doc_title like '%复合肥%' --ORDER BY-- d.doc_dt_new
)m
) ";
        // The statement is the same for every row, so build it once.
        string sqlupdate = @"UPDATE [Imis2009].[dbo].[CMS_DOCTEXT]
SET [DOC_TEXT]=@doc_text
WHERE [DOCID]=@docid";

        DataTable dt = helper.SQL.ExecuteDataset(strconn, CommandType.Text, sqlcmd).Tables[0];
        foreach (DataRow dr in dt.Rows)
        {
            string stmp = dr["doc_text"].ToString();
            string docid = dr["docid"].ToString();
            if (!stmp.Contains("www.sinofi.com"))
            {
                continue;
            }

            string doc_text = FixAllLinks(stmp);
            if (doc_text == stmp)
            {
                // No anchor needed repairing (or none was found at all), so
                // there is nothing to write back for this row.
                continue;
            }

            Console.WriteLine(doc_text);
            helper.SQL.ExecuteNonQuery(strconn, CommandType.Text, sqlupdate,
                new SqlParameter("@doc_text", doc_text),
                new SqlParameter("@docid", docid));
        }
        Console.WriteLine("--------更新完成----------");
        Console.Read();
    }

    /// <summary>
    /// Repairs every anchor href in <paramref name="html"/> that
    /// <see cref="IsHave2Http"/> recognises as doubled.
    /// </summary>
    /// <param name="html">HTML fragment to repair; may be null or empty.</param>
    /// <returns>
    /// The fragment with each doubled URL replaced by its repaired form, or the
    /// input unchanged when no href needed repairing.
    /// </returns>
    public static string FixAllLinks(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return html;
        }

        string result = html;
        foreach (Match anchor in AnchorHrefPattern.Matches(html))
        {
            // Group 2 always holds at least one character, so the Replace
            // below can never be handed an empty search string.
            string url = anchor.Groups[2].Value;
            string repaired = IsHave2Http(url);
            if (repaired != url)
            {
                // Replace the URL text everywhere it occurs, so a link whose
                // visible text repeats the URL is corrected as well.
                result = result.Replace(url, repaired);
            }
        }
        return result;
    }

    /// <summary>
    /// Extracts the href of the first anchor tag in <paramref name="retVal"/>.
    /// </summary>
    /// <param name="retVal">HTML fragment to search.</param>
    /// <param name="source">
    /// Receives the href exactly as it appears in the HTML, or an empty string
    /// when no quoted anchor href is found.
    /// </param>
    /// <returns>
    /// The href after <see cref="IsHave2Http"/> has been applied; an empty
    /// string when no anchor is found.
    /// </returns>
    public static string GetAHrefLink(string retVal, out string source)
    {
        Match anchor = AnchorHrefPattern.Match(retVal ?? string.Empty);
        source = anchor.Success ? anchor.Groups[2].Value : string.Empty;
        Console.WriteLine(source);
        return IsHave2Http(source);
    }

    /// <summary>
    /// If <paramref name="retVal"/> contains the text "http" exactly twice
    /// (case-insensitive), returns the part starting at the second occurrence;
    /// otherwise returns the input unchanged.
    /// </summary>
    /// <param name="retVal">URL to inspect; may be null or empty.</param>
    /// <returns>The repaired URL, or the original value when no repair applies.</returns>
    public static string IsHave2Http(string retVal)
    {
        if (string.IsNullOrEmpty(retVal))
        {
            return retVal;
        }

        const string marker = "http";

        // Ordinal comparisons: URLs are not linguistic text, so matching must
        // not depend on the current culture.
        int first = retVal.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
        if (first < 0)
        {
            return retVal;
        }

        int second = retVal.IndexOf(marker, first + marker.Length, StringComparison.OrdinalIgnoreCase);
        if (second < 0)
        {
            return retVal;
        }

        // NOTE(review): only the "exactly two" case is rewritten, as before;
        // URLs with three or more occurrences are left untouched.
        if (retVal.IndexOf(marker, second + marker.Length, StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return retVal;
        }

        string strtmp = retVal.Substring(second);
        Console.WriteLine("更新成:" + strtmp);
        return strtmp;
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Data;
using cncic.kmp.DataBase;
using System.Data.SqlClient;
namespace ConsoleApp_WF_UpdateSinofiLink
{
/*
<p> </p><br><p><font style="font-size: 15px"> 点击此处下载:
* <a href="http://www.sinofi.com/http://www.sinofi.com/file/20105825035813.pdf">2010年1月第一周复合肥周报</a>
* </font></p><br><p><a href="http://www.sinofi.com/http://www.sinofi.com/file/200933
*/
/// <summary>
/// One-off maintenance tool: scans CMS document bodies for anchor links whose
/// href accidentally contains the site prefix twice
/// (e.g. "http://www.sinofi.com/http://www.sinofi.com/file/x.pdf") and writes
/// the repaired HTML back to the CMS_DOCTEXT table.
/// </summary>
class Program
{
    /// <summary>
    /// Connection string used for both the SELECT and the UPDATE.
    /// NOTE(review): credentials are embedded in source; consider moving them
    /// to configuration.
    /// </summary>
    public const string strconn = @"Persist Security Info=False;User ID=xxx;Password=***;
Initial Catalog=imis2009;Data Source=192.168.6.88";

    /// <summary>
    /// Matches a quoted href attribute inside a single &lt;a ...&gt; tag.
    /// Group 1 is the quote character, group 2 is the URL.
    /// The attribute scan uses [^&lt;&gt;] instead of a greedy ".*" so a match can
    /// never run across several tags, and "\s" right after "&lt;a" keeps tags such
    /// as &lt;abbr&gt; or &lt;area&gt; from matching.
    /// </summary>
    private static readonly Regex AnchorHrefPattern = new Regex(
        @"<a(?:\s[^<>]*?)?\shref=(\x22|\x27)([^<>]+?)\1[^<>]*>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Loads the candidate documents, repairs every doubled link in each one and
    /// updates only the rows whose text actually changed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string sqlcmd = @"SELECT [DOCID], [DOC_TEXT], [Doc_ZR] FROM [Imis2009].[dbo].[CMS_DOCTEXT] where docid in(
select doc_id from (
select d.doc_id, 'c'+(select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.audit_time is not null and r.docid=d.doc_id order by r.audit_time desc) + '/' + cast(datepart(yyyy, doc_dt_lastmodified) as varchar)+ Right('00' + cast(datepart(mm, doc_dt_lastmodified) as varchar),2) + '/' + Right('00' + cast(datepart(mm, doc_dt_lastmodified) as varchar),2) + Right('00' + cast(datepart(dd, doc_dt_lastmodified) as varchar),2) + '_' + (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc) + '_' + cast(d.doc_id as varchar)as doc_url , (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc)as ch_id, d.doc_title, doc_dt_new, doc_dt_lastmodified from cms_docmain d left join cms_channels ch on (select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status ='ID_PAGE_STATUS_APPROVE' and r.docid=d.doc_id order by r.audit_time desc)=ch.ch_id left join cms_doctext t on d.doc_id=t.docid where exists(select top 1 cast(chid as varchar) from cms_doc_ch r where audit_status='ID_PAGE_STATUS_APPROVE' and r.chid='13' and r.docid=d.doc_id order by r.audit_time desc) AND d.doc_title like '%复合肥%' --ORDER BY-- d.doc_dt_new
)m
) ";
        // The statement is the same for every row, so build it once.
        string sqlupdate = @"UPDATE [Imis2009].[dbo].[CMS_DOCTEXT]
SET [DOC_TEXT]=@doc_text
WHERE [DOCID]=@docid";

        DataTable dt = helper.SQL.ExecuteDataset(strconn, CommandType.Text, sqlcmd).Tables[0];
        foreach (DataRow dr in dt.Rows)
        {
            string stmp = dr["doc_text"].ToString();
            string docid = dr["docid"].ToString();
            if (!stmp.Contains("www.sinofi.com"))
            {
                continue;
            }

            string doc_text = FixAllLinks(stmp);
            if (doc_text == stmp)
            {
                // No anchor needed repairing (or none was found at all), so
                // there is nothing to write back for this row.
                continue;
            }

            Console.WriteLine(doc_text);
            helper.SQL.ExecuteNonQuery(strconn, CommandType.Text, sqlupdate,
                new SqlParameter("@doc_text", doc_text),
                new SqlParameter("@docid", docid));
        }
        Console.WriteLine("--------更新完成----------");
        Console.Read();
    }

    /// <summary>
    /// Repairs every anchor href in <paramref name="html"/> that
    /// <see cref="IsHave2Http"/> recognises as doubled.
    /// </summary>
    /// <param name="html">HTML fragment to repair; may be null or empty.</param>
    /// <returns>
    /// The fragment with each doubled URL replaced by its repaired form, or the
    /// input unchanged when no href needed repairing.
    /// </returns>
    public static string FixAllLinks(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return html;
        }

        string result = html;
        foreach (Match anchor in AnchorHrefPattern.Matches(html))
        {
            // Group 2 always holds at least one character, so the Replace
            // below can never be handed an empty search string.
            string url = anchor.Groups[2].Value;
            string repaired = IsHave2Http(url);
            if (repaired != url)
            {
                // Replace the URL text everywhere it occurs, so a link whose
                // visible text repeats the URL is corrected as well.
                result = result.Replace(url, repaired);
            }
        }
        return result;
    }

    /// <summary>
    /// Extracts the href of the first anchor tag in <paramref name="retVal"/>.
    /// </summary>
    /// <param name="retVal">HTML fragment to search.</param>
    /// <param name="source">
    /// Receives the href exactly as it appears in the HTML, or an empty string
    /// when no quoted anchor href is found.
    /// </param>
    /// <returns>
    /// The href after <see cref="IsHave2Http"/> has been applied; an empty
    /// string when no anchor is found.
    /// </returns>
    public static string GetAHrefLink(string retVal, out string source)
    {
        Match anchor = AnchorHrefPattern.Match(retVal ?? string.Empty);
        source = anchor.Success ? anchor.Groups[2].Value : string.Empty;
        Console.WriteLine(source);
        return IsHave2Http(source);
    }

    /// <summary>
    /// If <paramref name="retVal"/> contains the text "http" exactly twice
    /// (case-insensitive), returns the part starting at the second occurrence;
    /// otherwise returns the input unchanged.
    /// </summary>
    /// <param name="retVal">URL to inspect; may be null or empty.</param>
    /// <returns>The repaired URL, or the original value when no repair applies.</returns>
    public static string IsHave2Http(string retVal)
    {
        if (string.IsNullOrEmpty(retVal))
        {
            return retVal;
        }

        const string marker = "http";

        // Ordinal comparisons: URLs are not linguistic text, so matching must
        // not depend on the current culture.
        int first = retVal.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
        if (first < 0)
        {
            return retVal;
        }

        int second = retVal.IndexOf(marker, first + marker.Length, StringComparison.OrdinalIgnoreCase);
        if (second < 0)
        {
            return retVal;
        }

        // NOTE(review): only the "exactly two" case is rewritten, as before;
        // URLs with three or more occurrences are left untouched.
        if (retVal.IndexOf(marker, second + marker.Length, StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return retVal;
        }

        string strtmp = retVal.Substring(second);
        Console.WriteLine("更新成:" + strtmp);
        return strtmp;
    }
}
}
相关文章推荐
- XDocReport 的简单使用 操作word 替换变量,动态图片,指定操作指令(程序)扩展(转自:http://www.cnblogs.com/fish-in-sky/p/4973237.html)
- 在b/s开发中经常用到的javaScript (http://onlyaa.com/html/common/20080705/2251_5.html)
- 组织和遍历TreeView里面的数据 http://www.cnblogs.com/aspnetx/archive/2007/09/18/897083.html
- java访问修饰符详解——学java,零基础不怕,不只要理论,更要实践+项目,<a href=http://www.bjweixin.com>太原维信科技提供 </a>
- C# 替换去除HTML标记方法(正则表达式)
- 工作需要用到C++中的正则表达式,所以就研究了以上三种正则。 http://www.cnblogs.com/pmars/archive/2012/10/24/2736831.html
- setInterval和setTimeout的使用区别(转自 <a href="http://www.cnblogs.com/sk-net/archive/2008/01/24/1050770.html">http://www.cnblogs.com/sk-net/archive/2008/01/24/1050770.html</a>)
- 做CSS精简时可能会用到的正则表达式 (http://www.cnblogs.com/AndersLiu/archive/2008/06/24/css-minifying-regular-expressions.html)
- Windows Phone开发(34):路径标记语法 转:http://blog.csdn.net/tcjiaan/article/details/7492548
- C# 替换去除HTML标记方法(正则表达式)
- 请使用socket相关函数(非curl)实现如下功能:构造一个post请求,发送到指定httpserver的指定端口的指定请求路径(如http://www.example.com:8080/test)
- java中集合 list Map arraylist linkedlist hashmap ——学java,零基础不怕,不只要理论,更要实践+项目,<a href=http://www.bjweixin.com>太原维信科技提供 </a>
- HTTP协议中的短轮询、长轮询、长连接和短连接 序言 最近刚到公司不到一个月,正处于熟悉项目和源码的阶段,因此最近经常会看一些源码。在研究一个项目的时候,源码里面用到了HTTP的长轮询。由于之前没太
- thinkcmf中Ueditor图片上传相对路径的修改,删除前缀http://xx.com
- iOS a 标签链接:<a href ="http://www.baidu.com">xxxx</a>提取xxxx的3种方法
- DB2 V9.7新特性 - 降低高水位标记(转:http://www.ibm.com/developerworks/cn/data/library/techarticles/dm-1007kongzh/)
- 正则表达式话题(http://www.regexlab.com/zh/regtopic.htm)
- HTTP://WWW.sina.com 替换域名为360.com
- vs2005下彩色验证码的实现(可自由定义是否加入干扰点、验证码位数等验证码显示效果) <br />转自<a href="http://www.cnblogs.com/zm235/archive/2006/10/02/520233.html" target="_blank">http://www.cnblogs.com/zm235/archive/2006/10/02/520233.html</a>
- 文件下载,转载路径:http://www.cnblogs.com/xdp-gacl/p/3789624.html