您的位置：首页 > 编程语言 > C#

C# 采集网页用的几个函数（有解释）

2008-12-07 16:27 295 查看

当当全新正版图书，9周年店庆，特价销售，全场免运费！

/// <summary>
/// Downloads the resource at <paramref name="Url"/> over HTTP and returns its body decoded as text.
/// </summary>
/// <param name="Url">Address to fetch. The request is cast to <c>HttpWebRequest</c>, so non-HTTP schemes fail and yield an empty string.</param>
/// <param name="charset">Name of the text encoding used to decode the body; <c>null</c> or empty falls back to "gb2312".</param>
/// <returns>The decoded response body, or an empty string when any step of the download fails.</returns>
public string GetHtmlSource(string Url, string charset)
{
    if (string.IsNullOrEmpty(charset))
    {
        charset = "gb2312";
    }

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

        // The using blocks release the connection and stream even when reading or
        // decoding throws; the original only closed them on the success path.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding(charset)))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: callers get "" for any failure (bad URL, network
        // error, unknown charset). The failure is traced instead of being silently dropped.
        System.Diagnostics.Trace.TraceWarning("GetHtmlSource failed for '{0}': {1}", Url, ex.Message);
        return "";
    }
}

/// <summary>
/// Extracts the text found between a begin marker and an end marker in HTML source.
/// When the markers occur several times, the text of the LAST occurrence is returned.
/// </summary>
/// <param name="code">HTML source to search; <c>null</c> or empty yields an empty string.</param>
/// <param name="wordsBegin">Begin marker. It is inserted into the pattern as-is, so regex metacharacters in it are interpreted as regex syntax.</param>
/// <param name="wordsEnd">End marker, inserted into the pattern as-is like <paramref name="wordsBegin"/>.</param>
/// <returns>The captured text of the last match, or an empty string when nothing matches.</returns>
public string SniffwebCode(string code, string wordsBegin, string wordsEnd)
{
    if (string.IsNullOrEmpty(code))
    {
        return "";
    }

    // [\s\S] matches any character including newlines. The original [/s/S] used
    // forward slashes and therefore only matched the literal characters '/', 's', 'S'.
    // RegexOptions.Compiled is omitted: the pattern is rebuilt on every call, so
    // compiling it would only add cost without changing the matches.
    Regex pattern = new Regex(wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd, RegexOptions.IgnoreCase);

    string lastCapture = "";
    for (Match match = pattern.Match(code); match.Success; match = match.NextMatch())
    {
        lastCapture = match.Groups["title"].Value;
    }

    return lastCapture;
}

/// <summary>
/// Collects every piece of text found between a begin marker and an end marker in HTML source.
/// </summary>
/// <param name="code">HTML source to search; <c>null</c> or empty yields an empty list.</param>
/// <param name="wordsBegin">Begin marker. It is inserted into the pattern as-is, so regex metacharacters in it are interpreted as regex syntax.</param>
/// <param name="wordsEnd">End marker, inserted into the pattern as-is like <paramref name="wordsBegin"/>.</param>
/// <returns>An <c>ArrayList</c> of strings, one per match in document order; empty when nothing matches.</returns>
public ArrayList SniffwebCodeReturnList(string code, string wordsBegin, string wordsEnd)
{
    ArrayList captures = new ArrayList();
    if (string.IsNullOrEmpty(code))
    {
        return captures;
    }

    // [\s\S] matches any character including newlines. The original [/s/S] used
    // forward slashes and therefore only matched the literal characters '/', 's', 'S'.
    // RegexOptions.Compiled is omitted: the pattern is rebuilt on every call, so
    // compiling it would only add cost without changing the matches.
    Regex pattern = new Regex(wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd, RegexOptions.IgnoreCase);

    for (Match match = pattern.Match(code); match.Success; match = match.NextMatch())
    {
        captures.Add(match.Groups["title"].Value);
    }

    return captures;
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航

C# 采集网页用的几个函数（有解释）