您的位置:首页 > 编程语言 > C#

C# 采集网页用的几个函数(有解释)

2008-12-07 16:27 295 查看
当当全新正版图书,9周年店庆,特价销售,全场免运费!

/// <summary>
/// Requests <paramref name="Url"/> and returns the response body decoded as text.
/// </summary>
/// <param name="Url">Address to request; handed to <c>WebRequest.Create</c>.</param>
/// <param name="charset">
/// Name of the encoding used to decode the response body.
/// When null or empty, "gb2312" is used.
/// </param>
/// <returns>
/// The decoded response body, or an empty string when the request, the
/// encoding lookup or the read fails for any reason.
/// </returns>
public string GetHtmlSource(string Url, string charset)
{
    if (string.IsNullOrEmpty(charset))
    {
        charset = "gb2312";
    }

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

        // The using blocks dispose the response, stream and reader even when
        // the encoding lookup or the read throws part-way through.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(charset)))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort behaviour: callers receive an empty string
        // instead of an exception when the page cannot be fetched or decoded.
        return "";
    }
}

/// <summary>
/// Extracts the text located between a begin marker and an end marker in
/// <paramref name="code"/>.
/// </summary>
/// <param name="code">Text to search, e.g. HTML source.</param>
/// <param name="wordsBegin">
/// Begin marker. It is concatenated into the regular expression unescaped,
/// so any regex metacharacters in it are interpreted as pattern syntax.
/// </param>
/// <param name="wordsEnd">
/// End marker. Concatenated into the regular expression unescaped, like
/// <paramref name="wordsBegin"/>.
/// </param>
/// <returns>
/// The text captured by the last match in <paramref name="code"/>, or an
/// empty string when there is no match. Matching ignores case.
/// </returns>
public string SniffwebCode(string code, string wordsBegin, string wordsEnd)
{
    // [\s\S] matches every character including line breaks; the lazy +?
    // stops the capture at the nearest following end marker.
    Regex regex = new Regex(
        wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd,
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    string lastCapture = "";

    // Each successful match overwrites the previous one, so the final match wins.
    for (Match match = regex.Match(code); match.Success; match = match.NextMatch())
    {
        lastCapture = match.Groups["title"].Value;
    }

    return lastCapture;
}

/// <summary>
/// Collects every piece of text located between a begin marker and an end
/// marker in <paramref name="code"/>.
/// </summary>
/// <param name="code">Text to search, e.g. HTML source.</param>
/// <param name="wordsBegin">
/// Begin marker. It is concatenated into the regular expression unescaped,
/// so any regex metacharacters in it are interpreted as pattern syntax.
/// </param>
/// <param name="wordsEnd">
/// End marker. Concatenated into the regular expression unescaped, like
/// <paramref name="wordsBegin"/>.
/// </param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, one per match in order of
/// appearance; empty when there is no match. Matching ignores case.
/// </returns>
public ArrayList SniffwebCodeReturnList(string code, string wordsBegin, string wordsEnd)
{
    // [\s\S] matches every character including line breaks; the lazy +?
    // stops each capture at the nearest following end marker.
    Regex regex = new Regex(
        wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd,
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    ArrayList captures = new ArrayList();

    for (Match match = regex.Match(code); match.Success; match = match.NextMatch())
    {
        captures.Add(match.Groups["title"].Value);
    }

    return captures;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: