您的位置:首页 > 编程语言 > C#

C# 简单实现提取网页内容

2009-11-29 13:00 633 查看
下面的代码是从一个网络爬虫程序中提取出来的，我觉得很有用，特此记录下来。

代码

using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.IO;
using System.Net;
using System.Text;

namespace MyCsStudy
{
class Program
{
    /// <summary>
    /// Minimal page fetcher: issues a request for <paramref name="url"/> and
    /// returns the response body decoded as text.
    /// </summary>
    /// <param name="url">Absolute URL to fetch; it must include the scheme (for example "http://").</param>
    /// <param name="charset">
    /// Name of the encoding used to decode the body. May be null or empty, in which case
    /// the charset reported by the response is used, falling back to
    /// <see cref="Encoding.Default"/> when that is missing or not recognised.
    /// </param>
    /// <returns>The decoded response body.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="charset"/> is non-empty but is not a supported encoding name.
    /// </exception>
    public static string Fetch(string url, string charset)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // The using statements release the reader, the stream and the response
        // (in that order) even when reading throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        {
            Encoding encoding = ResolveEncoding(charset, response.CharacterSet);
            using (StreamReader sr = new StreamReader(resStream, encoding))
            {
                return sr.ReadToEnd();
            }
        }
    }

    /// <summary>
    /// Chooses the decoding: the caller's charset first, then the server-reported one,
    /// then <see cref="Encoding.Default"/>.
    /// </summary>
    private static Encoding ResolveEncoding(string charset, string serverCharset)
    {
        // An explicit charset is the caller's decision; an invalid name is a caller
        // error, so the ArgumentException from GetEncoding is allowed to propagate.
        if (!string.IsNullOrEmpty(charset))
        {
            return Encoding.GetEncoding(charset);
        }

        if (!string.IsNullOrEmpty(serverCharset))
        {
            try
            {
                // Strip whitespace and quotes that may surround the label before the lookup.
                return Encoding.GetEncoding(serverCharset.Trim(' ', '"', '\''));
            }
            catch (ArgumentException)
            {
                // Unknown or malformed label sent by the server: do not fail the
                // whole fetch, use the default encoding below instead.
            }
        }

        return Encoding.Default;
    }

    /// <summary>
    /// Demo entry point: fetches a fixed page, writes its HTML to the console
    /// and waits for Enter before exiting.
    /// </summary>
    static void Main(string[] args)
    {
        string webSite = @"http://www.google.cn"; // the URL must include the scheme
        string strHTML = Fetch(webSite, null);

        Console.Write(strHTML);

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: