您的位置：首页 > 编程语言 > C#

C# 简单实现提取网页内容

2009-11-29 13:00 633 查看

下面的代码是从一个网络爬虫程序中提取出来的，觉得有用，记录下来。

代码

using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.IO;
using System.Net;
using System.Text;

namespace MyCsStudy
{
    class Program
    {
        /// <summary>
        /// Downloads the resource at <paramref name="url"/> over HTTP and returns
        /// the response body decoded as text.
        /// </summary>
        /// <param name="url">
        /// Absolute URL including the scheme (for example <c>http://</c>); the
        /// created request is cast to <see cref="HttpWebRequest"/>.
        /// </param>
        /// <param name="charset">
        /// Name of the encoding used to decode the body. May be null or empty, in
        /// which case the charset reported by the response is used, falling back
        /// to <see cref="Encoding.Default"/> when that is missing or unusable.
        /// </param>
        /// <returns>The full response body as a string.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="charset"/> is non-empty but is not a valid encoding name.
        /// </exception>
        /// <exception cref="WebException">
        /// Propagated unchanged from <see cref="HttpWebRequest.GetResponse"/>.
        /// </exception>
        public static string Fetch(string url, string charset)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            // 'using' releases the response, its stream and the reader on every
            // exit path, including when an exception is thrown part-way through.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            {
                // An explicit charset from the caller is trusted as-is, so a bad
                // name still surfaces as an ArgumentException. Only the
                // server-reported charset is treated leniently.
                Encoding encoding = string.IsNullOrEmpty(charset)
                    ? ResolveResponseEncoding(response.CharacterSet)
                    : Encoding.GetEncoding(charset);

                using (StreamReader sr = new StreamReader(resStream, encoding))
                {
                    return sr.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Maps the charset name reported by a response to an <see cref="Encoding"/>,
        /// returning <see cref="Encoding.Default"/> when the name is missing, blank,
        /// or not recognised.
        /// </summary>
        /// <param name="responseCharset">Charset name taken from the response; may be null.</param>
        /// <returns>The matching encoding, or <see cref="Encoding.Default"/>.</returns>
        private static Encoding ResolveResponseEncoding(string responseCharset)
        {
            if (string.IsNullOrEmpty(responseCharset))
            {
                return Encoding.Default;
            }

            // Some servers send the value quoted (charset="utf-8"); strip the
            // quotes so the encoding lookup does not reject an otherwise valid name.
            string name = responseCharset.Trim().Trim('"', '\'');
            if (name.Length == 0)
            {
                return Encoding.Default;
            }

            try
            {
                return Encoding.GetEncoding(name);
            }
            catch (ArgumentException)
            {
                // Unknown charset in the header: decode with the same fallback
                // used when no charset is reported, rather than failing the fetch.
                return Encoding.Default;
            }
        }

        /// <summary>
        /// Fetches a fixed page, writes its content to the console, and waits for
        /// a line of input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string webSite = @"http://www.google.cn"; // the URL must include the protocol
            string strHTML = Fetch(webSite, null);

            Console.Write(strHTML);

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
    }
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航