C#简单实现提取网页内容
2009-11-29 13:00
633 查看
下面的代码是从一个网络爬虫程序中提取出来的，觉得有用，记录下来。
代码
using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.IO;
using System.Net;
using System.Text;
namespace MyCsStudy
{
    class Program
    {
        /// <summary>
        /// Downloads the resource at <paramref name="url"/> with an HTTP request
        /// and returns the response body decoded as text.
        /// </summary>
        /// <param name="url">Absolute URL to request; it must include the scheme (for example "http://").</param>
        /// <param name="charset">
        /// Name of the encoding used to decode the body. May be null or empty, in which
        /// case the charset reported by the response is used, and
        /// <see cref="Encoding.Default"/> when that is missing or unusable.
        /// </param>
        /// <returns>The decoded response body.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="charset"/> is non-empty but is not a supported encoding name.
        /// </exception>
        public static string Fetch(string url, string charset)
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);

            // The using statements dispose reader, stream and response in reverse
            // order on every exit path, replacing the hand-written finally chain.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, ResolveEncoding(charset, response.CharacterSet)))
            {
                return sr.ReadToEnd();
            }
        }

        /// <summary>
        /// Chooses the encoding for decoding a response body: the caller-supplied
        /// charset first, then the charset reported by the response, then
        /// <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="charset">Caller-supplied encoding name; may be null or empty.</param>
        /// <param name="responseCharset">Charset reported by the response; may be null or empty.</param>
        /// <returns>The encoding to decode the body with.</returns>
        private static Encoding ResolveEncoding(string charset, string responseCharset)
        {
            if (!string.IsNullOrEmpty(charset))
            {
                // An invalid caller-supplied name is a caller error: let the
                // ArgumentException from GetEncoding propagate, as it always has.
                return Encoding.GetEncoding(charset);
            }

            if (!string.IsNullOrEmpty(responseCharset))
            {
                // Servers sometimes quote the value (charset="utf-8"); strip the
                // quotes so GetEncoding can recognise the name.
                string name = responseCharset.Trim().Trim('"', '\'');
                if (name.Length > 0)
                {
                    try
                    {
                        return Encoding.GetEncoding(name);
                    }
                    catch (ArgumentException)
                    {
                        // Unknown or unsupported charset sent by the server: fall
                        // through to the default instead of failing the whole fetch.
                    }
                }
            }

            return Encoding.Default;
        }

        static void Main(string[] args)
        {
            string webSite = @"http://www.google.cn"; // the URL must include the scheme
            string strHTML = Fetch(webSite, null);
            Console.Write(strHTML);
            Console.ReadLine();
        }
    }
}
代码
using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.IO;
using System.Net;
using System.Text;
namespace MyCsStudy
{
    class Program
    {
        /// <summary>
        /// Downloads the resource at <paramref name="url"/> with an HTTP request
        /// and returns the response body decoded as text.
        /// </summary>
        /// <param name="url">Absolute URL to request; it must include the scheme (for example "http://").</param>
        /// <param name="charset">
        /// Name of the encoding used to decode the body. May be null or empty, in which
        /// case the charset reported by the response is used, and
        /// <see cref="Encoding.Default"/> when that is missing or unusable.
        /// </param>
        /// <returns>The decoded response body.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="charset"/> is non-empty but is not a supported encoding name.
        /// </exception>
        public static string Fetch(string url, string charset)
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);

            // The using statements dispose reader, stream and response in reverse
            // order on every exit path, replacing the hand-written finally chain.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, ResolveEncoding(charset, response.CharacterSet)))
            {
                return sr.ReadToEnd();
            }
        }

        /// <summary>
        /// Chooses the encoding for decoding a response body: the caller-supplied
        /// charset first, then the charset reported by the response, then
        /// <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="charset">Caller-supplied encoding name; may be null or empty.</param>
        /// <param name="responseCharset">Charset reported by the response; may be null or empty.</param>
        /// <returns>The encoding to decode the body with.</returns>
        private static Encoding ResolveEncoding(string charset, string responseCharset)
        {
            if (!string.IsNullOrEmpty(charset))
            {
                // An invalid caller-supplied name is a caller error: let the
                // ArgumentException from GetEncoding propagate, as it always has.
                return Encoding.GetEncoding(charset);
            }

            if (!string.IsNullOrEmpty(responseCharset))
            {
                // Servers sometimes quote the value (charset="utf-8"); strip the
                // quotes so GetEncoding can recognise the name.
                string name = responseCharset.Trim().Trim('"', '\'');
                if (name.Length > 0)
                {
                    try
                    {
                        return Encoding.GetEncoding(name);
                    }
                    catch (ArgumentException)
                    {
                        // Unknown or unsupported charset sent by the server: fall
                        // through to the default instead of failing the whole fetch.
                    }
                }
            }

            return Encoding.Default;
        }

        static void Main(string[] args)
        {
            string webSite = @"http://www.google.cn"; // the URL must include the scheme
            string strHTML = Fetch(webSite, null);
            Console.Write(strHTML);
            Console.ReadLine();
        }
    }
}
相关文章推荐
- c#关于网页内容抓取,简单爬虫的实现。(包括动态,静态的)
- c#关于网页内容抓取,简单爬虫的实现。(包括动态,静态的)
- c#关于网页内容抓取,简单爬虫的实现。(包括动态,静态的)
- c#关于网页内容抓取,简单爬虫的实现。(包括动态,静态的)
- 能够按页号提取word文档文本内容的小程序,由C#实现
- c#实现网页图片提取工具代码分享
- [导入]C#内容分页简单实现代码及祥解
- 内容分页简单实现代码及祥解(C#)
- C#内容分页简单实现代码及祥解
- C# 实现抓取网页内容(一)
- Python简单实现网页内容抓取功能示例
- AJAX实现简单的读取文本文档内容到网页--AJAX
- C#内容分页简单实现代码及祥解
- 内容分页简单实现代码及祥解(C#)
- C#实现网页内容正文抓取
- C#实现简单的网页爬虫
- C#内容分页简单实现代码及祥解
- 网页内容解析简单实现
- 问题1:怎么实现从某网页提取某标签中的内容?
- Java读取网页内容并生成静态页面的简单实现