C#获取指定网页源码的几种方法
2014-04-16 01:46
791 查看
// WebClient private string GetWebClient(string url) { string strHTML = ""; WebClient myWebClient = new WebClient(); Stream myStream = myWebClient.OpenRead(url); StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")); strHTML = sr.ReadToEnd(); myStream.Close(); return strHTML; } // WebRequest private string GetWebRequest(string url) { Uri uri = new Uri(url); WebRequest myReq = WebRequest.Create(uri); WebResponse result = myReq.GetResponse(); Stream receviceStream = result.GetResponseStream(); StreamReader readerOfStream = new StreamReader(receviceStream,System.Text.Encoding.GetEncoding("gb2312")); string strHTML = readerOfStream.ReadToEnd(); readerOfStream.Close(); receviceStream.Close(); result.Close(); return strHTML; } // HttpWebRequest private string GetHttpWebRequest(string url) { try { Uri uri = new Uri(url); HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(uri); myReq.UserAgent = "User-Agent:Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705"; myReq.Accept = "*/*"; myReq.KeepAlive = true; myReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5"); HttpWebResponse result = (HttpWebResponse)myReq.GetResponse(); Stream receviceStream = result.GetResponseStream(); StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("gb2312")); string strHTML = readerOfStream.ReadToEnd(); readerOfStream.Close(); receviceStream.Close(); result.Close(); return strHTML; } catch (Exception ex) { throw new Exception("采集指定网址异常," + ex.Message); } }
// 获取网页源码,如果启用了gzip压缩后页面获取会产生乱码,采用此方法可解决gzip压缩而产生的乱码情况
private string GetHtmlCode(string url)
{
string htmlCode;
HttpWebRequest webRequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
webRequest.Timeout = 30000;
webRequest.Method = "GET";
webRequest.UserAgent = "Mozilla/4.0";
webRequest.Headers.Add("Accept-Encoding", "gzip, deflate");
HttpWebResponse webResponse = (System.Net.HttpWebResponse)webRequest.GetResponse();
if (webResponse.ContentEncoding.ToLower() == "gzip")//如果使用了GZip则先解压 {
using (System.IO.Stream streamReceive = webResponse.GetResponseStream())
{
using (var zipStream =
new System.IO.Compression.GZipStream(streamReceive, System.IO.Compression.CompressionMode.Decompress))
{
using (StreamReader sr = new System.IO.StreamReader(zipStream, Encoding.Default))
{
htmlCode = sr.ReadToEnd();
}
}
}
}
else
{
using (System.IO.Stream streamReceive = webResponse.GetResponseStream())
{
using (System.IO.StreamReader sr = new System.IO.StreamReader(streamReceive, Encoding.Default))
{
htmlCode = sr.ReadToEnd();
}
}
}
return htmlCode;
}
原文地址
相关文章推荐
- C#获取指定网页源码的几种方法
- Linux获取网页源码的几种方法 linux爬虫程序
- c# 获取网页源码,自动判断编码格式新方法!(转)
- .NET下获取网页源码的几种方法
- 记录 -- C# 获取网页源码,判断编码格式方法
- C#获取网页中指定图片的方法
- c# 获取网页源码,自动判断编码格式新方法!
- c#获取网页源代码的几种方法
- c# 获取网页源码,自动判断编码格式新方法
- Linux获取网页源码的几种方法 - 遗世之都 - ITeye技术网站
- Linux获取网页源码的几种方法
- Linux获取网页源码的几种方法 linux爬虫程序
- c#获取网页源代码的几种方法
- Linux获取网页源码的几种方法
- asp.net C# 获取网页源码的几种方式
- C#获取指定网页源码
- 如何在C#中获取指定网页源码的示例
- C#获取某个URL下网页的方法
- C#实现下载网页HTML源码的方法
- JS获取网页中HTML元素的几种方法分析