您的位置:首页 > 编程语言 > C#

c# 获取网页源码

2011-04-26 23:11 477 查看
C# 获取指定网页的 HTML 源代码,可使用 WebClient、WebRequest、HttpWebRequest 三种方式来实现。

当然也可以使用 WebBrowser 控件,不过本文不讨论如何通过 WebBrowser 获取网页源码。

WebClient

view sourceprint?

/// <summary>
/// Downloads the resource at <paramref name="url"/> with <see cref="WebClient"/>
/// and returns its content as a string.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response body decoded with the "utf-8" encoding.</returns>
private string GetWebClient(string url)
{
    // The using blocks release the client, the response stream and the reader
    // even when OpenRead or ReadToEnd throws.
    using (WebClient myWebClient = new WebClient())
    using (Stream myStream = myWebClient.OpenRead(url))
    using (StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")))
    {
        return sr.ReadToEnd();
    }
}

WebRequest

view sourceprint?

/// <summary>
/// Downloads the resource at <paramref name="url"/> with <see cref="WebRequest"/>
/// and returns its content as a string.
/// </summary>
/// <param name="url">Address of the page to download; it is parsed into a <see cref="Uri"/>.</param>
/// <returns>The response body decoded with the "utf-8" encoding.</returns>
private string GetWebRequest(string url)
{
    Uri uri = new Uri(url);
    WebRequest myReq = WebRequest.Create(uri);

    // The using blocks close the response, its stream and the reader on every
    // exit path, including when reading the body throws.
    using (WebResponse result = myReq.GetResponse())
    using (Stream responseStream = result.GetResponseStream())
    using (StreamReader readerOfStream = new StreamReader(responseStream, System.Text.Encoding.GetEncoding("utf-8")))
    {
        return readerOfStream.ReadToEnd();
    }
}

HttpWebRequest

view sourceprint?

/// <summary>
/// Downloads the resource at <paramref name="url"/> with <see cref="HttpWebRequest"/>,
/// sending browser-like request headers, and returns its content as a string.
/// </summary>
/// <param name="url">Address of the page to download; it is parsed into a <see cref="Uri"/>.</param>
/// <returns>The response body decoded with the "utf-8" encoding.</returns>
private string GetHttpWebRequest(string url)
{
    Uri uri = new Uri(url);
    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(uri);

    // UserAgent holds only the header VALUE; the header name is added by the
    // framework, so it must not be repeated inside the string.
    myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
    myReq.Accept = "*/*";
    myReq.KeepAlive = true;
    myReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

    // The using blocks close the response, its stream and the reader on every
    // exit path, including when reading the body throws.
    using (HttpWebResponse result = (HttpWebResponse)myReq.GetResponse())
    using (Stream responseStream = result.GetResponseStream())
    using (StreamReader readerOfStream = new StreamReader(responseStream, System.Text.Encoding.GetEncoding("utf-8")))
    {
        return readerOfStream.ReadToEnd();
    }
}
注意“utf-8”应与指定网页的编码对应。

总结

可以看到,HttpWebRequest 方式最复杂,但却提供了更多的可选设置。

有的网站会检测客户端的 User-Agent,如 163.com。如果使用 WebClient 或 WebRequest 方式获取,得到的将是错误提示页面的内容。

而通过HttpWebRequest 就没问题。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: