获取远程网页的内容之二(downmoon原创)
2006-04-14 14:08
555 查看
本机直接上网时,请参看
获取远程网页的内容之一(downmoon原创)
本文仅针对AD(域)环境下通过代理上网的情况:
代码如下:
1、定义变量:
方法:
获取指定远程网页内容
获取指定远程网页元素字节数组:
转换指定字节数组为字符串:
借用这个,写了个抽取中国天气网预报的服务!很爽!
在webForm中WebRequest/WebClient/WebBrowser获取远程页面源码的三种方式(downmoon)
获取远程网页的内容之一(downmoon原创)
本文仅针对AD(域)环境下通过代理上网的情况:
代码如下:
1、定义变量:
定义变量 #region 定义变量 private string strFireWallIP ... { get ... { return System.Configuration.ConfigurationSettings.AppSettings[ " strFireWallIP " ]; } } private string strFireWallPort ... { get ... { return System.Configuration.ConfigurationSettings.AppSettings[ " strFireWallPort " ]; } } private string strUID ... { get ... { return System.Configuration.ConfigurationSettings.AppSettings[ " strUID " ]; } } private string strPWD ... { get ... { return System.Configuration.ConfigurationSettings.AppSettings[ " strPWD " ]; } } private string strDomain ... { get ... { return System.Configuration.ConfigurationSettings.AppSettings[ " strDomain " ]; } } #endregion |
获取指定远程网页内容
/**/ /// <summary> /// 获取指定远程网页内容 /// </summary> /// <param name="strUrl"> 所要查找的远程网页地址 </param> /// <returns></returns> // [WebMethod(Description = "获取指定远程网页内容。")] public string getPageContent( string strUrl) ... { string strResult = "" ; this .CurrentUrl = strUrl; if ( this .CurrentUrl.ToLower().StartsWith( " http:// " ) == false ) this .CurrentUrl = " http:// " + this .CurrentUrl; try ... { contentBytes = GetHtmlByte(CurrentUrl); } catch (Exception err) ... { strResult = " 请求错误: " + err.Message; } if (contentBytes == null ) ... { throw new Exception( " 没有获得返回值 " ); } strResult = getStringFromByteArray(contentBytes,Encoding.UTF8); return strResult; } |
获取指定远程网页元素字节数组 #region 获取指定远程网页元素字节数组 /**/ /// <summary> /// 获取指定远程网页元素字节数组 /// </summary> /// <param name="strUrl"> 所要查找的远程网页地址 </param> /// <returns></returns> private byte [] GetHtmlByte( string strUrl) ... { string strPara = (strUrl.IndexOf( " ? " ) >= 0 ? strUrl.Substring(strUrl.IndexOf( " ? " ) + 1 ): "" ); System.Text.Encoding encoding = new UTF8Encoding(); byte [] byte1 = encoding.GetBytes(strPara); byte [] byteReturn = new byte [ 10000000 ]; if (strUrl.Trim().ToLower().StartsWith( " http:// " ) == false ) ... { strUrl = " http:// " + strUrl; } HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(strUrl); myHttpWebRequest.AllowAutoRedirect = true ; myHttpWebRequest.KeepAlive = true ; myHttpWebRequest.UserAgent = " Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322) " ; System.Net .WebProxy proxy = new WebProxy(strFireWallIP + " : " + strFireWallPort, true ); // proxy=(WebProxy)System.Net.GlobalProxySelection.Select; System.Net.NetworkCredential myCredential = new NetworkCredential(strUID,strPWD,strDomain); proxy.Credentials = myCredential; myHttpWebRequest.Proxy = proxy; HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse(); byte [] bRead = new byte [ 1024 ]; int lngCount = 1 ; int totalLen = 0 ; Stream recWeb = myHttpWebResponse.GetResponseStream(); lngCount = recWeb.Read(bRead, 0 , 1024 ); while (lngCount > 0 ) ... { Array.Copy(bRead, 0 ,byteReturn,totalLen,lngCount); totalLen += lngCount; lngCount = recWeb.Read(bRead, 0 , 1024 ); } recWeb.Close(); byte [] byteGets = new byte [totalLen]; Array.Copy(byteReturn, 0 ,byteGets, 0 ,totalLen); byteReturn = null ; bRead = null ; return byteGets; } #endregion |
转换指定字节数组为字符串 #region 转换指定字节数组为字符串 /**/ /// <summary> /// 转换指定字节数组为字符串 /// </summary> /// <param name="ByteGet"> 字节数组Byte[] </param> /// <param name="myEncoding"> 编码方式 </param> /// <returns></returns> private static string getStringFromByteArray(Byte[] ByteGet,Encoding myEncoding) ... { int i,lngCount; StringBuilder aTemp = new StringBuilder( 10000 ); lngCount = ByteGet.Length; for (i = 0 ;i < lngCount;i += 10000 ) ... { aTemp.Append(myEncoding.GetString(ByteGet,i,(lngCount >= i + 10000 ? 10000 :lngCount - i))); } if (i <= lngCount) ... { aTemp.Append(myEncoding.GetString(ByteGet,i,(lngCount - i))); } return aTemp.ToString(); } #endregion |
在webForm中WebRequest/WebClient/WebBrowser获取远程页面源码的三种方式(downmoon)