用socket来代替HttpWebRequest和HttpWebResponse
2012-03-09 09:40
537 查看
紧接着上一篇博客:通常爬虫并不知道对方的 IP,只知道它的 URL,所以先把客户端代码修改如下:
以上是用socket同步的方式来实现的,以下是异步方式
/// <summary>
/// Per-request state that is handed from one asynchronous socket callback to the next.
/// </summary>
public class StateObject:IDisposable
{
    /// <summary>Socket used for the request; reset to null by <see cref="Dispose"/>.</summary>
    public Socket workSocket = null;

    /// <summary>Size, in bytes, of <see cref="buffer"/>.</summary>
    public const int BufferSize = 256;

    /// <summary>Scratch buffer filled by each receive call.</summary>
    public byte[] buffer = new byte[BufferSize];

    /// <summary>Accumulates the raw bytes received so far.</summary>
    public MemoryStream Stream = new MemoryStream();

    /// <summary>Request text that is sent once the socket is connected.</summary>
    public string header = null;

    /// <summary>Encoding used to turn <see cref="header"/> into bytes.</summary>
    public static Encoding Encoding = Encoding.UTF8;

    /// <summary>Callback that is handed <see cref="Stream"/> when the response is complete.</summary>
    public Action<Stream> HandAction;

    /// <summary>
    /// Shuts down and closes <see cref="workSocket"/>. Safe to call more than once and
    /// safe to call when the socket was never connected or has already been closed.
    /// <see cref="Stream"/> is left open so a caller that still holds it can keep reading.
    /// </summary>
    public void Dispose()
    {
        Socket socket = workSocket;
        if (socket == null)
        {
            return;
        }

        // Drop the reference first so a repeated Dispose becomes a no-op.
        workSocket = null;
        try
        {
            // Shutdown throws on a socket that is not connected, so only attempt it when connected.
            if (socket.Connected)
            {
                socket.Shutdown(SocketShutdown.Both);
            }
        }
        catch (SocketException)
        {
            // The peer already dropped the connection; closing below is all that is left to do.
        }
        catch (ObjectDisposedException)
        {
            // Someone else closed the socket already.
        }
        finally
        {
            socket.Close();
        }
    }
}
public class AsynchronousClient:IDisposable
{
public StateObject State { set; get; }
/// <summary>
/// Creates a TCP socket and begins an asynchronous connect to <paramref name="ipPoint"/>.
/// The request state is published through <see cref="State"/>.
/// </summary>
/// <param name="ipPoint">Remote endpoint to connect to.</param>
/// <param name="data">Request text that is sent once the connection is established.</param>
/// <param name="actionHandle">Callback that receives the response stream when the transfer ends.</param>
/// <remarks>
/// Never throws: a failure to start the connect is written to <see cref="Trace"/> and the
/// socket is released.
/// </remarks>
public void StartClient(IPEndPoint ipPoint, string data, Action<Stream> actionHandle)
{
    Socket client = null;
    StateObject state = null;
    try
    {
        client = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
        state = new StateObject();
        state.workSocket = client;
        state.header = data;
        state.HandAction = actionHandle;
        State = state;
        client.BeginConnect(ipPoint, new AsyncCallback(ConnectCallback), state);
    }
    catch (Exception ex)
    {
        // Keep the original "never throws" contract, but leave a trace and do not leak the socket.
        Trace.WriteLine(string.Format("StartClient failed: {0}", ex));
        if (state != null)
        {
            // Clear the reference so a later Dispose does not touch the closed socket.
            state.workSocket = null;
        }
        if (client != null)
        {
            client.Close();
        }
    }
}
/// <summary>
/// Completes the asynchronous connect and starts sending the request text stored in the state.
/// </summary>
/// <param name="ar">Async result whose <c>AsyncState</c> is the request's <see cref="StateObject"/>.</param>
void ConnectCallback(IAsyncResult ar)
{
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    try
    {
        client.EndConnect(ar);
        Trace.WriteLine(string.Format("Socket connected to {0}", client.RemoteEndPoint.ToString()));
        byte[] byteData = StateObject.Encoding.GetBytes(state.header);
        // Start sending the request to the remote host.
        client.BeginSend(byteData, 0, byteData.Length, 0, new AsyncCallback(SendCallback), state);
    }
    catch (Exception ex)
    {
        // This runs as a completion callback with no caller to propagate to, so a failed
        // connect (e.g. connection refused) is logged and the socket released instead of
        // escaping as an unhandled exception.
        Trace.WriteLine(string.Format("Connect failed: {0}", ex));
        // Clear the reference so a later Dispose does not touch the closed socket.
        state.workSocket = null;
        if (client != null)
        {
            client.Close();
        }
    }
}
/// <summary>
/// Completes the asynchronous send and starts receiving the response into the state's buffer.
/// </summary>
/// <param name="ar">Async result whose <c>AsyncState</c> is the request's <see cref="StateObject"/>.</param>
void SendCallback(IAsyncResult ar)
{
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    try
    {
        // Finish the send.
        int bytesSent = client.EndSend(ar);
        Trace.WriteLine(string.Format("Sent {0} bytes to server.", bytesSent.ToString()));
        client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
    }
    catch (Exception ex)
    {
        // This runs as a completion callback with no caller to propagate to: log the
        // failure and release the socket rather than swallowing it silently.
        Trace.WriteLine(string.Format("Send failed: {0}", ex));
        // Clear the reference so a later Dispose does not touch the closed socket.
        state.workSocket = null;
        if (client != null)
        {
            client.Close();
        }
    }
}
/// <summary>
/// Completes one asynchronous receive. Received bytes are appended to the state's stream and
/// another receive is started; a zero-byte read is treated as the end of the response, at
/// which point the socket is closed and the state's <c>HandAction</c> is invoked with the stream.
/// </summary>
/// <param name="ar">Async result whose <c>AsyncState</c> is the request's <see cref="StateObject"/>.</param>
/// <remarks>
/// If receiving fails, the error is traced, the socket is closed and <c>HandAction</c> is
/// not invoked.
/// </remarks>
void ReceiveCallback(IAsyncResult ar)
{
    // Recover the request state and its socket from the async result.
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    try
    {
        // Read what the remote host sent.
        int bytesRead = client.EndReceive(ar);
        if (bytesRead > 0)
        {
            // Data arrived: store it and keep reading.
            state.Stream.Write(state.buffer, 0, bytesRead);
            client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
            return;
        }
    }
    catch (Exception ex)
    {
        // This runs as a completion callback with no caller to propagate to.
        Trace.WriteLine(string.Format("Receive failed: {0}", ex));
        state.workSocket = null;
        if (client != null)
        {
            client.Close();
        }
        return;
    }

    // End of response. Clear the reference BEFORE closing so that a Dispose issued from
    // inside the handler does not call Shutdown on an already-closed socket.
    state.workSocket = null;
    try
    {
        client.Shutdown(SocketShutdown.Both);
    }
    catch (SocketException)
    {
        // The peer has already torn the connection down; Close below is sufficient.
    }
    client.Close();

    if (state.HandAction != null)
    {
        state.HandAction(state.Stream);
    }
}
/// <summary>
/// Releases the current request state, if any. Does nothing when no request was started,
/// and does not throw when the underlying socket has already been closed.
/// </summary>
public void Dispose()
{
    StateObject state = State;
    if (state == null)
    {
        // StartClient was never called, so there is nothing to release.
        return;
    }

    try
    {
        state.Dispose();
    }
    catch (ObjectDisposedException ex)
    {
        // The socket was already closed when the response finished.
        Trace.WriteLine(string.Format("Dispose ignored: {0}", ex.Message));
    }
    catch (SocketException ex)
    {
        // The socket was never connected or the peer already dropped the connection.
        Trace.WriteLine(string.Format("Dispose ignored: {0}", ex.Message));
    }
}
}
调用方式:
// Usage example: issue an HTTP/1.0 GET through AsynchronousClient and decode the reply as UTF-8.
// `hosts`, `url` and `header` are expected to be defined by the surrounding code.
IPEndPoint ipPoint = new IPEndPoint(hosts.AddressList[0], 80);
AsynchronousClient client = new AsynchronousClient();
string responseText = string.Empty;
StringBuilder bufRequest = new StringBuilder();
bufRequest.Append("GET ").Append(url).Append(" HTTP/1.0\r\n");
bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
if (!string.IsNullOrEmpty(header))
{
    // Extra header lines must come BEFORE the blank line that ends the header section;
    // appended after it they would be treated as body and ignored.
    bufRequest.Append(header.TrimEnd('\r', '\n')).Append("\r\n");
}
bufRequest.Append("\r\n");
string requestText = bufRequest.ToString();
client.StartClient(ipPoint, requestText, new Action<Stream>(x =>
{
    Encoding coder = Encoding.UTF8;
    x.Seek(0, SeekOrigin.Begin);
    // Decode the whole stream in one pass: decoding fixed-size chunks independently
    // corrupts multi-byte characters that straddle a chunk boundary.
    // The reader is deliberately not disposed, because that would close a stream it does not own.
    StreamReader reader = new StreamReader(x, coder, false);
    responseText = reader.ReadToEnd();
    client.Dispose();
}));
其中的 url 可以是要爬取的地址,例如:www.vancl.com 或 http://neiyi.vancl.com/search/
/// <summary>
/// Fetches <paramref name="url"/> with a blocking HTTP/1.0 GET over a raw TCP socket and
/// returns the complete response (status line, headers and body) as text.
/// </summary>
/// <param name="url">
/// Address to fetch. "http://" is prepended when missing; an explicit port is honoured,
/// otherwise port 80 is used.
/// </param>
/// <param name="encoding">
/// Optional; the first element is used to decode the response. Defaults to UTF-8.
/// </param>
/// <returns>
/// The decoded response. On failure the error is traced and whatever was received before
/// the failure (possibly an empty string) is returned.
/// </returns>
public string Get(string url, params Encoding[] encoding)
{
    // NOTE(review): the status is set to Busy here and never reset in this method — confirm
    // whether the caller is responsible for resetting it.
    _statu = HttpRequestStatus.Busy;

    Encoding coder = Encoding.UTF8;
    if (encoding != null && encoding.Length > 0 && encoding[0] != null)
    {
        coder = encoding[0];
    }

    // Collect raw bytes and decode once at the end: decoding each 1024-byte chunk on its
    // own corrupts multi-byte characters that straddle a chunk boundary.
    MemoryStream received = new MemoryStream();
    try
    {
        // Keep the path and query of scheme-less URLs instead of discarding them, and let
        // Uri supply the root "/" only when the URL has no path at all.
        string absoluteUrl = url.StartsWith("http://", StringComparison.Ordinal) ? url : "http://" + url;
        Uri target = new Uri(absoluteUrl);

        // The socket below is IPv4-only, so pick the first IPv4 address.
        IPAddress address = null;
        foreach (IPAddress candidate in Dns.GetHostAddresses(target.Host))
        {
            if (candidate.AddressFamily == AddressFamily.InterNetwork)
            {
                address = candidate;
                break;
            }
        }
        if (address == null)
        {
            throw new SocketException((int)SocketError.HostNotFound);
        }
        IPEndPoint ipPoint = new IPEndPoint(address, target.Port);

        // Build the request that is sent to the server.
        StringBuilder bufRequest = new StringBuilder();
        bufRequest.Append("GET ").Append(target.AbsoluteUri).Append(" HTTP/1.0\r\n");
        bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
        bufRequest.Append("\r\n");
        byte[] bs = Encoding.ASCII.GetBytes(bufRequest.ToString());

        // `using` guarantees the socket is closed even when connect/send/receive throws.
        using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
        {
            socket.Connect(ipPoint);
            socket.Send(bs);

            // Read until the server closes the connection (Receive returns 0).
            byte[] recvBytes = new byte[1024];
            int bytes;
            while ((bytes = socket.Receive(recvBytes, recvBytes.Length, SocketFlags.None)) > 0)
            {
                received.Write(recvBytes, 0, bytes);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort, as before: do not throw to the caller, but leave a trace of what went wrong.
        System.Diagnostics.Trace.WriteLine(string.Format("Get failed for {0}: {1}", url, ex));
    }

    return coder.GetString(received.GetBuffer(), 0, (int)received.Length);
}
以上是用socket同步的方式来实现的,以下是异步方式
/// <summary>
/// Per-request state that is handed from one asynchronous socket callback to the next.
/// </summary>
public class StateObject:IDisposable
{
    /// <summary>Socket used for the request; reset to null by <see cref="Dispose"/>.</summary>
    public Socket workSocket = null;

    /// <summary>Size, in bytes, of <see cref="buffer"/>.</summary>
    public const int BufferSize = 256;

    /// <summary>Scratch buffer filled by each receive call.</summary>
    public byte[] buffer = new byte[BufferSize];

    /// <summary>Accumulates the raw bytes received so far.</summary>
    public MemoryStream Stream = new MemoryStream();

    /// <summary>Request text that is sent once the socket is connected.</summary>
    public string header = null;

    /// <summary>Encoding used to turn <see cref="header"/> into bytes.</summary>
    public static Encoding Encoding = Encoding.UTF8;

    /// <summary>Callback that is handed <see cref="Stream"/> when the response is complete.</summary>
    public Action<Stream> HandAction;

    /// <summary>
    /// Shuts down and closes <see cref="workSocket"/>. Safe to call more than once and
    /// safe to call when the socket was never connected or has already been closed.
    /// <see cref="Stream"/> is left open so a caller that still holds it can keep reading.
    /// </summary>
    public void Dispose()
    {
        Socket socket = workSocket;
        if (socket == null)
        {
            return;
        }

        // Drop the reference first so a repeated Dispose becomes a no-op.
        workSocket = null;
        try
        {
            // Shutdown throws on a socket that is not connected, so only attempt it when connected.
            if (socket.Connected)
            {
                socket.Shutdown(SocketShutdown.Both);
            }
        }
        catch (SocketException)
        {
            // The peer already dropped the connection; closing below is all that is left to do.
        }
        catch (ObjectDisposedException)
        {
            // Someone else closed the socket already.
        }
        finally
        {
            socket.Close();
        }
    }
}
public class AsynchronousClient:IDisposable
{
public StateObject State { set; get; }
/// <summary>
/// Creates a TCP socket and begins an asynchronous connect to <paramref name="ipPoint"/>.
/// The request state is published through <see cref="State"/>.
/// </summary>
/// <param name="ipPoint">Remote endpoint to connect to.</param>
/// <param name="data">Request text that is sent once the connection is established.</param>
/// <param name="actionHandle">Callback that receives the response stream when the transfer ends.</param>
/// <remarks>
/// Never throws: a failure to start the connect is written to <see cref="Trace"/> and the
/// socket is released.
/// </remarks>
public void StartClient(IPEndPoint ipPoint, string data, Action<Stream> actionHandle)
{
    Socket client = null;
    StateObject state = null;
    try
    {
        client = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
        state = new StateObject();
        state.workSocket = client;
        state.header = data;
        state.HandAction = actionHandle;
        State = state;
        client.BeginConnect(ipPoint, new AsyncCallback(ConnectCallback), state);
    }
    catch (Exception ex)
    {
        // Keep the original "never throws" contract, but leave a trace and do not leak the socket.
        Trace.WriteLine(string.Format("StartClient failed: {0}", ex));
        if (state != null)
        {
            // Clear the reference so a later Dispose does not touch the closed socket.
            state.workSocket = null;
        }
        if (client != null)
        {
            client.Close();
        }
    }
}
/// <summary>
/// Completes the asynchronous connect and starts sending the request text stored in the state.
/// </summary>
/// <param name="ar">Async result whose <c>AsyncState</c> is the request's <see cref="StateObject"/>.</param>
void ConnectCallback(IAsyncResult ar)
{
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    try
    {
        client.EndConnect(ar);
        Trace.WriteLine(string.Format("Socket connected to {0}", client.RemoteEndPoint.ToString()));
        byte[] byteData = StateObject.Encoding.GetBytes(state.header);
        // Start sending the request to the remote host.
        client.BeginSend(byteData, 0, byteData.Length, 0, new AsyncCallback(SendCallback), state);
    }
    catch (Exception ex)
    {
        // This runs as a completion callback with no caller to propagate to, so a failed
        // connect (e.g. connection refused) is logged and the socket released instead of
        // escaping as an unhandled exception.
        Trace.WriteLine(string.Format("Connect failed: {0}", ex));
        // Clear the reference so a later Dispose does not touch the closed socket.
        state.workSocket = null;
        if (client != null)
        {
            client.Close();
        }
    }
}
/// <summary>
/// Completes the asynchronous send and starts receiving the response into the state's buffer.
/// </summary>
/// <param name="ar">Async result whose <c>AsyncState</c> is the request's <see cref="StateObject"/>.</param>
void SendCallback(IAsyncResult ar)
{
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    try
    {
        // Finish the send.
        int bytesSent = client.EndSend(ar);
        Trace.WriteLine(string.Format("Sent {0} bytes to server.", bytesSent.ToString()));
        client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
    }
    catch (Exception ex)
    {
        // This runs as a completion callback with no caller to propagate to: log the
        // failure and release the socket rather than swallowing it silently.
        Trace.WriteLine(string.Format("Send failed: {0}", ex));
        // Clear the reference so a later Dispose does not touch the closed socket.
        state.workSocket = null;
        if (client != null)
        {
            client.Close();
        }
    }
}
/// <summary>
/// Completes one asynchronous receive. Received bytes are appended to the state's stream and
/// another receive is started; a zero-byte read is treated as the end of the response, at
/// which point the socket is closed and the state's <c>HandAction</c> is invoked with the stream.
/// </summary>
/// <param name="ar">Async result whose <c>AsyncState</c> is the request's <see cref="StateObject"/>.</param>
/// <remarks>
/// If receiving fails, the error is traced, the socket is closed and <c>HandAction</c> is
/// not invoked.
/// </remarks>
void ReceiveCallback(IAsyncResult ar)
{
    // Recover the request state and its socket from the async result.
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    try
    {
        // Read what the remote host sent.
        int bytesRead = client.EndReceive(ar);
        if (bytesRead > 0)
        {
            // Data arrived: store it and keep reading.
            state.Stream.Write(state.buffer, 0, bytesRead);
            client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
            return;
        }
    }
    catch (Exception ex)
    {
        // This runs as a completion callback with no caller to propagate to.
        Trace.WriteLine(string.Format("Receive failed: {0}", ex));
        state.workSocket = null;
        if (client != null)
        {
            client.Close();
        }
        return;
    }

    // End of response. Clear the reference BEFORE closing so that a Dispose issued from
    // inside the handler does not call Shutdown on an already-closed socket.
    state.workSocket = null;
    try
    {
        client.Shutdown(SocketShutdown.Both);
    }
    catch (SocketException)
    {
        // The peer has already torn the connection down; Close below is sufficient.
    }
    client.Close();

    if (state.HandAction != null)
    {
        state.HandAction(state.Stream);
    }
}
/// <summary>
/// Releases the current request state, if any. Does nothing when no request was started,
/// and does not throw when the underlying socket has already been closed.
/// </summary>
public void Dispose()
{
    StateObject state = State;
    if (state == null)
    {
        // StartClient was never called, so there is nothing to release.
        return;
    }

    try
    {
        state.Dispose();
    }
    catch (ObjectDisposedException ex)
    {
        // The socket was already closed when the response finished.
        Trace.WriteLine(string.Format("Dispose ignored: {0}", ex.Message));
    }
    catch (SocketException ex)
    {
        // The socket was never connected or the peer already dropped the connection.
        Trace.WriteLine(string.Format("Dispose ignored: {0}", ex.Message));
    }
}
}
调用方式:
// Usage example: issue an HTTP/1.0 GET through AsynchronousClient and decode the reply as UTF-8.
// `hosts`, `url` and `header` are expected to be defined by the surrounding code.
IPEndPoint ipPoint = new IPEndPoint(hosts.AddressList[0], 80);
AsynchronousClient client = new AsynchronousClient();
string responseText = string.Empty;
StringBuilder bufRequest = new StringBuilder();
bufRequest.Append("GET ").Append(url).Append(" HTTP/1.0\r\n");
bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
if (!string.IsNullOrEmpty(header))
{
    // Extra header lines must come BEFORE the blank line that ends the header section;
    // appended after it they would be treated as body and ignored.
    bufRequest.Append(header.TrimEnd('\r', '\n')).Append("\r\n");
}
bufRequest.Append("\r\n");
string requestText = bufRequest.ToString();
client.StartClient(ipPoint, requestText, new Action<Stream>(x =>
{
    Encoding coder = Encoding.UTF8;
    x.Seek(0, SeekOrigin.Begin);
    // Decode the whole stream in one pass: decoding fixed-size chunks independently
    // corrupts multi-byte characters that straddle a chunk boundary.
    // The reader is deliberately not disposed, because that would close a stream it does not own.
    StreamReader reader = new StreamReader(x, coder, false);
    responseText = reader.ReadToEnd();
    client.Dispose();
}));
其中的 url 可以是要爬取的地址,例如:www.vancl.com 或 http://neiyi.vancl.com/search/
相关文章推荐
- 用socket来代替HttpWebRequest和HttpWebResponse
- 用socket代替httpwebrequest/httpwebresponse的研究
- HttpWebRequest在GetResponse时总是超时
- 【转载】C#网页采集数据的几种方式(WebClient、WebBrowser和HttpWebRequest/HttpWebResponse)
- HttpWebRequest 返回BadRequest(400) 同时返回Response
- C# 使用HttpWebRequest,HttpWebResponse 快速验证代理IP是否有用
- C#获取网页内容 (WebClient、WebBrowser和HttpWebRequest/HttpWebResponse)
- HttpWebRequest BeginGetResponse EndGetResponse
- C#使用HttpWebRequest与HttpWebResponse模拟用户登录
- 使用HttpWebRequest以及HttpWebResponse读取Http远程文件[转]
- HttpWebRequest && WebResponse
- InternalServerError时 HttpWebRequest 的 GetResponse 方法处理策略
- 【整理】Asp.net HttpWebRequest和HttpWebResponse发送和接受任何类型数据
- PostDirectoryFiles//HttpWebRequest,HttpWebResponse
- C#模拟POST提交表单(二)--HttpWebRequest以及HttpWebResponse
- HttpWebRequest 和 HttpWebResponse 的应用
- 不二极端编程之JavaWeb详解(HttpServletRequest,HttpServletResponse)
- HttpWebRequest 在出错时获取response内容
- c# HttpWebRequest 和HttpWebResponse 登录网站或论坛(校内网登陆)
- 利用HttpWebRequest和HttpWebResponse获取Cookie并实现模拟登录