您的位置:首页 > 理论基础 > 计算机网络

用socket来代替HttpWebRequest和HttpWebResponse

2012-03-09 09:40 537 查看
紧接着上一篇博客:通常我们写爬虫时并不知道对方的 IP,只知道它的 URL,所以先把客户端代码修改如下:

  public string Get(string url, params Encoding[] encoding)
        {
            string responseText = string.Empty;
            _statu = HttpRequestStatus.Busy;
            try
            {
                Regex reg = new Regex("(http://)?(?<name>[^/?]+)");
                Match m = reg.Match(url);
                var hostName = m.Groups["name"].Value;

                IPHostEntry hosts = Dns.GetHostByName(hostName);

                if (!url.StartsWith("http://"))
                    url = "http://" + hostName + "/";
                if (!url.EndsWith("/"))
                    url += "/";
                IPEndPoint ipPoint = new IPEndPoint(hosts.AddressList[0], 80);
                Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);//创建Socket
                socket.Connect(ipPoint);

                ///向服务器发送信息
                //{GET /index.php HTTP/1.0Content-Type: application/x-www-form-urlencoded
                StringBuilder bufRequest = new StringBuilder();
                bufRequest.Append("GET ").Append(url).Append(" HTTP/1.0\r\n");
                bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
                bufRequest.Append("\r\n");
                string requestContent = bufRequest.ToString();
                byte[] bs = Encoding.ASCII.GetBytes(requestContent);

                socket.Send(bs);//发送信息

                /**/
                ///接受从服务器返回的信息

                byte[] recvBytes = new byte[1024];
                int bytes;
                Encoding coder = Encoding.UTF8;
                if (encoding.Count() > 0)
                {
                    coder = encoding[0];
                }
                do
                {
                    bytes = socket.Receive(recvBytes, recvBytes.Length, 0);
                    responseText += coder.GetString(recvBytes, 0, bytes);

                } while (bytes != 0);

                /**/
                ///一定记着用完socket后要关闭
                socket.Close();
            }
            catch (Exception ex)
            {
            }
            return responseText;
        }

以上是用socket同步的方式来实现的,以下是异步方式

/// <summary>
/// Per-request state object passed through the asynchronous socket callbacks
/// (connect, send, receive) as the IAsyncResult.AsyncState value.
/// </summary>
public class StateObject : IDisposable
{
    /// <summary>Socket used for the request; null when none is attached.</summary>
    public Socket workSocket = null;

    /// <summary>Size in bytes of the receive buffer.</summary>
    public const int BufferSize = 256;

    /// <summary>Scratch buffer that each asynchronous receive writes into.</summary>
    public byte[] buffer = new byte[BufferSize];

    /// <summary>Accumulates the raw bytes received so far.</summary>
    public MemoryStream Stream = new MemoryStream();

    /// <summary>Request text that is sent once the socket is connected.</summary>
    public string header = null;

    /// <summary>Encoding used to turn <see cref="header"/> into bytes.</summary>
    public static Encoding Encoding = Encoding.UTF8;

    /// <summary>Callback that is handed the received data once the request completes.</summary>
    public Action<Stream> HandAction;

    /// <summary>
    /// Shuts down and closes the socket, if any. Safe to call repeatedly and on a socket
    /// that never connected or was already closed. The received data in
    /// <see cref="Stream"/> is left readable.
    /// </summary>
    public void Dispose()
    {
        Socket socket = workSocket;
        if (socket == null)
        {
            return;
        }

        try
        {
            // Shutdown throws on a socket that is not connected, so only attempt it when
            // there is a live connection to wind down.
            if (socket.Connected)
            {
                socket.Shutdown(SocketShutdown.Both);
            }
        }
        catch (SocketException)
        {
            // The peer already dropped the connection; nothing left to shut down.
        }
        catch (ObjectDisposedException)
        {
            // The socket was closed elsewhere; Dispose must not throw.
        }

        socket.Close();
    }
}
public class AsynchronousClient:IDisposable
{
public StateObject State { set; get; }

/// <summary>
/// Creates a TCP socket and begins an asynchronous connect to <paramref name="ipPoint"/>.
/// Once connected, <paramref name="data"/> is sent and the response is collected; when the
/// server closes the connection, <paramref name="actionHandle"/> receives the stream of
/// received bytes.
/// </summary>
/// <param name="ipPoint">Remote endpoint to connect to.</param>
/// <param name="data">Complete request text to send after connecting.</param>
/// <param name="actionHandle">Callback invoked with the received data.</param>
/// <remarks>
/// Never throws: a failure to start the connection is traced and the socket is released.
/// In that case <paramref name="actionHandle"/> is not invoked.
/// </remarks>
public void StartClient(IPEndPoint ipPoint, string data, Action<Stream> actionHandle)
{
    Socket client = null;
    StateObject state = null;
    try
    {
        client = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);

        state = new StateObject();
        state.workSocket = client;
        state.header = data;
        state.HandAction = actionHandle;
        State = state;

        client.BeginConnect(ipPoint, new AsyncCallback(ConnectCallback), state);
    }
    catch (Exception ex)
    {
        // Best-effort start, as before, but the failure is now visible and the socket
        // is not leaked.
        Trace.WriteLine(string.Format("Failed to start connecting to {0}: {1}", ipPoint, ex.Message));
        if (client != null)
        {
            client.Close();
        }
        if (state != null)
        {
            // Drop the reference so a later Dispose does not touch the closed socket.
            state.workSocket = null;
        }
    }
}
/// <summary>
/// Completion callback for BeginConnect: finishes the connect and starts sending the
/// request text stored in the state object.
/// </summary>
/// <param name="ar">Async result whose AsyncState is the request's StateObject.</param>
void ConnectCallback(IAsyncResult ar)
{
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    if (client == null)
    {
        // The request was torn down before the connect completed.
        return;
    }

    try
    {
        // EndConnect throws when the connection attempt failed (refused, unreachable, ...).
        client.EndConnect(ar);
        Trace.WriteLine(string.Format("Socket connected to {0}", client.RemoteEndPoint.ToString()));

        byte[] byteData = StateObject.Encoding.GetBytes(state.header ?? string.Empty);

        // Start sending the request to the remote host.
        client.BeginSend(byteData, 0, byteData.Length, 0, new AsyncCallback(SendCallback), state);
    }
    catch (SocketException ex)
    {
        // This runs on a thread-pool thread, where an unhandled exception would take the
        // whole process down; trace it and release the socket instead.
        Trace.WriteLine(string.Format("Connect failed: {0}", ex.Message));
        client.Close();
        state.workSocket = null;
    }
    catch (ObjectDisposedException)
    {
        // The socket was closed while the connect was still pending.
        state.workSocket = null;
    }
}

/// <summary>
/// Completion callback for BeginSend: finishes the send and starts the first
/// asynchronous receive into the state object's buffer.
/// </summary>
/// <param name="ar">Async result whose AsyncState is the request's StateObject.</param>
void SendCallback(IAsyncResult ar)
{
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    if (client == null)
    {
        // The request was torn down before the send completed.
        return;
    }

    try
    {
        // Complete the send; this throws if the connection broke while sending.
        int bytesSent = client.EndSend(ar);
        Trace.WriteLine(string.Format("Sent {0} bytes to server.", bytesSent.ToString()));

        // Start reading the response.
        client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
    }
    catch (SocketException ex)
    {
        // Thread-pool callback: trace and release the socket rather than failing silently
        // or crashing the process.
        Trace.WriteLine(string.Format("Send failed: {0}", ex.Message));
        client.Close();
        state.workSocket = null;
    }
    catch (ObjectDisposedException)
    {
        // The socket was closed while the send was still pending.
        state.workSocket = null;
    }
}

/// <summary>
/// Completion callback for BeginReceive: appends the received bytes to the state's stream
/// and keeps reading until the server closes the connection (a zero-byte read), then closes
/// the socket and hands the accumulated stream to the state's HandAction.
/// </summary>
/// <param name="ar">Async result whose AsyncState is the request's StateObject.</param>
/// <remarks>
/// If receiving fails, the error is traced, the socket is released and the handler is
/// not invoked, so it never sees a truncated response.
/// </remarks>
void ReceiveCallback(IAsyncResult ar)
{
    // Recover the request state and its socket from the async result.
    StateObject state = (StateObject)ar.AsyncState;
    Socket client = state.workSocket;
    if (client == null)
    {
        // The request was torn down while a receive was still pending.
        return;
    }

    try
    {
        int bytesRead = client.EndReceive(ar);
        if (bytesRead > 0)
        {
            // Store this chunk and keep reading.
            state.Stream.Write(state.buffer, 0, bytesRead);
            client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
            return;
        }
    }
    catch (SocketException ex)
    {
        // Thread-pool callback: an unhandled exception here would crash the process.
        Trace.WriteLine(string.Format("Receive failed: {0}", ex.Message));
        client.Close();
        state.workSocket = null;
        return;
    }
    catch (ObjectDisposedException)
    {
        // The socket was closed while the receive was still pending.
        state.workSocket = null;
        return;
    }

    // Zero bytes read: the remote side closed the connection, so the response is complete.
    try
    {
        client.Shutdown(SocketShutdown.Both);
    }
    catch (SocketException)
    {
        // The connection is already gone; closing below is all that is left to do.
    }
    client.Close();

    // Drop the reference so a later Dispose of the state does not touch the closed socket.
    state.workSocket = null;

    if (state.HandAction != null)
    {
        state.HandAction(state.Stream);
    }
}

/// <summary>
/// Releases the state (and through it the socket) of the current request.
/// Does nothing when no request has been started yet.
/// </summary>
public void Dispose()
{
    // State is only assigned by StartClient, so it is null on a client that was never started.
    StateObject state = State;
    if (state != null)
    {
        state.Dispose();
    }
}
}

调用方式:
// Usage example: build an HTTP/1.0 GET request, send it through AsynchronousClient and
// decode the response once the server has closed the connection.
// `hosts`, `url` and `header` come from the surrounding code.
IPEndPoint ipPoint = new IPEndPoint(hosts.AddressList[0], 80);
AsynchronousClient client = new AsynchronousClient();
string responseText = string.Empty;

StringBuilder bufRequest = new StringBuilder();
bufRequest.Append("GET ").Append(url).Append(" HTTP/1.0\r\n");
bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
if (!string.IsNullOrEmpty(header))
{
    // NOTE(review): assumes `header` holds additional request header lines - confirm.
    // They must come before the blank line that ends the header section; appended after
    // it, they would be sent as a request body instead.
    bufRequest.Append(header);
    if (!header.EndsWith("\r\n", StringComparison.Ordinal))
    {
        bufRequest.Append("\r\n");
    }
}
bufRequest.Append("\r\n");
string requestText = bufRequest.ToString();

// The callback runs later on a socket callback thread; responseText is only filled in
// once the whole response has arrived.
client.StartClient(ipPoint, requestText, new Action<Stream>(x =>
{
    try
    {
        x.Seek(0, SeekOrigin.Begin);
        // Decode the whole stream in one pass so multi-byte UTF-8 characters that span
        // chunk boundaries are not corrupted.
        responseText = new StreamReader(x, Encoding.UTF8).ReadToEnd();
    }
    finally
    {
        client.Dispose();
    }
}));


其中的 url 可以是要爬取的地址,例如 www.vancl.com 或 http://neiyi.vancl.com/search/
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: