您的位置:首页 > 其它

还是获取网页的问题,不过是多线程

2011-04-08 05:13 239 查看
09年我在这个问题上折腾了大半年,还因此写了篇博文[这下该不会阻塞了吧
],最初的是想实现多线程下载网页源码,但经过对HTTP协议的学习,发现只有支持断点继传的网站才用得上多线程下载,因此又把需求放在批量快速下载网页源码上,比如我同时需要访问100个网页,如果用IdHttp就头大了,Idhttp就连访问302页面也会出现异常,调试起来很麻烦。而用自己写的TWinHttp类,recv函数阻塞的问题是能解决了,但如果短时间频繁的获取网页源码,50%以上的访问都会出现connect失败和recv超时,以前很难解决的问题,这次却让我想到了一个较好的解决办法。

首先,对于connect,如果连接失败就进入死循环重复连接,直接连接成功才退出死循环

对于recv,由于recv失败一般都是因为该函数阻塞导致recv线程超时,而如果超时后不进行再处理的话,下载网页就会失败了。所以解决办法是当recv阻塞导致超时后,重新进行connect、send和recv,也就是当recv失败,就进行死循环重复connect、send和recv操作。

解决connect失败和recv失败的问题都用到了死循环,但如果在这两个操作上始终失败呢,怎么样才能退出死循环?我的解决办法是检查本地网络连接是否正常。当然,有必要也可以对目标网站能否访问进行判断,如果目标网站比较稳定,就不需要这个判断了,比如访问百度。

检查本地网络连接是否正常,我用的方法很简单,就是用gethostbyname函数来获取一个域名的IP地址。

使用以上的办法,目前还没发现有什么负作用,只是可能在短时间频繁访问网页时在死循环中耗费的时间比较多。

下面是新TWinHttp类的源码,这个类实现了GET/HEAD/POST这几个应用,如果本地网络连接正常,经测试,基本不会出现访问失败的现象。

unit WinHttp;
interface
uses
WinSock, Sockets, Windows, SysUtils, Classes;
const
HTTP_OK = 1;
HTTP_TIMEOUT = 2;
HTTP_FAIL = 3;
HTTP_STATECODE_ERR = 4;
type THttpData = record
WSAData:TWSAData;
Host, Path:string;
sockfd:Integer; //套接字
hostEnt:PHostEnt;
addr:sockaddr_in;
SocketHost:TSocketHost;
IsCon:Boolean;
end;
PHttpData = ^THttpData;
type
TWinHttp = class( TObject )
private
FHttpData:THttpData;
FTimeOut:Integer;
FCookie:string;
FHttpHead:string;
FStateCode:Integer;
FLocation:string;
FUrl:string;
FReferer:string;
private
procedure ParseURL( const Url:string; var Host, Path:string);
function Conn( var HttpData:THttpData ):Boolean;
public
property TimeOut: Integer read FTimeOut write FTimeOut;
property Cookie: string read FCookie write FCookie;
property Referer: string read FReferer write FReferer;
function Get( Url:string ):string;
procedure Head( Url:string );
function Post( Url:string; PostData:string ):string;
function HttpHead:string;
function StateCode:Integer;
function Location:string;
constructor Create;overload;
end;

implementation
type
TRecvThread = class( TThread )
private
FHttpData:THttpData;
protected
procedure Execute; override;
public
HtmlSource:string;
HttpHead:string;
StateCode:Integer;
ResultValue:Integer;
public
constructor Create( HttpData:THttpData );overload;

end;
constructor TRecvThread.Create( HttpData:THttpData );
begin
FHttpData := HttpData;
inherited Create( False );
end;
procedure TRecvThread.Execute;
var
Buf:array[0..1024] of char;
nPos,nRecv:Integer;
HeadFine:Boolean;
begin
//FreeOnTerminate := True;
HtmlSource := '';
HttpHead := '';
StateCode := 0;
ResultValue := 1;
HeadFine := False;
while True do
begin
FillChar( Buf, 1024, 0 );
nRecv := recv( FHttpData.sockfd, Buf, 1024, 0 );
if nRecv > 0 then
begin
HtmlSource := HtmlSource + Buf;
if not HeadFine then
begin
nPos := Pos( #13#10#13#10, HtmlSource );
if nPos <> 0 then
begin
HttpHead := Copy( HtmlSource, 1, nPos );
StateCode := StrToInt( Copy( HttpHead, 10, 3 ) );
HeadFine := True;
end;
end;
end
else if nRecv = -1 then
begin
ResultValue := -1;
HtmlSource := '';
Break;
end
else
begin
ResultValue := 0;
Break;
end;
end;
end;
constructor TWinHttp.Create;
begin
FTimeOut := 30;
FHttpData.IsCon := False;
end;
procedure TWinHttp.Head( Url:string );
var
SendBuf:array[0..10240] of char;
RecvThread:TRecvThread;
nPos:Integer;
label start;
begin
start:
FUrl := Url;
Conn( FHttpData );
if FHttpData.IsCon then
begin
FillChar( SendBuf, 10240, 0 );
lstrcpy( SendBuf, PChar( 'HEAD ' + FHttpData.Path + ' HTTP/1.1' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Host: ' + FHttpData.Host + #13#10 ) );
lstrcat( SendBuf, PChar( 'User-Agent: Mozilla/5.0' +
' (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15)' +
' Gecko/20110303 Firefox/3.6.15' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Content-Type: ' +
'application/x-www-form-urlencoded;' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Connection: Close' + #13#10 ) );
if FCookie <> '' then
lstrcat( SendBuf, PChar( 'Cookie: ' + FCookie + #13#10#13#10 ) )
else
lstrcat( SendBuf, PChar( #13#10 ) );
send( FHttpData.sockfd, SendBuf, StrLen( SendBuf ), 0 );
RecvThread := TRecvThread.Create( FHttpData );
if WaitForSingleObject( RecvThread.Handle, FTimeOut * 1000 ) = WAIT_TIMEOUT then
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
RecvThread.Free;
goto start;
end;
FHttpHead := '';
end;
if RecvThread.ResultValue = 0 then
begin
FHttpHead := RecvThread.HttpHead;
FStateCode := RecvThread.StateCode;
if FStateCode = 302 then
begin
nPos := Pos( 'Location: ', FHttpHead );
FLocation := Copy( FHttpHead, nPos + Length( 'Location: ' ),
Length( FHttpHead ) - nPos );
FLocation := Copy( FLocation, 1, Pos( #13#10, FLocation ) - 1 );
end;
end
else
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
RecvThread.Free;
goto start;
end;
FHttpHead := '';
end;

RecvThread.Free;
//Result := Recv
end
else
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
goto start;
end;
FHttpHead := '';
end;

end;
//获取源码
function TWinHttp.Get( Url:string ):string;
var
SendBuf:array[0..10240] of char;
RecvThread:TRecvThread;
HtmlSource:string;
i, nPos, nIndex:Integer;
CookieList:TStringList;
label start;
begin
CookieList := nil;
start:
if CookieList = nil then
CookieList := TStringList.Create
else
CookieList.Clear;
FUrl := Url;
Conn( FHttpData );
if FHttpData.IsCon then
begin
FillChar( SendBuf, 10240, 0 );
lstrcpy( SendBuf, PChar( 'GET ' + FHttpData.Path + ' HTTP/1.1' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Host: ' + FHttpData.Host + #13#10 ) );
lstrcat( SendBuf, PChar( 'User-Agent: Mozilla/5.0' +
' (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15)' +
' Gecko/20110303 Firefox/3.6.15' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Content-Type: ' +
'application/x-www-form-urlencoded;' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Connection: Close' + #13#10 ) );
if FReferer <> '' then
begin
lstrcat( SendBuf, PChar( 'Referer: ' + FReferer + #13#10 ) );
end;

if FCookie <> '' then
lstrcat( SendBuf, PChar( 'Cookie: ' + FCookie + #13#10#13#10 ) )
else
lstrcat( SendBuf, PChar( #13#10 ) );

send( FHttpData.sockfd, SendBuf, StrLen( SendBuf ), 0 );
RecvThread := TRecvThread.Create( FHttpData );
if WaitForSingleObject( RecvThread.Handle, FTimeOut * 1000 ) = WAIT_TIMEOUT then
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
RecvThread.Free;
goto start;
end;
HtmlSource := '';
end;
if RecvThread.ResultValue = 0 then
begin
HtmlSource := RecvThread.HtmlSource;
FHttpHead := RecvThread.HttpHead;
FStateCode := RecvThread.StateCode;
if FStateCode = 302 then
begin
nPos := Pos( 'Location: ', FHttpHead );
FLocation := Copy( FHttpHead, nPos + Length( 'Location: ' ),
Length( FHttpHead ) - nPos );
FLocation := Copy( FLocation, 1, Pos( #13#10, FLocation ) - 1 );
end;
ExtractStrings( [#13], [], PChar( FHttpHead ), CookieList );
if CookieList.Count > 0 then
begin
FCookie := '';
for i := 0 to CookieList.Count - 1 do
begin
nPos := Pos( 'Set-Cookie: ', CookieList[i] );
if nPos = 1 then
begin
FCookie := FCookie +
Copy( CookieList[i], Length( 'Set-Cookie: ' ) + 1,
Length( CookieList[i] ) - Length( 'Set-Cookie: ' ) + 1 );
nIndex := Length( FCookie );
if FCookie[nIndex] <> ';' then
FCookie := FCookie + '; ';
end;
end;
end;
end
else
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
RecvThread.Free;
goto start;
end;
HtmlSource := '';
end;
RecvThread.Free;
end
else
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
goto start;
end;
HtmlSource := '';
end;
CookieList.Free;
Result := HtmlSource;
end;
function TWinHttp.Post( Url:string; PostData:string ):string;
var
SendBuf:array[0..10240] of char;
RecvThread:TRecvThread;
HtmlSource:string;
i,nPos,nIndex:Integer;
CookieList:TStringList;
label start;
begin
CookieList := nil;
start:
if CookieList = nil then
CookieList := TStringList.Create
else
CookieList.Clear;
FUrl := Url;
Conn( FHttpData );
if FHttpData.IsCon then
begin
FillChar( SendBuf, 10240, 0 );
lstrcpy( SendBuf, PChar( 'POST ' + FHttpData.Path + ' HTTP/1.1' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Host: ' + FHttpData.Host + #13#10 ) );
lstrcat( SendBuf, PChar( 'User-Agent: Mozilla/5.0' +
' (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15)' +
' Gecko/20110303 Firefox/3.6.15' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Content-Type: ' +
'application/x-www-form-urlencoded;' + #13#10 ) );
lstrcat( SendBuf, PChar( 'Content-Length: ' +
IntToStr( Length( PostData ) ) + #13#10 ) );
lstrcat( SendBuf, PChar( 'Connection: Close' + #13#10 ) );
if FReferer <> '' then
begin
lstrcat( SendBuf, PChar( 'Referer: ' + FReferer + #13#10 ) );
end;

if FCookie <> '' then
lstrcat( SendBuf, PChar( 'Cookie: ' + FCookie + #13#10#13#10 ) )
else
lstrcat( SendBuf, PChar( #13#10 ) );
lstrcat( SendBuf, PChar( PostData ) );

send( FHttpData.sockfd, SendBuf, StrLen( SendBuf ), 0 );
RecvThread := TRecvThread.Create( FHttpData );
if WaitForSingleObject( RecvThread.Handle, FTimeOut * 1000 ) = WAIT_TIMEOUT then
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
RecvThread.Free;
goto start;
end;
HtmlSource := '';
end;
if RecvThread.ResultValue = 0 then
begin
HtmlSource := RecvThread.HtmlSource;
FHttpHead := RecvThread.HttpHead;
FStateCode := RecvThread.StateCode;
if FStateCode = 302 then
begin
nPos := Pos( 'Location: ', FHttpHead );
FLocation := Copy( FHttpHead, nPos + Length( 'Location: ' ),
Length( FHttpHead ) - nPos );
FLocation := Copy( FLocation, 1, Pos( #13#10, FLocation ) - 1 );
end;
ExtractStrings( [#13], [], PChar( FHttpHead ), CookieList );
if CookieList.Count > 0 then
begin
FCookie := '';
for i := 0 to CookieList.Count - 1 do
begin
nPos := Pos( 'Set-Cookie: ', CookieList[i] );
if nPos = 1 then
begin
FCookie := FCookie +
Copy( CookieList[i], Length( 'Set-Cookie: ' ) + 1,
Length( CookieList[i] ) - Length( 'Set-Cookie: ' ) + 1 );
nIndex := Length( FCookie );
if FCookie[nIndex] <> ';' then
FCookie := FCookie + '; ';
end;
end;
end;
end
else
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
RecvThread.Free;
goto start;
end;
HtmlSource := '';
end;
RecvThread.Free;
end
else
begin
//判断本地网络连接是否正常
if gethostbyname( PChar( FHttpData.Host ) ) <> nil then
begin
FHttpData.IsCon := False;
goto start;
end;
HtmlSource := '';
end;
CookieList.Free;
Result := HtmlSource;
end;
//连接服务器
function TWinHttp.Conn( var HttpData:THttpData ):Boolean;
var
IsOk:Boolean;
i, nCon:Integer;
begin
HttpData.IsCon := False;
IsOk := False;
HttpData.sockfd := 0;
HttpData.hostEnt := nil;
HttpData.SocketHost := '';
with HttpData do
begin
if WSAStartup(MakeWord(2,2), WSAData) = 0 then
begin
ParseURL(FUrl, Host, Path);
//建立套接字
sockfd := socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
if sockfd <> INVALID_SOCKET then
begin
if Host <> '' then
begin
if Host[1] in ['0'..'9'] then
begin
if inet_addr(PChar(Host)) <> INADDR_NONE then
SocketHost := Host;
end
else
begin
hostEnt := gethostbyname(pchar(Host));
if hostEnt <> nil then
with hostEnt^ do
SocketHost := format('%d.%d.%d.%d',
[ord(h_addr^[0]), ord(h_addr^[1]),
ord(h_addr^[2]), ord(h_addr^[3])]);
end;
addr.sin_family := AF_INET;
addr.sin_port := htons(80);
addr.sin_addr.S_addr := inet_addr(PChar(SocketHost));
for i := 0 to 10 do
begin
//连接
nCon := connect(sockfd, addr, SizeOf(addr) );
if nCon <> 0 then
begin
Sleep(10);
Continue;
end
else
Break;
end;
if nCon = 0 then
begin
IsOk := True;
HttpData.IsCon := True;
end;
end;
end;
end;
end;
if IsOk then
FHttpData := HttpData;
Result := IsOk;
end;
//分隔URL
procedure TWinHttp.ParseURL( const Url:string; var Host, Path:string);
var
nIndex:Integer;
S,tmpUrl:string;
begin
tmpUrl := Url;
S := LowerCase(Url);
if ( Pos('https://', S) <> 0 ) then
begin
//删除http://
Delete(tmpUrl, 1, Length('https://'));
end
else if( Pos( 'http://', S ) <> 0 ) then
begin
//删除http://
Delete(tmpUrl, 1, Length('http://'));
end;
nIndex := Pos('/', tmpUrl);
if nIndex = 0 then
begin
Host := tmpUrl;
Path := '/';
end
else
begin
Host := Copy(tmpUrl, 1, nIndex - 1);
Path := Copy(tmpUrl, nIndex, Length(Url));
end;
end;
function TWinHttp.HttpHead:string;
begin
Result := FHttpHead;
end;
function TWinHttp.StateCode:Integer;
begin
Result := FStateCode;
end;
function TWinHttp.Location:string;
begin
Result := FLocation;
end;
initialization
finalization
end.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐