您的位置:首页 > 其它

网页抓包工具 只实现了抓图片的功能

2014-10-27 12:46 176 查看
HttpParse.h

#ifndef _HTTP_PARSE_
#define _HTTP_PARSE_
#include <vector>
using namespace std;
// Scans an HTML document for <img> tags and collects the image URLs they
// reference.
class HttpParse
{
public:
    HttpParse();
    ~HttpParse();

    // Appends to m_imageurl the value of the lazy-src / src attribute of
    // each <img> tag found in `response` (the HTML text).
    // Returns REPTILE_SUCCESS.
    int ParseUrl(std::string &response, std::vector<std::string> &m_imageurl);
};
#endif


HttpRequestResponse.h

#ifndef _HTTP_REQUEST_RESPONSE_
#define _HTTP_REQUEST_RESPONSE_
#include <string>
#include <vector>
#include "winsock2.h"
#include <boost/serialization/singleton.hpp>
using namespace std;

// Minimal HTTP client built directly on Winsock: fetches a URL with a plain
// GET request over TCP port 80 and can save a list of image URLs to disk.
class HttpRequestResponse
{
public:
    HttpRequestResponse();
    ~HttpRequestResponse();

    // Starts Winsock 2.2 (WSAStartup). Must succeed before any request.
    int Initialise();

    // Fetches `url`. On success `response` points to a malloc()-allocated,
    // NUL-terminated buffer holding the raw reply (headers + body) and
    // `bytesRead` is the number of bytes received.
    int GetHttpResponse(std::string &url, char * &response, int &bytesRead);

    // Downloads every URL in `resourceUrl` that ends in a known image
    // extension and writes it under ./image.
    int DownLoadResource(std::vector<std::string> &resourceUrl);

    // Shuts Winsock down (WSACleanup).
    int UnInitialise();

protected:
    // Splits m_url into m_host and m_resource.
    int UrlParse();

private:
    std::string m_url;       // URL passed to the most recent GetHttpResponse call
    std::string m_host;      // host part of m_url
    std::string m_resource;  // path part of m_url, starting at the first '/'
    WSADATA m_wsaData;       // filled in by WSAStartup
};
// Singleton wrapper around HttpRequestResponse (boost::serialization::singleton).
// NOTE(review): nothing in the sources shown here uses this alias; ReptileTool
// owns its own HttpRequestResponse instance instead — confirm before removing.
typedef boost::serialization::singleton<HttpRequestResponse> HttpRequestResponseAgent;
#endif


ReptileTool.h

#ifndef _REPTILE_TOOL_
#define _REPTILE_TOOL_

#include <string>
#include "winsock2.h"
#include "HttpRequestResponse.h"
#include "HttpParse.h"
using namespace std;

// Kind of resource a crawl is asked to download.
enum ResType
{
    PICTURE = 0,   // images referenced by <img> tags
    AUDIO   = 1    // declared, but no code shown here handles it
};
// Front-end of the crawler: fetches a page, extracts the image URLs it
// references and downloads them.
class ReptileTool
{
public:
    ReptileTool();
    ~ReptileTool();

    // Currently a no-op that reports success.
    int InitialiseSocket();

    // Fetches the page at `url`, parses it for image URLs and downloads them.
    int DownLoadResource(std::string &url,  ResType type);

private:
    HttpRequestResponse m_httpRequestResponse;  // performs the HTTP requests
    HttpParse m_httpParse;                      // extracts image URLs from HTML
    std::vector<std::string> m_imageUrl;        // image URLs collected by m_httpParse
};
#endif


ReptileToolDefine.h

#ifndef  _ERROR_DEFINE_
#define _ERROR_DEFINE_

// Upper bound on the length of a URL accepted by GetHttpResponse.
#define MAXHOSTNAMESIZE       200
// Not referenced by the sources shown here.
#define MAXRESOURCESIZE       2000
// Initial size, in bytes, of the buffer that receives an HTTP response (1 MiB).
#define DEFAULT_PAGE_BUF_SIZE (1024 * 1024)
// Status codes returned by the crawler's functions.
enum DERROR_TYPE
{
    REPTILE_SUCCESS = 0,   // operation completed
    REPTILE_FAILED  = 1,   // generic failure
    HOSTNAME_ERR    = 2,   // URL rejected or could not be split into host and path
    SOCKET_ERR      = 3    // name resolution, socket creation, connect or send failed
};
#endif


HttpParse.cpp

#include "HttpParse.h"
#include "ReptileToolDefine.h"
#include<string>

// Nothing to set up: the parser keeps no state.
HttpParse::HttpParse() {}

// Nothing to release.
HttpParse::~HttpParse() {}

// Collects the image URLs referenced by the <img> tags of an HTML document.
//
// url        - the HTML text to scan (despite its name, this is the page
//              content, not an address).
// m_imageurl - receives one entry per <img> tag that carries a non-empty
//              lazy-src="..." or src="..." attribute; lazy-src wins when both
//              are present. Existing entries are kept; duplicates are not
//              filtered.
//
// Always returns REPTILE_SUCCESS.
int HttpParse::ParseUrl(string &url, vector<string> &m_imageurl)
{
    const string tag = "<img";
    const string srcAttr = "src=\"";
    const string lazyAttr = "lazy-src=\"";

    string::size_type tagPos = url.find(tag);
    while (tagPos != string::npos)
    {
        const string::size_type searchFrom = tagPos + tag.size();

        // Attributes only count if they sit inside this tag, i.e. before its
        // closing '>'. An unterminated tag extends to the end of the document.
        string::size_type tagEnd = url.find('>', searchFrom);
        if (tagEnd == string::npos)
        {
            tagEnd = url.size();
        }

        // Prefer lazy-src (lazily loaded images keep the real URL there),
        // fall back to plain src.
        string::size_type valueBegin = string::npos;
        string::size_type attrPos = url.find(lazyAttr, searchFrom);
        if (attrPos != string::npos && attrPos < tagEnd)
        {
            valueBegin = attrPos + lazyAttr.size();
        }
        else
        {
            attrPos = url.find(srcAttr, searchFrom);
            if (attrPos != string::npos && attrPos < tagEnd)
            {
                valueBegin = attrPos + srcAttr.size();
            }
        }

        if (valueBegin != string::npos)
        {
            // The value runs up to the closing quote; skip empty values and
            // attributes whose closing quote is missing.
            const string::size_type valueEnd = url.find('"', valueBegin);
            if (valueEnd != string::npos && valueEnd > valueBegin)
            {
                m_imageurl.push_back(url.substr(valueBegin, valueEnd - valueBegin));
            }
        }

        // Always advance to the next tag so the loop terminates on any input.
        tagPos = url.find(tag, searchFrom);
    }
    return REPTILE_SUCCESS;
}


HttpRequestResponse.cpp

#include "HttpRequestResponse.h"
#include "ReptileToolDefine.h"
#include<boost/algorithm/string.hpp>
#include <iostream>
#include <fstream>
#pragma comment(lib, "ws2_32.lib")
using namespace std;

// Members are default-constructed; Winsock is started later by Initialise().
HttpRequestResponse::HttpRequestResponse() {}

// Does not call WSACleanup; callers use UnInitialise() for that.
HttpRequestResponse::~HttpRequestResponse() {}

// Starts Winsock, requesting version 2.2, and stores the implementation
// details in m_wsaData.
// Returns REPTILE_SUCCESS when WSAStartup reports 0, REPTILE_FAILED otherwise.
int HttpRequestResponse::Initialise()
{
    const int startupResult = WSAStartup(MAKEWORD(2,2), &m_wsaData);
    return (0 == startupResult) ? REPTILE_SUCCESS : REPTILE_FAILED;
}

int HttpRequestResponse::UnInitialise()
{
WSACleanup();
return REPTILE_SUCCESS;
}

// Fetches `url` with a plain HTTP/1.1 GET over TCP port 80.
//
// url       - address to fetch; must be non-empty, at most MAXHOSTNAMESIZE
//             characters long and contain a path ("http://host/path").
// response  - on success, a malloc()-allocated, NUL-terminated buffer with
//             the raw reply (status line, headers and body). The caller owns
//             it and must release it with free(). Set to NULL on failure.
// bytesRead - on success, the number of bytes received (the terminating NUL
//             is not counted). Set to 0 on failure.
//
// Returns REPTILE_SUCCESS, HOSTNAME_ERR (bad URL), SOCKET_ERR (resolve,
// socket, connect or send failure) or REPTILE_FAILED (out of memory).
// A receive error after the request was sent is treated like end of stream,
// so whatever arrived up to that point is still returned.
int HttpRequestResponse::GetHttpResponse(string &url, char * &response, int &bytesRead)
{
    // Give the out-parameters a defined value on every failure path.
    response = NULL;
    bytesRead = 0;

    if(url.size() > MAXHOSTNAMESIZE || url.empty())
    {
        return HOSTNAME_ERR;
    }
    m_url = url;
    if(REPTILE_SUCCESS != UrlParse())
    {
        // Do not carry on with the host/resource left over from an earlier call.
        cout<<"UrlParse failed."<<endl;
        return HOSTNAME_ERR;
    }

    struct hostent * hp= gethostbyname( m_host.c_str() );
    if( NULL == hp || AF_INET != hp->h_addrtype || NULL == hp->h_addr )
    {
        return SOCKET_ERR;
    }
    SOCKET sock = socket( AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if( INVALID_SOCKET == sock )
    {
        return SOCKET_ERR;
    }

    // Build the server address (IPv4, port 80).
    SOCKADDR_IN sa;
    memset( &sa, 0, sizeof(sa) );
    sa.sin_family = AF_INET;
    sa.sin_port = htons( 80 );
    memcpy( &sa.sin_addr, hp->h_addr, sizeof(sa.sin_addr) );

    // Connect.
    if( 0!= connect( sock, (SOCKADDR*)&sa, sizeof(sa) ) )
    {
        cout << "Can not connect: "<< url <<endl;
        closesocket(sock);
        return SOCKET_ERR;
    }
    const string request = "GET " + m_resource + " HTTP/1.1\r\nHost:" + m_host + "\r\nConnection:Close\r\n\r\n";

    // Send the request; send() may accept only part of the buffer per call.
    const char *cursor = request.c_str();
    int remaining = static_cast<int>(request.size());
    while( remaining > 0 )
    {
        const int sent = send( sock, cursor, remaining, 0 );
        if( SOCKET_ERROR == sent )
        {
            cout << "send error" <<endl;
            closesocket( sock );
            return SOCKET_ERR;
        }
        cursor += sent;
        remaining -= sent;
    }

    int capacity = DEFAULT_PAGE_BUF_SIZE;
    char *pageBuf = static_cast<char *>(malloc(capacity));
    if( NULL == pageBuf )
    {
        closesocket( sock );
        return REPTILE_FAILED;
    }

    // Largest capacity that can still be doubled without overflowing int.
    const int maxDoublable = 0x3FFFFFFF;

    // Read until the server closes the connection (Connection: Close),
    // always leaving one byte free for the terminating NUL.
    for(;;)
    {
        const int ret = recv(sock, pageBuf + bytesRead, capacity - bytesRead - 1, 0);
        if(ret <= 0)
        {
            break;
        }
        bytesRead += ret;
        if( capacity - bytesRead < 100 )
        {
            // Running low on space: double the buffer.
            char *grown = NULL;
            if( capacity <= maxDoublable )
            {
                grown = static_cast<char *>(realloc( pageBuf, capacity * 2 ));
            }
            if( NULL == grown )
            {
                free( pageBuf );
                closesocket( sock );
                bytesRead = 0;
                return REPTILE_FAILED;
            }
            pageBuf = grown;
            capacity *= 2;
        }
    }
    pageBuf[bytesRead] = '\0';
    response = pageBuf;
    closesocket( sock );
    return REPTILE_SUCCESS;
}

int HttpRequestResponse::UrlParse()
{
string host = boost::erase_first_copy(m_url, "http://");
if( NULL == strstr( host.c_str(), "/") )
{
return HOSTNAME_ERR;
}
int index = host.find_first_of("/");
m_host = host.substr(0, index);
m_resource = host.substr(index, host.size());
return REPTILE_SUCCESS;
}

// Downloads the images named in `resourceUrl` into the ./image directory.
//
// A URL is fetched only when the text after its last '.' is one of bmp, jpg,
// jpeg, gif or png and it contains a '/' (the text from the last '/' onwards
// becomes the file name). The HTTP headers are stripped and the body is
// written as-is; the HTTP status code is not inspected. URLs that cannot be
// fetched or saved are skipped silently.
//
// Always returns REPTILE_SUCCESS.
int HttpRequestResponse::DownLoadResource(vector<string> &resourceUrl)
{
    // Create the folder that receives the images; an already existing folder
    // is fine and is not reported as an error.
    const string foldname = "./image";
    if(!CreateDirectory( foldname.c_str(),NULL ) && ERROR_ALREADY_EXISTS != GetLastError())
        cout << "Can not create directory:"<< foldname<<endl;

    const char * const headerEnd = "\r\n\r\n";

    for( vector<string>::size_type i=0; i<resourceUrl.size(); ++i )
    {
        const string &resource = resourceUrl[i];

        // Only handle images: bmp, jpg, jpeg, gif, png.
        const string::size_type dot = resource.find_last_of(".");
        if( dot == string::npos )
            continue;
        const string ext = resource.substr( dot+1 );
        if( ext!="bmp"&& ext!="jpg" && ext!="jpeg"&& ext!="gif"&&ext!="png")
            continue;

        // Without a '/' there is no file name to save under, so do not
        // bother downloading.
        const string::size_type slash = resource.find_last_of("/");
        if( slash == string::npos )
            continue;

        // Download the resource.
        char* image = NULL;
        int byteRead = 0;
        if( REPTILE_SUCCESS != GetHttpResponse(resourceUrl[i], image, byteRead) || NULL == image )
            continue;

        // The body starts right after the blank line that ends the headers;
        // a reply without that separator has no body to save.
        const char *body = (byteRead > 0) ? strstr(image, headerEnd) : NULL;
        if( NULL != body )
        {
            body += strlen(headerEnd);
            const int headerSize = static_cast<int>(body - image);
            const string path = foldname + resource.substr( slash );
            ofstream ofile( path.c_str(), ios::binary );
            if( ofile.is_open() )
            {
                ofile.write( body, byteRead - headerSize );
                ofile.close();
            }
        }

        // GetHttpResponse hands over a malloc()-allocated buffer.
        free(image);
    }
    return REPTILE_SUCCESS;
}


ReptileTool.cpp

#include "ReptileTool.h"
#include "ReptileToolDefine.h"
#include <cstdlib>

// All members are default-constructed.
ReptileTool::ReptileTool() {}

// Nothing to release explicitly.
ReptileTool::~ReptileTool() {}

// Placeholder that always reports success. It performs no Winsock start-up
// itself; WSAStartup is called by HttpRequestResponse::Initialise().
int ReptileTool::InitialiseSocket()
{
    return REPTILE_SUCCESS;
}

// Crawls one page: fetches `url`, extracts the image URLs referenced by its
// <img> tags and downloads them.
//
// url  - address of the page to crawl.
// type - requested resource kind; currently ignored, images are always
//        what gets downloaded.
//
// Returns REPTILE_SUCCESS when the page was fetched and processed, otherwise
// the status of the step that failed.
int ReptileTool::DownLoadResource(string &url,  ResType type)
{
    if (REPTILE_SUCCESS != m_httpRequestResponse.Initialise())
    {
        return REPTILE_FAILED;
    }

    int readBytes = 0;
    char* response = NULL;
    int result = m_httpRequestResponse.GetHttpResponse(url ,response, readBytes);
    if (REPTILE_SUCCESS == result && NULL == response)
    {
        result = REPTILE_FAILED;
    }

    if (REPTILE_SUCCESS == result)
    {
        // Copy the page into a string, then release the malloc()-allocated
        // buffer handed over by GetHttpResponse.
        string httpresponse = response;
        free(response);
        response = NULL;

        // Start from an empty list so that images found by an earlier call
        // are not downloaded again.
        m_imageUrl.clear();
        m_httpParse.ParseUrl(httpresponse, m_imageUrl);
        result = m_httpRequestResponse.DownLoadResource(m_imageUrl);
    }

    // Balance the successful Initialise() above on every path.
    m_httpRequestResponse.UnInitialise();
    return result;
}


Work.cpp

#include "ReptileTool.h"
#include "ReptileToolDefine.h"
// Entry point: downloads the images of one web page.
//
// The page address is taken from the first command-line argument; without
// one, the built-in sample address is used. The process exit code is the
// status returned by the crawl (REPTILE_SUCCESS is 0).
int main(int argc, char** argv)
{
    string url("http://blog.csdn.net/z644041867/article/details/40376383");
    if (argc > 1 && argv[1] != NULL)
    {
        url = argv[1];
    }

    ReptileTool tool;
    return tool.DownLoadResource (url, PICTURE);
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐