网页抓包工具 只实现了抓图片的功能
2014-10-27 12:46
176 查看
HttpParse.h
HttpRequestResponse.h
ReptileTool.h
ReptileToolDefine.h
HttpParse.cpp
HttpRequestResponse.cpp
ReptileTool.cpp
Work.cpp
#ifndef _HTTP_PARSE_
#define _HTTP_PARSE_

#include <string>
#include <vector>

// Kept at header scope because the accompanying .cpp files use unqualified
// `string` / `vector` and rely on this directive being visible.
using namespace std;

/// Extracts image URLs from the text of an HTML page.
class HttpParse
{
public:
    HttpParse(void);
    ~HttpParse(void);

    /// Scans an HTML document for `<img` tags and collects their image URLs.
    ///
    /// @param response   HTML text to scan.
    /// @param m_imageurl Output list; every URL found is appended to it
    ///                   (existing entries are kept, duplicates are not removed).
    /// @return REPTILE_SUCCESS.
    int ParseUrl(string &response, vector<string> &m_imageurl);
};

#endif
HttpRequestResponse.h
#ifndef _HTTP_REQUEST_RESPONSE_ #define _HTTP_REQUEST_RESPONSE_ #include <string> #include <vector> #include "winsock2.h" #include <boost/serialization/singleton.hpp> using namespace std; class HttpRequestResponse { public: HttpRequestResponse(void); ~HttpRequestResponse(void); int Initialise(); int GetHttpResponse(string &url, char * &response, int &bytesRead); int DownLoadResource(vector<string> &resourceUrl); int UnInitialise(); protected: int UrlParse(); private: string m_url; string m_host; string m_resource; WSADATA m_wsaData; }; typedef boost::serialization::singleton<HttpRequestResponse> HttpRequestResponseAgent; #endif
ReptileTool.h
#ifndef _REPTILE_TOOL_ #define _REPTILE_TOOL_ #include <string> #include "winsock2.h" #include "HttpRequestResponse.h" #include "HttpParse.h" using namespace std; enum ResType { PICTURE = 0, AUDIO }; class ReptileTool { public: ReptileTool(void); ~ReptileTool(void); int InitialiseSocket(); int DownLoadResource(string &url, ResType type); private: HttpRequestResponse m_httpRequestResponse; HttpParse m_httpParse; vector<string> m_imageUrl; }; #endif
ReptileToolDefine.h
#ifndef _ERROR_DEFINE_
#define _ERROR_DEFINE_

// Longest URL (in characters) that HttpRequestResponse::GetHttpResponse accepts.
#define MAXHOSTNAMESIZE 200
// Upper bound for a resource path; not referenced by the code shown alongside this header.
#define MAXRESOURCESIZE 2000
// Initial size, in bytes, of the buffer used to receive an HTTP response (1 MiB).
#define DEFAULT_PAGE_BUF_SIZE 1048576

/// Status codes returned by the crawler functions.
enum DERROR_TYPE
{
    REPTILE_SUCCESS = 0,  // operation completed
    REPTILE_FAILED,       // generic failure (e.g. WSAStartup failed)
    HOSTNAME_ERR,         // URL empty, too long, or could not be split into host/path
    SOCKET_ERR            // name resolution, socket creation, connect or send failed
};

#endif
HttpParse.cpp
#include "HttpParse.h"
#include "ReptileToolDefine.h"
#include <string>

HttpParse::HttpParse(void)
{
}

HttpParse::~HttpParse(void)
{
}

/// Collects the image URL of every `<img` tag found in an HTML document.
///
/// For each tag the `lazy-src="` attribute is preferred; when the tag has
/// none, `src="` is used. Attributes are only looked up inside the tag itself
/// (up to its closing '>'), so a `src` that belongs to a later element is never
/// attributed to an image that has no source of its own.
///
/// @param url        The HTML text to scan (despite its name, this is the page
///                   content, not an address).
/// @param m_imageurl Output list; each non-empty attribute value is appended.
///                   Existing entries are kept and duplicates are not removed.
/// @return REPTILE_SUCCESS.
int HttpParse::ParseUrl(string &url, vector<string> &m_imageurl)
{
    const string tag = "<img";
    const string lazyAttr = "lazy-src=\"";
    const string srcAttr = "src=\"";

    string::size_type tagPos = url.find(tag);
    while (tagPos != string::npos)
    {
        const string::size_type attrBegin = tagPos + tag.size();

        // An unterminated tag at the end of the document extends to the end of the text.
        string::size_type tagEnd = url.find('>', attrBegin);
        if (tagEnd == string::npos)
        {
            tagEnd = url.size();
        }

        // Locate the start of the attribute value, preferring lazy-src over src.
        string::size_type valueBegin = string::npos;
        string::size_type hit = url.find(lazyAttr, attrBegin);
        if (hit != string::npos && hit < tagEnd)
        {
            valueBegin = hit + lazyAttr.size();
        }
        else
        {
            hit = url.find(srcAttr, attrBegin);
            if (hit != string::npos && hit < tagEnd)
            {
                valueBegin = hit + srcAttr.size();
            }
        }

        if (valueBegin != string::npos)
        {
            // A missing closing quote or an empty value yields no URL.
            const string::size_type valueEnd = url.find('"', valueBegin);
            if (valueEnd != string::npos && valueEnd > valueBegin)
            {
                m_imageurl.push_back(url.substr(valueBegin, valueEnd - valueBegin));
            }
        }

        // Always move on to the next tag so the scan is guaranteed to terminate.
        tagPos = url.find(tag, attrBegin);
    }

    return REPTILE_SUCCESS;
}
HttpRequestResponse.cpp
#include "HttpRequestResponse.h"
#include "ReptileToolDefine.h"
#include <boost/algorithm/string.hpp>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <fstream>

#pragma comment(lib, "ws2_32.lib")

using namespace std;

HttpRequestResponse::HttpRequestResponse(void)
{
}

HttpRequestResponse::~HttpRequestResponse(void)
{
}

/// Starts Winsock 2.2.
/// @return REPTILE_SUCCESS, or REPTILE_FAILED when WSAStartup reports an error.
int HttpRequestResponse::Initialise()
{
    if (WSAStartup(MAKEWORD(2, 2), &m_wsaData) != 0)
    {
        return REPTILE_FAILED;
    }
    return REPTILE_SUCCESS;
}

/// Releases Winsock.
/// @return REPTILE_SUCCESS.
int HttpRequestResponse::UnInitialise()
{
    WSACleanup();
    return REPTILE_SUCCESS;
}

/// Sends `GET <path> HTTP/1.1` to the host named in `url` (port 80) and reads
/// the reply until the peer closes the connection.
///
/// @param url       Absolute URL; rejected when empty or longer than MAXHOSTNAMESIZE.
/// @param response  On success receives a NUL-terminated malloc() buffer holding
///                  headers and body; the caller must free() it. Set to NULL on
///                  every failure path.
/// @param bytesRead On success receives the number of bytes in `response`
///                  (excluding the terminator); 0 on failure.
/// @return REPTILE_SUCCESS; HOSTNAME_ERR for a bad URL; SOCKET_ERR for
///         resolve/socket/connect/send/receive failures; REPTILE_FAILED when
///         memory cannot be allocated.
int HttpRequestResponse::GetHttpResponse(string &url, char * &response, int &bytesRead)
{
    // Give the out-parameters a defined value on every path.
    response = NULL;
    bytesRead = 0;

    if (url.empty() || url.size() > MAXHOSTNAMESIZE)
    {
        return HOSTNAME_ERR;
    }

    m_url = url;
    if (REPTILE_SUCCESS != UrlParse())
    {
        // Do not continue with host/path left over from a previous request.
        cout << "UrlParse failed." << endl;
        return HOSTNAME_ERR;
    }

    struct hostent *hp = gethostbyname(m_host.c_str());
    if (NULL == hp)
    {
        return SOCKET_ERR;
    }

    SOCKET sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (INVALID_SOCKET == sock)
    {
        return SOCKET_ERR;
    }

    // Build the server address.
    SOCKADDR_IN sa;
    memset(&sa, 0, sizeof(sa));
    sa.sin_family = AF_INET;
    sa.sin_port = htons(80);
    memcpy(&sa.sin_addr, hp->h_addr, 4);

    // Establish the connection.
    if (0 != connect(sock, (SOCKADDR *)&sa, sizeof(sa)))
    {
        cout << "Can not connect: " << url << endl;
        closesocket(sock);
        return SOCKET_ERR;
    }

    const string request = "GET " + m_resource + " HTTP/1.1\r\nHost:" + m_host + "\r\nConnection:Close\r\n\r\n";

    // Send the request.
    if (SOCKET_ERROR == send(sock, request.c_str(), static_cast<int>(request.size()), 0))
    {
        cout << "send error" << endl;
        closesocket(sock);
        return SOCKET_ERR;
    }

    int capacity = DEFAULT_PAGE_BUF_SIZE;
    char *pageBuf = (char *)malloc(capacity);
    if (NULL == pageBuf)
    {
        closesocket(sock);
        return REPTILE_FAILED;
    }

    int ret = 1;
    while (ret > 0)
    {
        ret = recv(sock, pageBuf + bytesRead, capacity - bytesRead, 0);
        if (ret > 0)
        {
            bytesRead += ret;
        }

        // Keep at least 100 spare bytes so the terminator always fits.
        if (capacity - bytesRead < 100)
        {
            char *grown = (capacity <= INT_MAX / 2) ? (char *)realloc(pageBuf, capacity * 2) : NULL;
            if (NULL == grown)
            {
                // realloc leaves the old block untouched on failure; release it.
                free(pageBuf);
                closesocket(sock);
                bytesRead = 0;
                return REPTILE_FAILED;
            }
            pageBuf = grown;
            capacity *= 2;
        }
    }

    closesocket(sock);

    // A receive error before any data arrived is a failure; a partial reply is
    // still handed back so the caller can use what was received.
    if (SOCKET_ERROR == ret && 0 == bytesRead)
    {
        free(pageBuf);
        return SOCKET_ERR;
    }

    pageBuf[bytesRead] = '\0';
    response = pageBuf;
    return REPTILE_SUCCESS;
}
int HttpRequestResponse::UrlParse() { string host = boost::erase_first_copy(m_url, "http://"); if( NULL == strstr( host.c_str(), "/") ) { return HOSTNAME_ERR; } int index = host.find_first_of("/"); m_host = host.substr(0, index); m_resource = host.substr(index, host.size()); return REPTILE_SUCCESS; } int HttpRequestResponse::DownLoadResource(vector<string> &resourceUrl) { //生成保存该url下图片的文件夹 string foldname = "./image"; if(!CreateDirectory( foldname.c_str(),NULL )) cout << "Can not create directory:"<< foldname<<endl; char* image; int byteRead; for( int i=0; i<resourceUrl.size(); i++){ //判断是否为图片,bmp,jgp,jpeg,gif string str = resourceUrl[i]; int pos = str.find_last_of("."); if( pos == string::npos ) continue; else{ string ext = str.substr( pos+1, str.size()-pos-1 ); if( ext!="bmp"&& ext!="jpg" && ext!="jpeg"&& ext!="gif"&&ext!="png") continue; } //下载其中的内容 if( REPTILE_SUCCESS == GetHttpResponse(resourceUrl[i], image, byteRead)){ if (0 == strlen(image)) { continue; } const char *p=image; const char * pos = strstr(p,"\r\n\r\n")+strlen("\r\n\r\n"); int index = resourceUrl[i].find_last_of("/"); if( index!=string::npos ){ string imgname = resourceUrl[i].substr( index , resourceUrl[i].size() ); ofstream ofile( foldname+imgname, ios::binary ); if( !ofile.is_open() ) continue; int titlesize = pos-p; ofile.write( pos, byteRead- titlesize ); ofile.close(); } // free(image.c_str()); } } return REPTILE_SUCCESS; }
ReptileTool.cpp
#include "ReptileTool.h"
#include "ReptileToolDefine.h"
#include <cstdlib>

ReptileTool::ReptileTool(void)
{
}

ReptileTool::~ReptileTool(void)
{
}

/// No-op kept for interface compatibility; Winsock start-up is done through
/// HttpRequestResponse::Initialise() inside DownLoadResource().
/// @return REPTILE_SUCCESS.
int ReptileTool::InitialiseSocket()
{
    return REPTILE_SUCCESS;
}

/// Fetches the page at `url`, extracts its image URLs and downloads them.
///
/// @param url  Absolute `http://` URL of the page to crawl.
/// @param type Requested resource kind. Only image download is implemented,
///             so the value is currently not used.
/// @return REPTILE_SUCCESS, or the status code of the first step that failed.
int ReptileTool::DownLoadResource(string &url, ResType type)
{
    (void)type;

    int status = m_httpRequestResponse.Initialise();
    if (REPTILE_SUCCESS != status)
    {
        return status;
    }

    char *response = NULL;
    int readBytes = 0;
    status = m_httpRequestResponse.GetHttpResponse(url, response, readBytes);
    if (REPTILE_SUCCESS == status && NULL == response)
    {
        status = REPTILE_FAILED;
    }

    if (REPTILE_SUCCESS == status)
    {
        string httpresponse = response;

        // Start from an empty list so a second crawl does not re-download the
        // images collected by a previous one.
        m_imageUrl.clear();

        status = m_httpParse.ParseUrl(httpresponse, m_imageUrl);
        if (REPTILE_SUCCESS == status)
        {
            status = m_httpRequestResponse.DownLoadResource(m_imageUrl);
        }
    }

    // The response buffer comes from malloc(); free(NULL) is a no-op.
    free(response);

    m_httpRequestResponse.UnInitialise();
    return status;
}
Work.cpp
#include "ReptileTool.h"
#include "ReptileToolDefine.h"

/// Entry point: crawls one page and downloads the images it references.
///
/// The page defaults to the built-in URL; a different one may be passed as the
/// first command-line argument. The process exit code is the status returned
/// by ReptileTool::DownLoadResource (REPTILE_SUCCESS is 0).
int main(int argc, char **argv)
{
    string url("http://blog.csdn.net/z644041867/article/details/40376383");
    if (argc > 1 && argv[1] != NULL)
    {
        url = argv[1];
    }

    ReptileTool tool;
    return tool.DownLoadResource(url, PICTURE);
}
相关文章推荐
- C#实现将网页保存成图片的网页拍照功能
- 简单图片浏览工具——ImageSwitcher:实现上一幅、下一幅切换功能
- iOS WebView 如何通过js获取网页中所有图片并加入点击事件,实现浏览图片的功能
- c#实现网页图片提取工具代码分享
- 进击的KFC:iOS WebView 如何通过js获取网页中所有图片并加入点击事件,实现浏览图片的功能
- python批处理实现爬取网页静态图片文件重命名图片统一修改大小等功能
- javascript实现查看html网页放大图片功能
- Android----Intent,运用由android系统帮助匹配实现打电话、发送短信、打开网页、播放音乐、打开视频、打开图片、安装APK、通知栏消息、拍照上传头像等功能
- 搭建带lua模块的nginx调用ImageMagick工具实现实时剪切缩放图片功能
- Servlet实现将图片写入到网页和实现图片下载的功能
- Android实现网页图片浏览功能
- [PHP]移动端网页如何使用JqueryMobile+PHP实现上传图片的功能
- dreamweaver实现图片超链接(点击图片按钮实现下载指定apk功能,可以用于在静态网页里做公司宣传页等)
- 【JavaScript】基于H5 canvas实现的画板绘图工具(类似你画我猜)——整合颜色选取、保存图片到本地功能
- Servlet实现将图片写入到网页和实现图片下载的功能
- Java实现网页截屏功能(图片下载功能)的几种方式(整理)
- 实现自动判断图片或flash,并在网页中显示
- JavaScript实现类似TitleAlt功能并且可以显示图片。
- 让网页实现弹出QQ,MSN窗口功能
- CSS+JS实现网页(图片)特效