您的位置:首页 > 其它

VC中使用CInternetSession抓取网页内容

2011-12-01 11:37 411 查看

VC中使用CInternetSession抓取网页内容

2010-09-02 20:23:13| 分类:

vc++ MFC | 标签:
|字号大中小 订阅

VC:
#include "afxinet.h"

#include <string>
CString url ("http://localhost/web?cmd=start&sn=123456&ai=abcd");

CInternetSession session;

CHttpFile* pfile = (CHttpFile*)session.OpenURL(url,1,INTERNET_FLAG_TRANSFER_ASCII||INTERNET_FLAG_RELOAD,NULL,0);
DWORD dwStatusCode;

pfile -> QueryInfoStatusCode(dwStatusCode);

if(dwStatusCode == HTTP_STATUS_OK)

{

char strBuff[1025] = {0};

std::string strHtml;

while ((pfile->Read((void*)strBuff, 1024)) > 0)

{

strHtml += strBuff;

}
CString str;

str = strHtml.c_str();
int rcS = str.Find(_T("<rc>"));

int rcE = str.Find(_T("</rc>"));

CString rc = str.Mid(rcS + 4,rcE - rcS - 4);
int msgS = str.Find(_T("<msg>"));

int msgE = str.Find(_T("</msg>"));
CString msg = str.Mid( msgS + 5,msgE - msgS - 5);

}

pfile -> Close();

delete pfile;

session.Close();
C#:

using System.Net;
using System.IO;
// Request the URL and scan the reply line by line for <rc>...</rc> and
// <msg>...</msg> values.
string url = "http://localhost/web?cmd=start&sn=123456&ai=abcd";

WebRequest wrq = WebRequest.Create(url);

// GetResponse() returns a WebResponse (not a WebRequest). The response, its
// stream and the reader are all disposable, so wrap them in using blocks.
using (WebResponse wrs = wrq.GetResponse())
using (Stream stream = wrs.GetResponseStream())
using (StreamReader sr = new StreamReader(stream))
{
    string response;
    while ((response = sr.ReadLine()) != null)
    {
        int rcS = response.LastIndexOf("<rc>");
        int rcE = response.LastIndexOf("</rc>");
        // Require the closing tag after the opening tag so Substring never
        // receives a negative length.
        if (rcS != -1 && rcE > rcS)
        {
            string rc = response.Substring(rcS + 4, rcE - rcS - 4);
        }

        int msgS = response.LastIndexOf("<msg>");
        int msgE = response.LastIndexOf("</msg>");
        if (msgS != -1 && msgE > msgS)
        {
            string msg = response.Substring(msgS + 5, msgE - msgS - 5);
        }
    }
}
---------------------------------------------------------------------------------------------------------------------------------------
//头部包含afxinet.h

#include <afxinet.h>
CInternetSession::OpenURL函数原型为:
// Prototype of CInternetSession::OpenURL as quoted by the article.
CStdioFile* OpenURL( LPCTSTR pstrURL, // URL of the file to open

DWORD dwContext = 1, // context ID

DWORD dwFlags = INTERNET_FLAG_TRANSFER_ASCII, // flags

LPCTSTR pstrHeaders = NULL, // headers to send to the server

DWORD dwHeadersLength = 0 );// length of the headers sent to the server
dwFlags可以为:

INTERNET_FLAG_RELOAD 强制重读数据

INTERNET_FLAG_DONT_CACHE 不保存到缓存

INTERNET_FLAG_TRANSFER_ASCII 使用文本数据

INTERNET_FLAG_TRANSFER_BINARY 使用二进制数据
在 VC 中用 WinInet 的 CInternetSession::OpenURL(url),得到一个 CFile,读取其中的内容即可,详细代码如下

#include <stdio.h>

#include <afxinet.h>

int main(int argc, char* argv[])

{

CInternetSession session("HttpClient");

char * url = " http://www.imobile.com.cn/simcard.php?simcard=1392658";
CHttpFile* pfile = (CHttpFile *)session.OpenURL(url);

DWORD dwStatusCode;

pfile -> QueryInfoStatusCode(dwStatusCode);

if(dwStatusCode == HTTP_STATUS_OK)

{

CString content;

CString data;

while (pfile -> ReadString(data))

{

content += data + "\r\n";

}

content.TrimRight();

printf(" %s\n " ,(LPCTSTR)content);

}

pfile -> Close();

delete pfile;

session.Close();

return 0 ;

--------------------------------------------------------------------------------------------------------------------------------------------------------------

在 VC 中用 WinInet 的 CInternetSession::OpenURL(url),得到一个 CFile,读取其中的内容即可,详细代码如下

  #include <stdio.h>

  #include <afxinet.h>

  int main(int argc, char* argv[])

  {

  CInternetSession session("HttpClient");

  char * url = " http://www.imobile.com.cn/simcard.php?simcard=1392658";
  CHttpFile* pfile = (CHttpFile *)session.OpenURL(url);

  DWORD dwStatusCode;

  pfile -> QueryInfoStatusCode(dwStatusCode);

  if(dwStatusCode == HTTP_STATUS_OK)

  {

  CString content;

  CString data;

  while (pfile -> ReadString(data))

  {

  content += data + "rn";

  }

  content.TrimRight();

  printf(" %sn " ,(LPCTSTR)content);

  }

  pfile -> Close();

  delete pfile;

  session.Close();

  return  0 ;

  }

  #include <stdio.h>

  #include <afxinet.h>

  int main(int argc, char* argv[])

  {

  CInternetSession session("HttpClient");

  char * url = " http://www.imobile.com.cn/simcard.php?simcard=1392658";
  CHttpFile* pfile = (CHttpFile *)session.OpenURL(url);

  DWORD dwStatusCode;

  pfile -> QueryInfoStatusCode(dwStatusCode);

  if(dwStatusCode == HTTP_STATUS_OK)

  {

  CString content;

  CString data;

  while (pfile -> ReadString(data))

  {

  content += data + "rn";

  }

  content.TrimRight();

  printf(" %sn " ,(LPCTSTR)content);

  }

  pfile -> Close();

  delete pfile;

  session.Close();

  return  0 ;

  }

  其他如不从缓存中读取内容及如何使用代理连接现在就不说了,可以参考下面的链接,或者下次补上。另外不妨看看 Java 是如何读取 URL 内容的,更简单:

  // Issue a GET request and print the response body when the server answers 200.
  GetMethod httpMethod = new GetMethod("http://unmi.blogcn.com");

  try
  {
    int statusCode = new HttpClient().executeMethod(httpMethod);

    if (statusCode == HttpStatus.SC_OK)
    {
      System.out.println(httpMethod.getResponseBodyAsString());
    }
  }
  finally
  {
    // Release the connection even when the request or the body read throws.
    httpMethod.releaseConnection();
  }

  // Perform an HTTP GET and echo the body to stdout if the status is 200 OK.
  GetMethod httpMethod = new GetMethod("http://unmi.blogcn.com");

  try
  {
    int statusCode = new HttpClient().executeMethod(httpMethod);

    if (statusCode == HttpStatus.SC_OK)
    {
      System.out.println(httpMethod.getResponseBodyAsString());
    }
  }
  finally
  {
    // Always hand the connection back, including on an exception.
    httpMethod.releaseConnection();
  }

  内容取过来之后,当然是希望从中拣出需要的数据,可惜 VC6 中没有自带的正则表达式库,所以下一步要学用 boost 的正则表达式库。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: