您的位置:首页 > 产品设计 > UI/UE

【LibUIDK界面库系列文章】解析网页中的table内容

2017-10-04 20:29 411 查看
作者:刘树伟

QQ:1584793892

查看完整内容及示例代码,请访问:www.iuishop.com

日期: 2013-10-18

main.jsp由menu1.jsp和../hq/hq.jsp组成。

而我们需要解析的table,正是位于hq.jsp中。

主界面是main.jsp。

一、不管main.jsp是由第三方软件用WebBrowser控件加载,还是由IE加载,我们的入口都是先找到web窗口的窗口句柄,这可以由FindWindow结合GetWindow来做到。下面是一个从主窗口句柄出发,递归查找指定窗口类名的子窗口(这里即WebBrowser窗口)的函数:

// Recursively searches the descendants of ParentWnd for the first window whose
// class name equals szFindClassName (exact, case-sensitive _tcscmp match).
//
// The search is depth-first: each child is tested, then its own subtree is
// searched, before moving on to the next sibling. ParentWnd itself is never
// tested.
//
// ParentWnd       - window whose child tree is searched.
// szFindClassName - class name to look for, e.g. _T("Internet Explorer_Server").
//
// Returns the handle of the first match, or NULL when no descendant matches
// or szFindClassName is NULL.
HWND FindWithClassName(HWND ParentWnd, TCHAR *szFindClassName)
{
    if (szFindClassName == NULL)
        return NULL;

    for (HWND hChild = ::GetWindow(ParentWnd, GW_CHILD);
         hChild != NULL;
         hChild = ::GetWindow(hChild, GW_HWNDNEXT))
    {
        // A window class name may be up to 256 characters long, so reserve
        // room for the longest name plus the terminator; a shorter buffer
        // would silently truncate the name and make the comparison fail.
        TCHAR szClassName[257] = {0};
        const int nBufChars = (int)(sizeof(szClassName) / sizeof(szClassName[0]));

        // GetClassName returns 0 on failure (e.g. the window was destroyed
        // in the meantime); such a window cannot match, but its siblings
        // and any remaining children are still visited.
        if (::GetClassName(hChild, szClassName, nBufChars) > 0 &&
            _tcscmp(szClassName, szFindClassName) == 0)
        {
            return hChild;
        }

        HWND hFindWnd = FindWithClassName(hChild, szFindClassName);
        if (hFindWnd != NULL)
            return hFindWnd;
    }

    return NULL;
}

我们只要调用HWND hBrowserWnd = FindWithClassName(主窗口句柄, _T("Internet Explorer_Server"))就可以找到web窗口。

二、通过hBrowserWnd句柄,可以得到web的IHTMLDocument2、IHTMLWindow2接口:

// Retrieves the IHTMLDocument2 interface of the page shown in a browser
// window (a window of class "Internet Explorer_Server").
//
// The window is asked for its document object with the registered
// WM_HTML_GETOBJECT message; the reply is turned into a COM interface by
// ObjectFromLresult (exported by OLEACC.DLL). From that document the script
// object is fetched, queried for IHTMLWindow2, and the window's document is
// what is finally returned.
//
// hBrowserWnd - handle of the browser window.
// ppDoc2      - receives the document interface on success; the caller owns
//               the reference and must Release() it. Set to NULL on failure.
//
// Returns 0 on success, otherwise:
//   -1  OLEACC.DLL could not be loaded
//   -2  ObjectFromLresult is not exported by OLEACC.DLL
//   -3  the window did not answer the message, or ObjectFromLresult failed
//   -4  the document's script object could not be obtained
//   -5  the script object is not an IHTMLWindow2, or get_document failed
//   -6  ppDoc2 is NULL
int GetPageIHTMLDocument2Interface(HWND hBrowserWnd, IHTMLDocument2 **ppDoc2)
{
    if (ppDoc2 == NULL)
        return -6;
    *ppDoc2 = NULL;

    // Explicitly load MSAA so we know if it's installed
    HINSTANCE hInst = ::LoadLibrary(_T("OLEACC.DLL"));
    if (hInst == NULL)
        return -1;

    // Result of the message; it becomes the first argument of
    // ObjectFromLresult. SendMessageTimeout writes a DWORD_PTR, which is
    // wider than DWORD on 64-bit Windows, so it must not be received
    // through a DWORD pointer.
    DWORD_PTR dwMsgResult = 0;
    UINT nMsg = ::RegisterWindowMessage(_T("WM_HTML_GETOBJECT"));
    LRESULT lSent = ::SendMessageTimeout(hBrowserWnd, nMsg, 0L, 0L, SMTO_ABORTIFHUNG, 1000, &dwMsgResult);

    // GetProcAddress only accepts narrow (ANSI) names, so the export name
    // must not be wrapped in _T().
    LPFNOBJECTFROMLRESULT ObjectFromLresult = (LPFNOBJECTFROMLRESULT)::GetProcAddress(hInst, "ObjectFromLresult");
    if (ObjectFromLresult == NULL)
    {
        ::FreeLibrary(hInst);
        return -2;
    }

    // The interface requested here is IHTMLDocument, so it is held in a
    // pointer of exactly that type; the smart pointer releases it when the
    // function returns.
    CComPtr<IHTMLDocument> spDoc;
    HRESULT hr = E_FAIL;
    if (lSent != 0) // 0 means the message timed out or could not be delivered
    {
        hr = ObjectFromLresult((LRESULT)dwMsgResult, IID_IHTMLDocument, 0, (void **)&spDoc);
    }
    ::FreeLibrary(hInst);
    if (FAILED(hr) || spDoc == NULL)
        return -3;

    CComPtr<IDispatch> spScript;
    hr = spDoc->get_Script(&spScript);
    if (FAILED(hr) || spScript == NULL)
        return -4;

    CComQIPtr<IHTMLWindow2> spWin(spScript);
    if (spWin == NULL)
        return -5;

    hr = spWin->get_document(ppDoc2);
    if (FAILED(hr) || *ppDoc2 == NULL)
    {
        *ppDoc2 = NULL;
        return -5;
    }

#ifdef _DEBUG
    // Debugging aid only: lets the page URL be inspected in the debugger.
    CComBSTR bstrUrl;
    (*ppDoc2)->get_URL(&bstrUrl);
#endif // _DEBUG

    return 0;
}

三、通过main.jsp的IHTMLDocument2接口,可以得到hq.jsp子frame的IHTMLDocument2:

// nChildFrameIndex表示hq.jsp子frame在main.jsp中的位置索引,lpszDocTitleFilter用来验证得到的hq.jsp子frame的title是否与lpszDocTitleFilter相等

// 如果lpszDocTitleFilter为NULL,则不经行验证。

int GetChildFrameIHTMLDocument2Interface(IHTMLDocument2 *pDoc2, int nChildFrameIndex, LPCTSTR lpszDocTitleFilter, IHTMLDocument2 **ppDoc2)

{

 if (pDoc2 == NULL)

  return -1;

 if (ppDoc2 == NULL)

  return -2;

 //

 // Get the child frame collection.

 //

 CComPtr<IHTMLFramesCollection2> spfc2Frames;

 HRESULT hr = pDoc2->get_frames(&spfc2Frames);

 if (FAILED(hr))

 {

  return -10;

 }

#ifdef _DEBUG

 LONG lChildFrameCount = 0;

 spfc2Frames->get_length(&lChildFrameCount);

#endif // _DEBUG

 

 //

 // Get child frame by index

 //

 CComVariant varChildFrame;

 hr = spfc2Frames->item(&CComVariant(nChildFrameIndex), &varChildFrame);

 if (FAILED(hr))

 {

  return -11;

 }

 CComQIPtr<IHTMLWindow2> spwin2ChildFrame = varChildFrame.pdispVal;

 if (spwin2ChildFrame == NULL)

 {

  return -12;

 }

 

 hr = spwin2ChildFrame->get_document(ppDoc2);

 if (FAILED(hr))

 {

  return -13;

 }

#ifdef _DEBUG

 BSTR bstrURL;

 (*ppDoc2)->get_URL(&bstrURL);

 CString strURL = bstrURL;

#endif // _DEBUG

 // Get the title of document(<title> tag)

 if (lpszDocTitleFilter != NULL)

 {

  CComBSTR bstrTitle;

  (*ppDoc2)->get_title(&bstrTitle);

  CString strTitle = (LPCTSTR)_bstr_t(bstrTitle);

  if (strTitle.CompareNoCase(lpszDocTitleFilter) != 0)

  {

   *ppDoc2 = NULL;

   return -14;

  }

 }

 return 0;

}

四、通过hq.jsp的IHTMLDocument2接口,得到table的IHTMLTable接口:

// lpszTableIDFilter表示我们要得到的table的id。GetIHTMLTableInterface返回第一个id为lpszTableIDFilter的table

int GetIHTMLTableInterface(IHTMLDocument2 *pDoc2, LPCTSTR lpszTableIDFilter, IHTMLTable **ppTable)

{

 if (pDoc2 == NULL)

  return -1;

 if (ppTable == NULL)

  return -2;

 if (lpszTableIDFilter == NULL)

  return -3;

 

 CComQIPtr<IHTMLElementCollection> spChildrenElement;

 HRESULT hr = pDoc2->get_all(&spChildrenElement);

 if (FAILED(hr))

 {

  return -10;

 }

 

 BOOL bFind = FALSE;

 

 // 通过遍历所有元素,找到id为lpszTableIDFilter的table

 LONG lEleCount = 0;

 hr = spChildrenElement->get_length(&lEleCount);

 for (int i = 0; i < lEleCount; ++i)

 {

  CComPtr<IDispatch> pTable;

  hr = spChildrenElement->item(COleVariant((long)i), COleVariant((long)0), &pTable);

  if (FAILED(hr))

   continue;

  

  CComQIPtr<IHTMLElement> spEleTable = pTable;

  

  BSTR bstrEleID;

  spEleTable->get_id(&bstrEleID);

  CString strEleID = bstrEleID;

  if (strEleID == lpszTableIDFilter)

  {

   hr = pTable->QueryInterface(IID_IHTMLTable, (void **)ppTable);

   if (FAILED(hr))

   {

    return -11;

   }

   

   bFind = TRUE;

   

   break;

  }

 }

 

 return bFind ? 0 : -1;

}

五、通过table的IHTMLTable接口,可以读取table的单元格IHTMLElement:

int GetCellIHTMLElementInterface(IHTMLTable *pTable, UINT uRow, UINT uCol, IHTMLElement **ppCell)

{

 if (pTable == NULL)

  return -1;

 if (ppCell == NULL)

  return -2;

 // 每行作为一个单位,先得到所有行。

 CComPtr<IHTMLElementCollection> spAllRows;

 HRESULT hr = pTable->get_rows(&spAllRows);

 if (FAILED(hr))

 {

  return -10;

 }

 

 LONG lRowCount = 0;

 spAllRows->get_length(&lRowCount);

 if (uRow >= (UINT)lRowCount)

 {

  CString strInfo;

  strInfo.Format(_T("现在只有 %d 行自选商品,你输入了一个超出范围的数字,请重新输入!"), lRowCount - 1); // lRowCount包含了表头

  AfxMessageBox(strInfo);

  return -11;

 }

 

 // 得到第nRow行

 _variant_t varIndex;

 IDispatch *pRow = NULL;

 varIndex = (LONG)uRow;

 hr = spAllRows->item(varIndex, varIndex, &pRow);

 if (FAILED(hr))

 {

  return -12;

 }

 

 CComQIPtr<IHTMLTableRow> spRow = pRow;

 pRow->Release();

 

 // 每一行都是由N个单元格cell组成.

 CComPtr<IHTMLElementCollection> pAllRowCells = NULL; 

 hr = spRow->get_cells(&pAllRowCells);

 if (FAILED(hr))

 {

  return -13;

 }

 

 LONG lCellCountPerRow = 0;

 hr = pAllRowCells->get_length(&lCellCountPerRow);

 ASSERT(lCellCountPerRow == 8);

 if (FAILED(hr))

 {

  return -14;

 }

 if (uCol >= (UINT)lCellCountPerRow)

 {

  CString strInfo;

  strInfo.Format(_T("现在只有 %d 列数据,你输入了一个超出范围的数字,请重新输入!"), lCellCountPerRow);

  AfxMessageBox(strInfo);

  return -15;

 }

 // 读取第nRow行第nCol列数据

 V_I4(&varIndex) = uCol;

 IDispatch *pCol = NULL;

 hr = pAllRowCells->item(varIndex, varIndex, &pCol);

 if (FAILED(hr) || pCol == NULL)

 {

  return -16;

 }

 hr = pCol->QueryInterface(__uuidof(IHTMLElement), (void **)ppCell);

 pCol->Release();

 if (FAILED(hr))

 {

  return -17;

 }

 return 0;

}

六、这样,通过单元格的IHTMLElement接口,就可以访问单元格的内容了:

 // Usage fragment: reads the text of a table cell through its IHTMLElement.
 // pCell is declared elsewhere; presumably it is the interface returned by
 // GetCellIHTMLElementInterface — confirm against the full project.
 extern IHTMLElement *pCell;

 // Receives the cell's inner text.
 // NOTE(review): this fragment never frees bstrCell; presumably the code that
 // follows must release it with SysFreeString — confirm.
 BSTR bstrCell;

 HRESULT hr = pCell->get_innerText(&bstrCell);

详细的工程,请参考:depot\Outsourcing\bdszjj\SecKiller

hq.jsp文件内容如下:

<!--公用js函数-->

<SCRIPT LANGUAGE="JavaScript">

<!--

function dateChk1(str){ //hh:mm
    // Checks that str is a time written as exactly two digits, a colon and
    // two digits, with the hour below 24 and the minute below 60.
    // Returns true when it is, false otherwise.
    var parts = str.match(/^(\d{2})\:(\d{2})$/);
    if (parts == null){
        return false;
    }
    // Both groups are plain digit pairs, so they can never be negative;
    // only the upper bounds need checking.
    var hour = Number(parts[1]);
    var minute = Number(parts[2]);
    return hour < 24 && minute < 60;
}

function dateChk(str){ //yyyy-mm-dd
    // Checks that str is a calendar date written as yyyy-mm-dd.
    // The year must be after 1950 and no later than 2050, the month must be
    // 01..12, and the day must exist in that month (February has 29 days
    // only in leap years). Returns true for a valid date, false otherwise.
    var parts = str.match(/^(\d{4})\-(\d{2})\-(\d{2})$/);
    if (parts == null){
        return false;
    }

    var year = parseInt(parts[1], 10);
    var month = parseInt(parts[2], 10);
    var day = parseInt(parts[3], 10);

    if (year <= 1950 || year > 2050){
        return false;
    }
    if (month < 1 || month > 12){
        return false;
    }

    // Reject days that do not exist in the given month, e.g. 02-31 or 04-31.
    var daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    var isLeapYear = (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0);
    var maxDay = (month == 2 && isLeapYear) ? 29 : daysInMonth[month - 1];

    return day >= 1 && day <= maxDay;
}

function FormatNumber(srcStr,nAfterDot){
    // Formats srcStr as text with nAfterDot digits after the decimal point.
    //  - No decimal point in the input: "." and nAfterDot zeros are appended
    //    (the "." is appended even when nAfterDot is 0).
    //  - Fewer decimals than requested: zeros are appended.
    //  - As many or more decimals than requested: the value is rounded to
    //    nAfterDot decimals.
    // The result is always a string; the rounding branch keeps trailing
    // zeros (e.g. "1.501" with 2 decimals gives "1.50", not the number 1.5).
    // All working variables are local, so no globals are created.
    var text = "" + srcStr;
    var dotPos = text.indexOf(".", 0);
    var resultStr;
    var i;

    if (dotPos == -1){
        resultStr = text + ".";
        for (i = 0; i < nAfterDot; i++){
            resultStr = resultStr + "0";
        }
        return resultStr;
    }

    var decimals = text.length - dotPos - 1;
    if (decimals >= nAfterDot){
        var nTen = 1;
        for (i = 0; i < nAfterDot; i++){
            nTen = nTen * 10;
        }
        var rounded = Math.round(parseFloat(text) * nTen) / nTen;
        // toFixed pads with zeros so this branch matches the others;
        // it rejects negative digit counts, hence the clamp.
        return rounded.toFixed(nAfterDot > 0 ? nAfterDot : 0);
    }

    resultStr = text;
    for (i = 0; i < nAfterDot - decimals; i++){
        resultStr = resultStr + "0";
    }
    return resultStr;
}

//禁止右键的脚本

//from www.jx165.com

function nocontextmenu(){
    // Context-menu handler: stops the current event from bubbling, marks
    // its return value as false, and returns false.
    // Relies on the global `event` object being available.
    var evt = event;
    evt.cancelBubble = true;
    evt.returnValue = false;
    return false;
}

function norightclick(e){
    // Mouse-down handler that rejects presses of button 2 or 3.
    // Returns false for those buttons and nothing (undefined) otherwise.
    if (window.Event){
        // Browsers exposing window.Event: the button is read from the
        // event argument's `which` property.
        var which = e.which;
        if (which == 2 || which == 3){
            return false;
        }
        return;
    }

    // Otherwise the global `event` object is used; besides returning false
    // the event is also cancelled.
    var button = event.button;
    if (button == 2 || button == 3){
        event.cancelBubble = true;
        event.returnValue = false;
        return false;
    }
}

// Disable the right-click menu: install the two handlers defined above on the document.

document.oncontextmenu = nocontextmenu;  // for IE5+

document.onmousedown = norightclick;  // for all others

//-->

</SCRIPT>

<html>

<head>

<link href="main.css" rel="stylesheet" type="text/css">

<title></title>

</head>

<body bgcolor="#eaf4fd" >

<form name=frm>

<table width="94%" id=tb  border="0" align="center" cellpadding="0" cellspacing="1" bgcolor="#b8c3c9">

<tr height="25" id="0">

 <!-- <td width=""><div align="center" class="td_bt">下单</div></td> -->

 <!-- <td width=""><div align="center" class="td_bt">商品详情</div></td>-->

    <td width=""><div align="center" class="td_bt">商品码</div></td>

 <td width=""><div align="center" class="td_bt">品名/质量标识</div></td>

 <td width=""><div align="center" class="td_bt">重量</div></td> 

 <td width=""><div align="center" class="td_bt">价格</div></td>

 <td width=""><div align="center" class="td_bt">交货仓库</div></td>

 <td width=""><div align="center" class="td_bt">次数</div></td>

 <td width=""><div align="center" class="td_bt">自选商品</div></td>

  </tr>

  <script><!--

 var codes="";

 //--></script>

 

</table>

 

<br>

</form>

</body>

</html>

main.jsp内容如下:

<!--公用js函数-->

<SCRIPT LANGUAGE="JavaScript">

<!--

function dateChk1(str){ //hh:mm
    // Checks that str is a time written as exactly two digits, a colon and
    // two digits, with the hour below 24 and the minute below 60.
    // Returns true when it is, false otherwise.
    var parts = str.match(/^(\d{2})\:(\d{2})$/);
    if (parts == null){
        return false;
    }
    // Both groups are plain digit pairs, so they can never be negative;
    // only the upper bounds need checking.
    var hour = Number(parts[1]);
    var minute = Number(parts[2]);
    return hour < 24 && minute < 60;
}

function dateChk(str){ //yyyy-mm-dd
    // Checks that str is a calendar date written as yyyy-mm-dd.
    // The year must be after 1950 and no later than 2050, the month must be
    // 01..12, and the day must exist in that month (February has 29 days
    // only in leap years). Returns true for a valid date, false otherwise.
    var parts = str.match(/^(\d{4})\-(\d{2})\-(\d{2})$/);
    if (parts == null){
        return false;
    }

    var year = parseInt(parts[1], 10);
    var month = parseInt(parts[2], 10);
    var day = parseInt(parts[3], 10);

    if (year <= 1950 || year > 2050){
        return false;
    }
    if (month < 1 || month > 12){
        return false;
    }

    // Reject days that do not exist in the given month, e.g. 02-31 or 04-31.
    var daysInMonth = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    var isLeapYear = (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0);
    var maxDay = (month == 2 && isLeapYear) ? 29 : daysInMonth[month - 1];

    return day >= 1 && day <= maxDay;
}

function FormatNumber(srcStr,nAfterDot){
    // Formats srcStr as text with nAfterDot digits after the decimal point.
    //  - No decimal point in the input: "." and nAfterDot zeros are appended
    //    (the "." is appended even when nAfterDot is 0).
    //  - Fewer decimals than requested: zeros are appended.
    //  - As many or more decimals than requested: the value is rounded to
    //    nAfterDot decimals.
    // The result is always a string; the rounding branch keeps trailing
    // zeros (e.g. "1.501" with 2 decimals gives "1.50", not the number 1.5).
    // All working variables are local, so no globals are created.
    var text = "" + srcStr;
    var dotPos = text.indexOf(".", 0);
    var resultStr;
    var i;

    if (dotPos == -1){
        resultStr = text + ".";
        for (i = 0; i < nAfterDot; i++){
            resultStr = resultStr + "0";
        }
        return resultStr;
    }

    var decimals = text.length - dotPos - 1;
    if (decimals >= nAfterDot){
        var nTen = 1;
        for (i = 0; i < nAfterDot; i++){
            nTen = nTen * 10;
        }
        var rounded = Math.round(parseFloat(text) * nTen) / nTen;
        // toFixed pads with zeros so this branch matches the others;
        // it rejects negative digit counts, hence the clamp.
        return rounded.toFixed(nAfterDot > 0 ? nAfterDot : 0);
    }

    resultStr = text;
    for (i = 0; i < nAfterDot - decimals; i++){
        resultStr = resultStr + "0";
    }
    return resultStr;
}

//禁止右键的脚本

//from www.jx165.com

function nocontextmenu(){
    // Context-menu handler: stops the current event from bubbling, marks
    // its return value as false, and returns false.
    // Relies on the global `event` object being available.
    var evt = event;
    evt.cancelBubble = true;
    evt.returnValue = false;
    return false;
}

function norightclick(e){
    // Mouse-down handler that rejects presses of button 2 or 3.
    // Returns false for those buttons and nothing (undefined) otherwise.
    if (window.Event){
        // Browsers exposing window.Event: the button is read from the
        // event argument's `which` property.
        var which = e.which;
        if (which == 2 || which == 3){
            return false;
        }
        return;
    }

    // Otherwise the global `event` object is used; besides returning false
    // the event is also cancelled.
    var button = event.button;
    if (button == 2 || button == 3){
        event.cancelBubble = true;
        event.returnValue = false;
        return false;
    }
}

// Disable the right-click menu: install the two handlers defined above on the document.

document.oncontextmenu = nocontextmenu;  // for IE5+

document.onmousedown = norightclick;  // for all others

//-->

</SCRIPT>

<!--用户身份判断-->

<html>

<head>

<title>竞买竞卖系统</title>

</head>

<frameset rows="37,*" cols="*" framespacing="0" frameborder="no" border="0">

  <frame src="menu1.jsp" name="top1" frameborder="no" scrolling="no" noresize id="top1" APPLICATION="yes">

  <frame src="../hq/hq.jsp" name="commodityList" frameborder="no" scrolling="auto" id="commodityList" APPLICATION="yes">

</frameset>

<noframes>

<body>

该系统应用了框架技术,您的浏览器不支持框架,请更换更高版本的浏览器。

</body>

</noframes>

</html>


内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息