Delphi 通过MSHTML实现一个HTML解析类
2010-01-05 09:14
344 查看
最近经常会模拟网页提交返回网页源码,然后获得网页中相应的元素,于是需要常常解析Html中相应的各种元素。网络是个好东西,搜索一番,就找到了好几个Delphi版本的HtmlParser的类库,试着使用了几个,发现解析起来都不完整,或多或少地会出现一些问题!于是想到了如果界面上有一个浏览器,我们可以通过WebBrowser的Document接口对网页元素进行操作,很是方便!但是模拟网页提交,界面上是不一定要出现WebBrowser的,肯定有办法不通过WebBrowser就直接解析HTML,那便是我不要WebBrowser这个外壳,只要它里面的Document文档接口对象就能实现对Html的解析了。查找了一番MSDN,然后Google一下,果然可行,构建方法如下:
// 创建IHTMLDocument2接口
CoCreateInstance(CLASS_HTMLDocument, nil,
  CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
接口创建好了之后就能够对文档元素进行解析了,很是爽快!
结合了我自己的特有操作,我对Combobox,Table,Frame等一些网页元素做了相应的封装,实现了一个HTMLParser,大致代码如下:
这里只给出声明,代码请在最后下载
代码
(*
****************************************************
*)
(*
得闲工作
室
*)
(*
网页元素操作
类库
*)
(*
*)
(*
DxHtmlElement Unit
*)
(*
Copyright(c) 2008-2010 不得
闲
*)
(*
email:appleak46@yahoo.com.cn QQ:75492895
*)
(*
****************************************************
*)
unit
DxHtmlElement;
interface
uses
Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo;
{ Element-type predicates.
  Every function below takes a single IHTMLElement and returns a Boolean.
  Only the declarations are part of this listing; the implementations
  (and therefore the exact matching rules) are not shown here. }
function IsSelectElement(eleElement: IHTMLElement): Boolean;
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
function IsChkElement(element: IHTMLElement): boolean;
function IsRadioBtnElement(element: IHTMLElement): boolean;
function IsMemoElement(element: IHTMLElement): boolean;
function IsFormElement(element: IHTMLElement): boolean;
function IsIMGElement(element: IHTMLElement): boolean;
function IsInIMGElement(element: IHTMLElement): boolean;
function IsLabelElement(element: IHTMLElement): boolean;
function IsLinkElement(element: IHTMLElement): boolean;
function IsListElement(element: IHTMLElement): boolean;
function IsControlElement(element: IHTMLElement): boolean;
function IsObjectElement(element: IHTMLElement): boolean;
function IsFrameElement(element: IHTMLElement): boolean;
function IsInPutBtnElement(element: IHTMLElement): boolean;
function IsInHiddenElement(element: IHTMLElement): boolean;
function IsSubmitElement(element: IHTMLElement): boolean;
{ Image-element helpers.
  All of them operate on an IHTMLDocument2; images are identified by
  Src/Alt, by name, by index, or by the image element itself.
  NOTE(review): the matching rules and the ownership of the returned
  TPicture instances are not visible in this listing - confirm against
  the implementation before relying on them. }
function GetPicIndex(doc: IHTMLDocument2; Src: string; Alt: string): Integer;
function GetPicElement(doc: IHTMLDocument2;imgName: string;src: string;Alt: string): IHTMLImgElement;
// Three overloads returning a TPicture; "RegCode" presumably refers to a
// registration/verification-code image - TODO confirm.
function GetRegCodePic(doc: IHTMLDocument2;ImgName: string; Src: string; Alt: string): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture; overload;
type
  // stdcall function-pointer type taking an LRESULT, an interface ID and a
  // WPARAM, and returning an object through an untyped out parameter.
  // NOTE(review): the signature looks like oleacc.dll's ObjectFromLresult,
  // presumably bound dynamically - confirm in the implementation.
  TObjectFromLResult = function(LRESULT: lResult; const IID: TIID; WPARAM: wParam; out pObject): HRESULT; stdcall;

  // Element classification; this is the result type of GetElementType.
  TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
    ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);
// Classify an element as a TEleMentType value / as a string.
function GetElementType(element: IHTMLELEMENT): TEleMentType;
function GetElementTypeName(element: IHTMLELEMENT): string;

{ HTML table helpers.
  The GetWebBrowser* variants address a table by index inside a document,
  hand their result back through the var parameter ResValue and return a
  Boolean (presumably success/failure - TODO confirm in the implementation). }
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement;
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
function GetWebBrowserHtmlTableCellText(Doc: IHTMLDocument2;
  const TableIndex, RowIndex, ColIndex: Integer;
  var ResValue: string): Boolean;
function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableCellHtml(Doc: IHTMLDocument2;
  const TableIndex,RowIndex,ColIndex: Integer;
  var ResValue: string): Boolean;
// NOTE(review): the name looks like a typo for "GetHtmlTableHtml"; it is part
// of the public interface, so it is kept as declared.
function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableHtml(Doc: IHTMLDocument2;
  const TableIndex,RowIndex: Integer;
  var ResValue: string): Boolean;
type
  // Forward declarations: TDxWebFrame references both collection classes
  // before their full declarations further down.
  TDxWebFrameCollection = class;
  TDxWebElementCollection = class;

  // Load state exposed through TDxWebFrame.ReadyState.
  TLoadState = (Doc_Loading,Doc_Completed,Doc_Invalidate);

  { Wrapper around one frame window (IHTMLWINDOW2).
    Exposes the frame's source, name, document, nested frames and element
    collection as properties. Only the declaration is shown here. }
  TDxWebFrame = class
  private
    FFrame: IHTMLWINDOW2;                         // wrapped frame window
    FElementCollections: TDxWebElementCollection; // backing object for ElementCollections
    FWebFrameCollections: TDxWebFrameCollection;  // backing object for Frames
    function GetSrc: string;
    function GetElementCount: integer;
    function GetWebFrameCollections: TDxWebFrameCollection;
    function GetElementCollections: TDxWebElementCollection;
    function GetDocument: IHTMLDOCUMENT2;
    function GetReadState: TLoadState;
    function GetIsLoaded: boolean;
    procedure SetFrame(const Value: IHTMLWINDOW2);
    function GetName: string;
  public
    Constructor Create(IFrame: IHTMLWINDOW2);
    Destructor Destroy; override;
    // The wrapped frame; can be re-pointed through SetFrame.
    property Frame: IHTMLWINDOW2 read FFrame write SetFrame;
    property Src: string read GetSrc;
    property Document: IHTMLDOCUMENT2 read GetDocument;
    property Name: string read GetName;
    property Frames: TDxWebFrameCollection read GetWebFrameCollections;
    property ElementCount: integer read GetElementCount;
    property ElementCollections: TDxWebElementCollection read GetElementCollections;
    property ReadyState: TLoadState read GetReadState;
    property IsLoaded: boolean read GetIsLoaded;
  end;
  { Wrapper around an IHTMLFramesCollection2.
    Frames can be fetched by index or by name, either as the raw
    IHTMLWINDOW2 interface or as a TDxWebFrame object. }
  TDxWebFrameCollection = Class
  private
    FFrameCollection: IHTMLFramesCollection2; // wrapped collection
    // NOTE(review): a single TDxWebFrame field - presumably one instance is
    // reused for every FrameByIndex/FrameByName result; confirm before
    // holding on to more than one returned frame at a time.
    Frame: TDxWebFrame;
    function GetCount: integer;
    function GetFrameInterfaceByIndex(index: integer): IHTMLWINDOW2;
    function GetFrameInterfaceByName(Name: string): IHTMLWINDOW2;
    function GetFrameByIndex(index: integer): TDxWebFrame;
    function GetFrameByName(Name: string): TDxWebFrame;
    procedure SetFrameCollection(const Value: IHTMLFramesCollection2);
  public
    Constructor Create(ACollection: IHTMLFramesCollection2);
    Destructor Destroy; override;
    property FrameCollection: IHTMLFramesCollection2 read FFrameCollection write SetFrameCollection;
    property Count: integer read GetCount;
    property FrameInterfaceByIndex[index: integer]: IHTMLWINDOW2 read GetFrameInterfaceByIndex;
    property FrameInterfaceByName[Name: string]: IHTMLWINDOW2 read GetFrameInterfaceByName;
    property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex;
    property FrameByName[Name: string]: TDxWebFrame read GetFrameByName;
  end;
  { Wrapper around an IHTMLElementCollection.
    Elements can be looked up by name, by index, or by name plus index;
    ChildElementCollection yields a nested collection keyed by a string. }
  TDxWebElementCollection = class
  private
    FCollection: IHTMLElementCollection;       // wrapped collection (may be nil)
    FChildCollection: TDxWebElementCollection; // backing object for ChildElementCollection
    function GetCollection(index: String): TDxWebElementCollection;
    function GetCount: integer;
    function GetElement(itemName: string; index: integer): IHTMLElement;
    function GetElementByName(itemName: string): IHTMLELEMENT;
    function GetElementByIndex(index: integer): IHTMLELEMENT;
    procedure SetCollection(const Value: IHTMLElementCollection);
  public
    Constructor Create(ACollection: IHTMLElementCollection);
    Destructor Destroy; override;
    property Collection: IHTMLElementCollection read FCollection write SetCollection;
    property ChildElementCollection[index: String]: TDxWebElementCollection read GetCollection;
    property ElementCount: integer read GetCount;
    property Element[itemName: string;index: integer]: IHTMLElement read GetElement;
    property ElementByName[itemName: string]: IHTMLELEMENT read GetElementByName;
    property ElementByIndex[index: integer]: IHTMLELEMENT read GetElementByIndex;
  end;
  // Descendant of TDxWebElementCollection that adds no members of its own.
  TLinkCollection = class(TDxWebElementCollection)
  end;
  // Forward declaration: TDxTableCollection references TDxWebTable.
  TDxWebTable = class;

  { Access to the tables of an IHTMLDOCUMENT2.
    Tables can be fetched by name or index, either as the raw IHTMLTABLE
    interface or wrapped in a TDxWebTable. }
  TDxTableCollection = class
  private
    FTableCollection: IHTMLElementCollection;
    FDocument: IHTMLDOCUMENT2; // document the tables are taken from
    // NOTE(review): a single TDxWebTable field - presumably reused for every
    // TableByName/TableByIndex result; confirm in the implementation.
    FWebTable: TDxWebTable;
    function GetTableInterfaceByName(AName: string): IHTMLTABLE;
    procedure SetDocument(Value: IHTMLDOCUMENT2);
    function GetTableInterfaceByIndex(index: integer): IHTMLTABLE;
    function GetCount: integer;
    function GetTableByIndex(index: integer): TDxWebTable;
    function GetTableByName(AName: string): TDxWebTable;
  public
    Constructor Create(Doc: IHTMLDOCUMENT2);
    destructor Destroy; override;
    property TableInterfaceByName[AName: string]: IHTMLTABLE read GetTableInterfaceByName;
    property TableInterfaceByIndex[index: integer]: IHTMLTABLE read GetTableInterfaceByIndex;
    property TableByName[AName: string]: TDxWebTable read GetTableByName;
    property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex;
    property Document: IHTMLDOCUMENT2 read FDocument write SetDocument;
    property Count: integer read GetCount;
  end;
  { Wrapper around a single IHTMLTABLE.
    Cells are addressed as [ACol, ARow] (column first); a cell is available
    as a string (Cell) or as an IHTMLTableCell (CellElement). }
  TDxWebTable = class
  private
    FTableInterface: IHTMLTABLE; // wrapped table
    function GetRowCount: integer;
    procedure SetTableInterface(const Value: IHTMLTABLE);
    function GetCell(ACol, ARow: integer): string;
    function GetRowColCount(RowIndex: integer): integer;
    function GetInnerHtml: string;
    function GetInnerText: string;
    function GetCellElement(ACol, ARow: Integer): IHTMLTableCell;
  public
    Constructor Create(ATable: IHTMLTABLE);
    property TableInterface: IHTMLTABLE read FTableInterface write SetTableInterface;
    property RowCount: integer read GetRowCount;
    property Cell[ACol: integer;ARow: integer]: string read GetCell;
    property CellElement[ACol: Integer;ARow: Integer]: IHTMLTableCell read GetCellElement;
    // Column count is per row, indexed by RowIndex.
    property RowColCount[RowIndex: integer]: integer read GetRowColCount;
    property InnerHtml: string read GetInnerHtml;
    property InnerText: string read GetInnerText;
  end;
  { Combo-box style wrapper around an IHTMLSelectElement.
    Exposes item count, selected index, item lookup by index or name,
    per-item attributes, and the select's name and value. }
  TDxWebCombobox = class
  private
    FHtmlSelect: IHTMLSelectElement; // wrapped <select>; nil until assigned
    function GetCount: Integer;
    procedure SetItemIndex(const Value: Integer);
    function GetItemIndex: Integer;
    function GetName: string;
    procedure SetName(const Value: string);
    function GetValue: string;
    procedure SetValue(const Value: string);
    procedure SetCombInterface(const Value: IHTMLSelectElement);
    function GetItemByName(EleName: string): string;
    function GetItemByIndex(index: integer): string;
    function GetItemAttribute(index: Integer; AttribName: string): OleVariant;
  public
    constructor Create(AWebCombo: IHTMLSelectElement);
    procedure Add(Ele: IHTMLElement);
    procedure Insert(Ele: IHTMLElement;Index: Integer);
    procedure Remove(index: Integer);
    // The wrapped select element; assigning re-targets this wrapper.
    property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface;
    property Count: Integer read GetCount;
    property ItemIndex: Integer read GetItemIndex write SetItemIndex;
    property ItemByIndex[index: integer]: string read GetItemByIndex;
    property ItemByName[EleName: string]: string read GetItemByName;
    property ItemAttribute[index: Integer;AttribName: string]: OleVariant read GetItemAttribute;
    property Name: string read GetName write SetName;
    property value: string read GetValue write SetValue;
  end;
implementation
end
.
HTMLParser解析类的代码实现单元
代码
(*
****************************************************
*)
(*
得闲工作
室
*)
(*
HTML解析
单元库
*)
(*
*)
(*
DxHtmlParser Unit
*)
(*
Copyright(c) 2008-2010 不得
闲
*)
(*
email:appleak46@yahoo.com.cn QQ:75492895
*)
(*
****************************************************
*)
unit
DxHtmlParser;
interface
uses
Windows,MSHTML,ActiveX,DxHtmlElement,Forms;
type
  { Parses an HTML string with a stand-alone MSHTML document (no visible
    WebBrowser control) and exposes its tables, elements and select
    elements through wrapper objects. }
  TDxHtmlParser = class
  private
    FHtmlDoc: IHTMLDocument2;              // MSHTML document created in Create
    FHTML: string;                         // last HTML text assigned via the HTML property
    FWebTables: TDxTableCollection;        // owned; freed in Destroy
    FWebElements: TDxWebElementCollection; // owned; freed in Destroy
    FWebComb: TDxWebCombobox;              // owned; single instance handed out by WebCombobox[]
    procedure SetHTML(const Value: string);
    function GetWebCombobox(AName: string): TDxWebCombobox;
  public
    constructor Create;
    destructor Destroy; override;
    // Assigning loads the text into the document body and refreshes WebElements.
    property HTML: string read FHTML write SetHTML;
    property WebTables: TDxTableCollection read FWebTables;
    property WebElements: TDxWebElementCollection read FWebElements;
    // Returns the shared FWebComb re-targeted at the named element, or nil
    // when no element collection has been loaded yet.
    property WebCombobox[Name: string]: TDxWebCombobox read GetWebCombobox;
  end;
implementation
uses
  ComObj; // OleCheck

{ TDxHtmlParser }

{ Creates the parser.
  Initializes COM for the calling thread, creates a stand-alone MSHTML
  document, switches it to design mode (so scripts are not executed), pumps
  messages until the document reports readyState = 'complete', and then
  creates the wrapper objects.
  Raises EOleSysError when the HTML document object cannot be created; this
  check stays active even when assertions are compiled out. }
constructor TDxHtmlParser.Create;
begin
  inherited Create;
  // Balanced by CoUninitialize in Destroy (the destructor also runs when an
  // exception escapes from this constructor).
  CoInitialize(nil);
  // Create the IHTMLDocument2 interface. The HRESULT is checked explicitly:
  // the Assert below is removed by the compiler under {$C-} and would then
  // let a nil FHtmlDoc be dereferenced.
  OleCheck(CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER,
    IID_IHTMLDocument2, FHtmlDoc));
  Assert(FHtmlDoc <> nil, '构建HTMLDocument接口失败');
  // Design mode: scripts are not executed.
  FHtmlDoc.Set_designMode('On');
  // The document becomes ready asynchronously, so keep the message loop
  // running while waiting.
  while not (FHtmlDoc.readyState = 'complete') do
  begin
    sleep(1);
    Application.ProcessMessages;
  end;
  FWebTables := TDxTableCollection.Create(FHtmlDoc);
  FWebElements := TDxWebElementCollection.Create(nil);
  FWebComb := TDxWebCombobox.Create(nil);
end;
{ Frees the wrapper objects, releases the MSHTML document and then shuts
  COM down for this thread. }
destructor TDxHtmlParser.Destroy;
begin
  // The wrappers hold interface references into the document, so they go first.
  FWebTables.Free;
  FWebElements.Free;
  FWebComb.Free;
  // Release the document explicitly BEFORE CoUninitialize. Left to the
  // automatic field cleanup, the interface would only be released after the
  // destructor body, i.e. after COM has already been uninitialized.
  FHtmlDoc := nil;
  CoUninitialize;
  inherited;
end;
{ Getter for WebCombobox[Name].
  Returns nil while no element collection is loaded. Otherwise looks the
  element up by name, casts it to IHTMLSelectElement, points the shared
  FWebComb wrapper at it and returns that wrapper. The same wrapper
  instance is returned on every call. }
function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox;
var
  SelectIntf: IHTMLSelectElement;
begin
  // Nothing parsed yet: there is no collection to search.
  if FWebElements.Collection = nil then
  begin
    Result := nil;
    Exit;
  end;
  SelectIntf := FWebElements.ElementByName[AName] as IHTMLSelectElement;
  FWebComb.CombInterface := SelectIntf;
  Result := FWebComb;
end;
{ Setter for the HTML property.
  Loads Value into the document body and re-points WebElements at the
  document's "all" collection. Does nothing when Value equals the text that
  is already loaded. }
procedure TDxHtmlParser.SetHTML(const Value: string);
begin
  if FHTML = Value then
    Exit;
  FHtmlDoc.body.innerHTML := Value;
  FWebElements.Collection := FHtmlDoc.all;
  // Commit the cached text only after the document was updated. If it were
  // stored first and the update raised, FHTML would no longer match the
  // document and assigning the same text again would be skipped.
  FHTML := Value;
end;
end
.
全部代码下载
好几个Delphi版本的HtmlParser的类库,试着使用了几个,发现解析起来都不完整,或多或少地会出现一些问题!于是想到了如果界面上有一个浏览器,我们可以通过WebBrowser的Document接口对网页元素进行操作,很是方便!但是模拟网页提交,界面上是不一定要出现WebBrowser的,肯定有办法不通过WebBrowser就直接解析HTML,那便是我不要WebBrowser这个外壳,只要它里面的Document文档接口对象就能实现对Html的解析了。查找了一番MSDN,然后Google一下,果然可行,构建方法如下:
// 创建IHTMLDocument2接口
CoCreateInstance(CLASS_HTMLDocument, nil,
  CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
接口创建好了之后就能够对文档元素进行解析了,很是爽快!
结合了我自己的特有操作,我对Combobox,Table,Frame等一些网页元素做了相应的封装,实现了一个HTMLParser,大致代码如下:
这里只给出声明,代码请在最后下载
代码
(*
****************************************************
*)
(*
得闲工作
室
*)
(*
网页元素操作
类库
*)
(*
*)
(*
DxHtmlElement Unit
*)
(*
Copyright(c) 2008-2010 不得
闲
*)
(*
email:appleak46@yahoo.com.cn QQ:75492895
*)
(*
****************************************************
*)
unit
DxHtmlElement;
interface
uses
Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo;
{ Element-type predicates.
  Every function below takes a single IHTMLElement and returns a Boolean.
  Only the declarations are part of this listing; the implementations
  (and therefore the exact matching rules) are not shown here. }
function IsSelectElement(eleElement: IHTMLElement): Boolean;
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
function IsChkElement(element: IHTMLElement): boolean;
function IsRadioBtnElement(element: IHTMLElement): boolean;
function IsMemoElement(element: IHTMLElement): boolean;
function IsFormElement(element: IHTMLElement): boolean;
function IsIMGElement(element: IHTMLElement): boolean;
function IsInIMGElement(element: IHTMLElement): boolean;
function IsLabelElement(element: IHTMLElement): boolean;
function IsLinkElement(element: IHTMLElement): boolean;
function IsListElement(element: IHTMLElement): boolean;
function IsControlElement(element: IHTMLElement): boolean;
function IsObjectElement(element: IHTMLElement): boolean;
function IsFrameElement(element: IHTMLElement): boolean;
function IsInPutBtnElement(element: IHTMLElement): boolean;
function IsInHiddenElement(element: IHTMLElement): boolean;
function IsSubmitElement(element: IHTMLElement): boolean;
{ Image-element helpers.
  All of them operate on an IHTMLDocument2; images are identified by
  Src/Alt, by name, by index, or by the image element itself.
  NOTE(review): the matching rules and the ownership of the returned
  TPicture instances are not visible in this listing - confirm against
  the implementation before relying on them. }
function GetPicIndex(doc: IHTMLDocument2; Src: string; Alt: string): Integer;
function GetPicElement(doc: IHTMLDocument2;imgName: string;src: string;Alt: string): IHTMLImgElement;
// Three overloads returning a TPicture; "RegCode" presumably refers to a
// registration/verification-code image - TODO confirm.
function GetRegCodePic(doc: IHTMLDocument2;ImgName: string; Src: string; Alt: string): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture; overload;
type
  // stdcall function-pointer type taking an LRESULT, an interface ID and a
  // WPARAM, and returning an object through an untyped out parameter.
  // NOTE(review): the signature looks like oleacc.dll's ObjectFromLresult,
  // presumably bound dynamically - confirm in the implementation.
  TObjectFromLResult = function(LRESULT: lResult; const IID: TIID; WPARAM: wParam; out pObject): HRESULT; stdcall;

  // Element classification; this is the result type of GetElementType.
  TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
    ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);
// Classify an element as a TEleMentType value / as a string.
function GetElementType(element: IHTMLELEMENT): TEleMentType;
function GetElementTypeName(element: IHTMLELEMENT): string;

{ HTML table helpers.
  The GetWebBrowser* variants address a table by index inside a document,
  hand their result back through the var parameter ResValue and return a
  Boolean (presumably success/failure - TODO confirm in the implementation). }
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement;
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
function GetWebBrowserHtmlTableCellText(Doc: IHTMLDocument2;
  const TableIndex, RowIndex, ColIndex: Integer;
  var ResValue: string): Boolean;
function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableCellHtml(Doc: IHTMLDocument2;
  const TableIndex,RowIndex,ColIndex: Integer;
  var ResValue: string): Boolean;
// NOTE(review): the name looks like a typo for "GetHtmlTableHtml"; it is part
// of the public interface, so it is kept as declared.
function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableHtml(Doc: IHTMLDocument2;
  const TableIndex,RowIndex: Integer;
  var ResValue: string): Boolean;
type
  // Forward declarations: TDxWebFrame references both collection classes
  // before their full declarations further down.
  TDxWebFrameCollection = class;
  TDxWebElementCollection = class;

  // Load state exposed through TDxWebFrame.ReadyState.
  TLoadState = (Doc_Loading,Doc_Completed,Doc_Invalidate);

  { Wrapper around one frame window (IHTMLWINDOW2).
    Exposes the frame's source, name, document, nested frames and element
    collection as properties. Only the declaration is shown here. }
  TDxWebFrame = class
  private
    FFrame: IHTMLWINDOW2;                         // wrapped frame window
    FElementCollections: TDxWebElementCollection; // backing object for ElementCollections
    FWebFrameCollections: TDxWebFrameCollection;  // backing object for Frames
    function GetSrc: string;
    function GetElementCount: integer;
    function GetWebFrameCollections: TDxWebFrameCollection;
    function GetElementCollections: TDxWebElementCollection;
    function GetDocument: IHTMLDOCUMENT2;
    function GetReadState: TLoadState;
    function GetIsLoaded: boolean;
    procedure SetFrame(const Value: IHTMLWINDOW2);
    function GetName: string;
  public
    Constructor Create(IFrame: IHTMLWINDOW2);
    Destructor Destroy; override;
    // The wrapped frame; can be re-pointed through SetFrame.
    property Frame: IHTMLWINDOW2 read FFrame write SetFrame;
    property Src: string read GetSrc;
    property Document: IHTMLDOCUMENT2 read GetDocument;
    property Name: string read GetName;
    property Frames: TDxWebFrameCollection read GetWebFrameCollections;
    property ElementCount: integer read GetElementCount;
    property ElementCollections: TDxWebElementCollection read GetElementCollections;
    property ReadyState: TLoadState read GetReadState;
    property IsLoaded: boolean read GetIsLoaded;
  end;
  { Wrapper around an IHTMLFramesCollection2.
    Frames can be fetched by index or by name, either as the raw
    IHTMLWINDOW2 interface or as a TDxWebFrame object. }
  TDxWebFrameCollection = Class
  private
    FFrameCollection: IHTMLFramesCollection2; // wrapped collection
    // NOTE(review): a single TDxWebFrame field - presumably one instance is
    // reused for every FrameByIndex/FrameByName result; confirm before
    // holding on to more than one returned frame at a time.
    Frame: TDxWebFrame;
    function GetCount: integer;
    function GetFrameInterfaceByIndex(index: integer): IHTMLWINDOW2;
    function GetFrameInterfaceByName(Name: string): IHTMLWINDOW2;
    function GetFrameByIndex(index: integer): TDxWebFrame;
    function GetFrameByName(Name: string): TDxWebFrame;
    procedure SetFrameCollection(const Value: IHTMLFramesCollection2);
  public
    Constructor Create(ACollection: IHTMLFramesCollection2);
    Destructor Destroy; override;
    property FrameCollection: IHTMLFramesCollection2 read FFrameCollection write SetFrameCollection;
    property Count: integer read GetCount;
    property FrameInterfaceByIndex[index: integer]: IHTMLWINDOW2 read GetFrameInterfaceByIndex;
    property FrameInterfaceByName[Name: string]: IHTMLWINDOW2 read GetFrameInterfaceByName;
    property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex;
    property FrameByName[Name: string]: TDxWebFrame read GetFrameByName;
  end;
  { Wrapper around an IHTMLElementCollection.
    Elements can be looked up by name, by index, or by name plus index;
    ChildElementCollection yields a nested collection keyed by a string. }
  TDxWebElementCollection = class
  private
    FCollection: IHTMLElementCollection;       // wrapped collection (may be nil)
    FChildCollection: TDxWebElementCollection; // backing object for ChildElementCollection
    function GetCollection(index: String): TDxWebElementCollection;
    function GetCount: integer;
    function GetElement(itemName: string; index: integer): IHTMLElement;
    function GetElementByName(itemName: string): IHTMLELEMENT;
    function GetElementByIndex(index: integer): IHTMLELEMENT;
    procedure SetCollection(const Value: IHTMLElementCollection);
  public
    Constructor Create(ACollection: IHTMLElementCollection);
    Destructor Destroy; override;
    property Collection: IHTMLElementCollection read FCollection write SetCollection;
    property ChildElementCollection[index: String]: TDxWebElementCollection read GetCollection;
    property ElementCount: integer read GetCount;
    property Element[itemName: string;index: integer]: IHTMLElement read GetElement;
    property ElementByName[itemName: string]: IHTMLELEMENT read GetElementByName;
    property ElementByIndex[index: integer]: IHTMLELEMENT read GetElementByIndex;
  end;
  // Descendant of TDxWebElementCollection that adds no members of its own.
  TLinkCollection = class(TDxWebElementCollection)
  end;
  // Forward declaration: TDxTableCollection references TDxWebTable.
  TDxWebTable = class;

  { Access to the tables of an IHTMLDOCUMENT2.
    Tables can be fetched by name or index, either as the raw IHTMLTABLE
    interface or wrapped in a TDxWebTable. }
  TDxTableCollection = class
  private
    FTableCollection: IHTMLElementCollection;
    FDocument: IHTMLDOCUMENT2; // document the tables are taken from
    // NOTE(review): a single TDxWebTable field - presumably reused for every
    // TableByName/TableByIndex result; confirm in the implementation.
    FWebTable: TDxWebTable;
    function GetTableInterfaceByName(AName: string): IHTMLTABLE;
    procedure SetDocument(Value: IHTMLDOCUMENT2);
    function GetTableInterfaceByIndex(index: integer): IHTMLTABLE;
    function GetCount: integer;
    function GetTableByIndex(index: integer): TDxWebTable;
    function GetTableByName(AName: string): TDxWebTable;
  public
    Constructor Create(Doc: IHTMLDOCUMENT2);
    destructor Destroy; override;
    property TableInterfaceByName[AName: string]: IHTMLTABLE read GetTableInterfaceByName;
    property TableInterfaceByIndex[index: integer]: IHTMLTABLE read GetTableInterfaceByIndex;
    property TableByName[AName: string]: TDxWebTable read GetTableByName;
    property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex;
    property Document: IHTMLDOCUMENT2 read FDocument write SetDocument;
    property Count: integer read GetCount;
  end;
  { Wrapper around a single IHTMLTABLE.
    Cells are addressed as [ACol, ARow] (column first); a cell is available
    as a string (Cell) or as an IHTMLTableCell (CellElement). }
  TDxWebTable = class
  private
    FTableInterface: IHTMLTABLE; // wrapped table
    function GetRowCount: integer;
    procedure SetTableInterface(const Value: IHTMLTABLE);
    function GetCell(ACol, ARow: integer): string;
    function GetRowColCount(RowIndex: integer): integer;
    function GetInnerHtml: string;
    function GetInnerText: string;
    function GetCellElement(ACol, ARow: Integer): IHTMLTableCell;
  public
    Constructor Create(ATable: IHTMLTABLE);
    property TableInterface: IHTMLTABLE read FTableInterface write SetTableInterface;
    property RowCount: integer read GetRowCount;
    property Cell[ACol: integer;ARow: integer]: string read GetCell;
    property CellElement[ACol: Integer;ARow: Integer]: IHTMLTableCell read GetCellElement;
    // Column count is per row, indexed by RowIndex.
    property RowColCount[RowIndex: integer]: integer read GetRowColCount;
    property InnerHtml: string read GetInnerHtml;
    property InnerText: string read GetInnerText;
  end;
  { Combo-box style wrapper around an IHTMLSelectElement.
    Exposes item count, selected index, item lookup by index or name,
    per-item attributes, and the select's name and value. }
  TDxWebCombobox = class
  private
    FHtmlSelect: IHTMLSelectElement; // wrapped <select>; nil until assigned
    function GetCount: Integer;
    procedure SetItemIndex(const Value: Integer);
    function GetItemIndex: Integer;
    function GetName: string;
    procedure SetName(const Value: string);
    function GetValue: string;
    procedure SetValue(const Value: string);
    procedure SetCombInterface(const Value: IHTMLSelectElement);
    function GetItemByName(EleName: string): string;
    function GetItemByIndex(index: integer): string;
    function GetItemAttribute(index: Integer; AttribName: string): OleVariant;
  public
    constructor Create(AWebCombo: IHTMLSelectElement);
    procedure Add(Ele: IHTMLElement);
    procedure Insert(Ele: IHTMLElement;Index: Integer);
    procedure Remove(index: Integer);
    // The wrapped select element; assigning re-targets this wrapper.
    property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface;
    property Count: Integer read GetCount;
    property ItemIndex: Integer read GetItemIndex write SetItemIndex;
    property ItemByIndex[index: integer]: string read GetItemByIndex;
    property ItemByName[EleName: string]: string read GetItemByName;
    property ItemAttribute[index: Integer;AttribName: string]: OleVariant read GetItemAttribute;
    property Name: string read GetName write SetName;
    property value: string read GetValue write SetValue;
  end;
implementation
end
.
HTMLParser解析类的代码实现单元
代码
(*
****************************************************
*)
(*
得闲工作
室
*)
(*
HTML解析
单元库
*)
(*
*)
(*
DxHtmlParser Unit
*)
(*
Copyright(c) 2008-2010 不得
闲
*)
(*
email:appleak46@yahoo.com.cn QQ:75492895
*)
(*
****************************************************
*)
unit
DxHtmlParser;
interface
uses
Windows,MSHTML,ActiveX,DxHtmlElement,Forms;
type
  { Parses an HTML string with a stand-alone MSHTML document (no visible
    WebBrowser control) and exposes its tables, elements and select
    elements through wrapper objects. }
  TDxHtmlParser = class
  private
    FHtmlDoc: IHTMLDocument2;              // MSHTML document created in Create
    FHTML: string;                         // last HTML text assigned via the HTML property
    FWebTables: TDxTableCollection;        // owned; freed in Destroy
    FWebElements: TDxWebElementCollection; // owned; freed in Destroy
    FWebComb: TDxWebCombobox;              // owned; single instance handed out by WebCombobox[]
    procedure SetHTML(const Value: string);
    function GetWebCombobox(AName: string): TDxWebCombobox;
  public
    constructor Create;
    destructor Destroy; override;
    // Assigning loads the text into the document body and refreshes WebElements.
    property HTML: string read FHTML write SetHTML;
    property WebTables: TDxTableCollection read FWebTables;
    property WebElements: TDxWebElementCollection read FWebElements;
    // Returns the shared FWebComb re-targeted at the named element, or nil
    // when no element collection has been loaded yet.
    property WebCombobox[Name: string]: TDxWebCombobox read GetWebCombobox;
  end;
implementation
uses
  ComObj; // OleCheck

{ TDxHtmlParser }

{ Creates the parser.
  Initializes COM for the calling thread, creates a stand-alone MSHTML
  document, switches it to design mode (so scripts are not executed), pumps
  messages until the document reports readyState = 'complete', and then
  creates the wrapper objects.
  Raises EOleSysError when the HTML document object cannot be created; this
  check stays active even when assertions are compiled out. }
constructor TDxHtmlParser.Create;
begin
  inherited Create;
  // Balanced by CoUninitialize in Destroy (the destructor also runs when an
  // exception escapes from this constructor).
  CoInitialize(nil);
  // Create the IHTMLDocument2 interface. The HRESULT is checked explicitly:
  // the Assert below is removed by the compiler under {$C-} and would then
  // let a nil FHtmlDoc be dereferenced.
  OleCheck(CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER,
    IID_IHTMLDocument2, FHtmlDoc));
  Assert(FHtmlDoc <> nil, '构建HTMLDocument接口失败');
  // Design mode: scripts are not executed.
  FHtmlDoc.Set_designMode('On');
  // The document becomes ready asynchronously, so keep the message loop
  // running while waiting.
  while not (FHtmlDoc.readyState = 'complete') do
  begin
    sleep(1);
    Application.ProcessMessages;
  end;
  FWebTables := TDxTableCollection.Create(FHtmlDoc);
  FWebElements := TDxWebElementCollection.Create(nil);
  FWebComb := TDxWebCombobox.Create(nil);
end;
{ Frees the wrapper objects, releases the MSHTML document and then shuts
  COM down for this thread. }
destructor TDxHtmlParser.Destroy;
begin
  // The wrappers hold interface references into the document, so they go first.
  FWebTables.Free;
  FWebElements.Free;
  FWebComb.Free;
  // Release the document explicitly BEFORE CoUninitialize. Left to the
  // automatic field cleanup, the interface would only be released after the
  // destructor body, i.e. after COM has already been uninitialized.
  FHtmlDoc := nil;
  CoUninitialize;
  inherited;
end;
{ Getter for WebCombobox[Name].
  Returns nil while no element collection is loaded. Otherwise looks the
  element up by name, casts it to IHTMLSelectElement, points the shared
  FWebComb wrapper at it and returns that wrapper. The same wrapper
  instance is returned on every call. }
function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox;
var
  SelectIntf: IHTMLSelectElement;
begin
  // Nothing parsed yet: there is no collection to search.
  if FWebElements.Collection = nil then
  begin
    Result := nil;
    Exit;
  end;
  SelectIntf := FWebElements.ElementByName[AName] as IHTMLSelectElement;
  FWebComb.CombInterface := SelectIntf;
  Result := FWebComb;
end;
{ Setter for the HTML property.
  Loads Value into the document body and re-points WebElements at the
  document's "all" collection. Does nothing when Value equals the text that
  is already loaded. }
procedure TDxHtmlParser.SetHTML(const Value: string);
begin
  if FHTML = Value then
    Exit;
  FHtmlDoc.body.innerHTML := Value;
  FWebElements.Collection := FHtmlDoc.all;
  // Commit the cached text only after the document was updated. If it were
  // stored first and the update raised, FHTML would no longer match the
  // document and assigning the same text again would be skipped.
  FHTML := Value;
end;
end
.
全部代码下载
相关文章推荐
- Delphi通过MSHTML实现一个HTML解析类
- Delphi通过MSHTML实现一个HTML解析类
- 将不同的html页面组合成一个——通过框架标签frameset和frame实现
- 在页面中,我们经常看到,一个button按钮,如果属标点击,就会触发一个窗口的显示,如果二次点击并可以隐藏,那么如何通过JAVA配合html来实现这一功能呢?
- 关于 数据源 导出excel (这是) 通过 画一个html 实现的、最简单、好理解、的代码、
- 自我学习总结2:在各个网页基本上一致时,通过URL传一个值就可以实现一个html实现多个页
- Struts2中通过实现Aware接口解耦servlet API,可以多个方法共享一个application,session,request,parameter对象
- 实现一个栈(元素遵守先入后出顺序),能够通过 min 方法在 O(1)时间内获取栈中的最小元素。同时,栈的基本操作:入栈(Push)、出栈(Pop),也是在O(1)时间内完成的
- 通过布赛尔曲线以及CAShapeLayer的strokeStart 、strokeEnd 属性来实现一个圆形进度条
- 使用XIB自定义一个UIView,然后将这个view添加到controller的view 上(相当于所有界面都通过xib来实现)
- html+css:一个自适应的两栏布局的实现
- SVM实现多分类的程序基础工作(二)——通过一个简单libsvm例子迈入libsvm学习的大门
- python调用HTMLTestRunner+unittest实现一次执行多个测试类,并生成与每个测试类对应的测试报告,并不像某些人写的每次只执行一个测试类,具体看代码,附上整个project代码
- Fieldset,一个不常用的HTML标签 (表单分组实现效果,象Winfrom 里的GroupBox 在左上角显示标题.)
- 实现一个2008serve的IIS的虚拟目录(通过网络路径(UNC)的形式,共享在另外一个2008服务器上
- Android之通过配置Flavor实现一个项目打包成多个apk
- 求助哇, 我这里有一个存储过程 我想实现排序 通过判断然后 在排序 大家 请看下面的存储过程
- Delphi实现窗体总是向前操作,Delphi拖拽的一个例子
- 一个在HTML使用JavaScript实现调用Windows Shell.Application的例子。
- jQuery通过点击行来删除HTML表格行的实现示例