Delphi 解析HTML
uses mshtml;
IHTMLEleMent.ID;
IHTMLEleMent.tagName;
IHTMLEleMent.title;
elmt._className;
elmt.getAttribute('anchor', 0);
{ Dumps the text of every <table> in the page currently loaded in WebBrowser1
  into Memo3.

  For each table row, the innerText of every cell is concatenated (each cell
  followed by a TAB), CR/LF pairs are removed, and the resulting line is added
  to Memo3 followed by a dashed separator line.

  Does nothing when no HTML document is loaded in the browser. }
procedure TForm1.btnphClick(Sender: TObject);
var
  Document: IHTMLDocument2;
  FTableCollection: IHTMLElementCollection;
  table: IHTMLTABLE;
  TableRow: IHTMLTableRow;
  elmt: IHTMLEleMent;
  I, J, K: integer;
  str: string;
begin
  // WebBrowser1.Document is nil until a page has been loaded; Supports also
  // rejects documents that do not implement IHTMLDocument2.
  if not Supports(WebBrowser1.Document, IHTMLDocument2, Document) then
    Exit;

  // Collect every <table> element of the document.
  FTableCollection := Document.all.tags('table') as IHTMLElementCollection;
  if FTableCollection = nil then
    Exit;

  // Batch the Memo updates so the control is not repainted for every line.
  Memo3.Lines.BeginUpdate;
  try
    for I := 0 to FTableCollection.Length - 1 do
    begin
      // One table per iteration.
      table := FTableCollection.item(I, 0) as IHTMLTABLE;
      for J := 0 to table.rows.Length - 1 do
      begin
        // One row per iteration; its cells are joined into a single line.
        TableRow := (table.rows.item(J, 0) as IHTMLTableRow);
        str := '';
        for K := 0 to TableRow.cells.Length - 1 do
        begin
          elmt := TableRow.cells.item(K, 0) as IHTMLEleMent;
          // Each cell's text is followed by a TAB (including the last one).
          str := str + elmt.innerText + #9;
        end;
        // Strip CR/LF pairs so a row always occupies exactly one Memo line.
        str := StringReplace(str, #$D#$A, '', [rfReplaceAll]);
        Memo3.Lines.Add(str);
        Memo3.Lines.Add('------------------------------------------');
      end;
    end;
  finally
    Memo3.Lines.EndUpdate;
  end;
end;
// Point the embedded browser at the CSDN Delphi forum listing page.
// NOTE(review): URL is declared outside this snippet.
URL := 'http://bbs.csdn.net/forums/Delphi';
WebBrowser1.Navigate(URL);
下面是一行 table row 的数据,可以按单元格分解:每对 <td>…</td> 之间是一个 cell(列)。
如何解析第一列中夹在 href、class 等标记之间的 3 个数据呢?即:?、delphi7 滚动条颜色、VCL组件开发及应用。
<tr>
<td class="title">
<strong class="green">?</strong>
<a href="/topics/390861446" target="_blank" title="delphi7 滚动条颜色">delphi7 滚动条颜色</a>
<span class="forum_link">[<span class="parent"><a href="/forums/Delphi">Delphi</a></span> <a href="/forums/DelphiVCL">VCL组件开发及应用</a>]</span>
</td>
<td class="tc">20</td>
<td class="tc">
<a href="http://my.csdn.net/u010745617" rel="nofollow" target="_blank">u010745617</a><br />
<span class="time">08-15 16:25</span></td>
<td class="tc">1</td>
<td class="tc">
<a href="http://my.csdn.net/NongCunGongLu" rel="nofollow" target="_blank">NongCunGongLu</a><br />
<span class="time">08-17 13:41</span>
</td>
<td class="tc">
<a href="/topics/390861446/close" target="_blank">管理</a>
</td>
</tr>
<td class="title">
<strong class="green">?</strong>
<a href="/topics/390861446" target="_blank" title="delphi7 滚动条颜色">delphi7 滚动条颜色</a>
<span class="forum_link">[<span class="parent"><a href="/forums/Delphi">Delphi</a></span> <a href="/forums/DelphiVCL">VCL组件开发及应用</a>]</span>
</td>
把 td 这一部分的 IHTMLEleMent 通过其 all 属性当作 IHTMLElementCollection 来解析就可以了:集合中共有 6 个元素,每个元素的 tagName、getAttribute('href',0)、title、_className 都可以获得正确的值。
// Walk every descendant element of the cell held in elmt and read its
// tag-specific data.
// NOTE(review): q, emt2 and s2 are declared outside this snippet.
tempCoc := elmt.all as IHTMLElementCollection;
// Only cells with exactly six descendant elements are processed.
if (tempCoc.Length = 6) then
begin
for q := 0 to tempCoc.Length - 1 do
begin
emt2 := tempCoc.item(q, 0) as IHTMLEleMent;
// Dispatch on the tag name, compared against upper-case literals.
if emt2.tagName = 'STRONG' then
s2 := emt2.innerText
else if emt2.tagName = 'A' then
begin
// NOTE(review): s2 is overwritten by each assignment below, so only
// innerText survives; presumably this just illustrates which values are
// available - use separate variables to keep all three.
s2 := emt2.getAttribute('href',0);
s2 := emt2.title;
s2 := emt2.innerText;
end
else if emt2.tagName = 'SPAN' then
begin
// NOTE(review): the lines below are bare property reads that list the
// readable properties; assign them to variables in real code.
emt2.tagName;
emt2._className;
emt2.title;
emt2.innerText;
end;
end;
end;
// Look up a single element by its id through the IHTMLDocument3 interface.
elmt: IHTMLEleMent;
elmt := (WebBrowser1.Document as ihtmldocument3).getElementById('idbtn001');
getElementsByName
getElementById
getElementsByTagName
doc2: IHTMLDocument2;
doc3: IHTMLDocument3;
doc2.forms.item('form1',0) as IHTMLFormElement;// fetch the form 'form1'
elmt := (WebBrowser1.Document as IHTMLDocument3).getElementById('divfirstID');// get a node by its ID
// Each line below replaces elmt with one of its own children, descending one
// level of the DOM tree per statement.
elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// first child
elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// first child
elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// first child
elmt := ((elmt.children as IHTMLElementCollection).item(2, 0)) as IHTMLEleMent;// third child (index 2)
elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// first child
Copyright © 2014 DelphiW.com 开发 源码 文档 技巧 All Rights Reserved
晋ICP备14006235号-8 晋公网安备 14108102000087号
执行时间: 0.19444394111633 seconds