Delphi 解析HTML  
官方Delphi 学习QQ群: 682628230(三千人)
频道

Delphi 解析HTML


Delphi 解析HTML


uses mshtml;


IHTMLEleMent.ID;


IHTMLEleMent.tagName;


IHTMLEleMent.title;

elmt._className;

elmt.getAttribute('anchor', 0);


{ Dumps every HTML <table> of the page currently shown in WebBrowser1 into
  Memo3.

  For each row of each table one line is added that contains the innerText of
  every cell, each followed by a TAB (#9), with all CR/LF pairs removed so the
  row stays on a single memo line. A dashed separator line follows each row.

  Sender is not used.

  Requires the mshtml unit (IHTML* interfaces) and SysUtils (Supports,
  StringReplace - the latter was already used by the original code). }
procedure TForm1.btnphClick(Sender: TObject);
var
  Document: IHTMLDocument2;
  FTableCollection: IHTMLElementCollection;
  table: IHTMLTABLE;
  TableRow: IHTMLTableRow;
  elmt: IHTMLEleMent;
  I, J, K: integer;
  str: string;
begin
  // WebBrowser1.Document is nil until a page has been loaded, and it does not
  // implement IHTMLDocument2 when the browser hosts non-HTML content.
  // Supports covers both cases without raising or dereferencing nil.
  if not Supports(WebBrowser1.Document, IHTMLDocument2, Document) then
    Exit;

  // Only the <table> elements are of interest.
  FTableCollection := Document.all.tags('table') as IHTMLElementCollection;
  if FTableCollection = nil then
    Exit;

  // Batch the memo updates so the control is not repainted for every line.
  Memo3.Lines.BeginUpdate;
  try
    for I := 0 to FTableCollection.Length - 1 do
    begin
      // Original author's note: one table per question.
      table := FTableCollection.item(I, 0) as IHTMLTABLE;
      for J := 0 to table.rows.Length - 1 do
      begin
        // Original author's note: each row holds the data of one question.
        TableRow := table.rows.item(J, 0) as IHTMLTableRow;
        str := '';
        for K := 0 to TableRow.cells.Length - 1 do
        begin
          elmt := TableRow.cells.item(K, 0) as IHTMLEleMent;
          str := str + elmt.innerText + #9;
        end;
        // Strip CR/LF pairs so multi-line cell text does not break the row.
        str := StringReplace(str, #13#10, '', [rfReplaceAll]);
        Memo3.Lines.Add(str);
        Memo3.Lines.Add('------------------------------------------');
      end;
    end;
  finally
    Memo3.Lines.EndUpdate;
  end;
end;


  // Excerpt: point the embedded browser at the CSDN Delphi forum page.
  // URL is declared outside this excerpt (presumably a string - confirm).
  URL := 'http://bbs.csdn.net/forums/Delphi';

  WebBrowser1.Navigate(URL);


下面是一行 TableRow(<tr>)的数据,可以继续分解:每一对 <td></td> 之间是一个 cell(单元格/列)。


如何解析第一列(class="title" 的 td)中的 3 个数据呢?即:“?”、“delphi7 滚动条颜色”、“VCL组件开发及应用”,以及各链接的 href、class 等属性。


 



          <tr>

    <td class="title">

      <strong class="green">?</strong>

      <a href="/topics/390861446" target="_blank" title="delphi7 滚动条颜色">delphi7 滚动条颜色</a>

      <span class="forum_link">[<span class="parent"><a href="/forums/Delphi">Delphi</a></span> <a href="/forums/DelphiVCL">VCL组件开发及应用</a>]</span>

    </td>

    <td class="tc">20</td>

    <td class="tc">

      <a href="http://my.csdn.net/u010745617" rel="nofollow" target="_blank">u010745617</a><br />

      <span class="time">08-15 16:25</span></td>

    <td class="tc">1</td>

    <td class="tc">

      <a href="http://my.csdn.net/NongCunGongLu" rel="nofollow" target="_blank">NongCunGongLu</a><br />

      <span class="time">08-17 13:41</span>

    </td>

    <td class="tc">

      <a href="/topics/390861446/close" target="_blank">管理</a>

    </td>

  </tr>


 


    <td class="title">

      <strong class="green">?</strong>

      <a href="/topics/390861446" target="_blank" title="delphi7 滚动条颜色">delphi7 滚动条颜色</a>

      <span class="forum_link">[<span class="parent"><a href="/forums/Delphi">Delphi</a></span> <a href="/forums/DelphiVCL">VCL组件开发及应用</a>]</span>

    </td>

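把 td 这一部分的 IHTMLElement 通过它的 all 属性取得 IHTMLElementCollection 再逐个解析就可以了;tagName、getAttribute('href',0)、title、_className 都可以获得正确的值。这个 td 的集合里共有 6 个元素(strong、a、span、span、a、a)。下面代码中的 q、emt2、s2 需要另外声明(q: Integer; emt2: IHTMLElement; s2: string)。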



       // Excerpt: inspect the elements nested inside one table cell (elmt).
       // q, emt2 and s2 are not declared in this excerpt and must be declared
       // by the enclosing routine.
       tempCoc := elmt.all as IHTMLElementCollection;

        // Only cells whose element collection has exactly 6 entries are handled.
        if (tempCoc.Length = 6) then

        begin

          for q := 0 to tempCoc.Length - 1 do

          begin

            emt2 := tempCoc.item(q, 0) as IHTMLEleMent;

            // Dispatch on the tag name; the comparisons use upper-case names.
            if emt2.tagName = 'STRONG' then

              s2 := emt2.innerText

            else if emt2.tagName = 'A' then

            begin

              // Each assignment overwrites s2, so only innerText is left in s2
              // after this branch; the three lines show which values can be read
              // (href attribute, title, link text).
              s2 := emt2.getAttribute('href',0);

              s2 := emt2.title;

              s2 := emt2.innerText;

            end

            else if emt2.tagName = 'SPAN' then

            begin

              // NOTE(review): the four reads below discard their results; as bare
              // statements they likely do not compile - presumably placeholders
              // listing the available properties. Assign them to use the values.
              emt2.tagName;

              emt2._className;

              emt2.title;

              emt2.innerText;

            end;

          end;

        end;


 // Excerpt: a variable declaration followed by a lookup of the element whose
 // id is 'idbtn001' through the IHTMLDocument3 interface of the loaded page.
 elmt: IHTMLEleMent;


  elmt := (WebBrowser1.Document as ihtmldocument3).getElementById('idbtn001');


getElementsByName


getElementById


getElementsByTagName


 


// Excerpt: declarations plus an expression; doc2 must be assigned before use
// (the assignment is not shown here).
doc2: IHTMLDocument2;

doc3: IHTMLDocument3;

doc2.forms.item('form1',0) as IHTMLFormElement;// fetch the form 'form1'


 



// Excerpt: start at the element with id 'divfirstID' and descend through its
// children, replacing elmt with the selected child at every step.
elmt := (WebBrowser1.Document as IHTMLDocument3).getElementById('divfirstID');// get a node by its ID


elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// child node

elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// child node

elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// child node

elmt := ((elmt.children as IHTMLElementCollection).item(2, 0)) as IHTMLEleMent;// the 3rd child of this node (index 2)

elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// child node




推荐分享
图文皆来源于网络,内容仅做公益性分享,版权归原作者所有,如有侵权请告知删除!
 

Copyright © 2014 DelphiW.com 开发 源码 文档 技巧 All Rights Reserved
晋ICP备14006235号-8 晋公网安备 14108102000087号

执行时间: 0.19444394111633 seconds