首页 > 解决方案 > VBA - 从html中提取数据到excel

问题描述

我在编码时遇到问题。我使用excel VBA从网页中提取数据到excel。网页是https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en 点击“所有交易”,它会在第一个表格的底部显示第二个表格。我想从底部表格(不是顶部表格)中提取数据。

这是代码:

Sub PropertyTransactions()
Dim ieObj As InternetExplorer
Dim htmlEle As IHTMLElement
Dim i As Integer

i = 1

Set ieObj = New InternetExplorer
ieObj.Visible = True
ieObj.navigate "https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en"

Application.Wait Now + TimeValue("00:00:05")

For Each htmlEle In ieObj.document.getElementsByClassName("tablesorter")(0).getElementsByTagName("tr")
Set htmlEle = ActiveDocument.all.tags("head").Item(0)
    With ActiveSheet
        .Range("A" & i).Value = htmlEle.Children(0).textContent
        .Range("B" & i).Value = htmlEle.Children(1).textContent
        .Range("C" & i).Value = htmlEle.Children(2).textContent
        .Range("D" & i).Value = htmlEle.Children(3).textContent
        .Range("E" & i).Value = htmlEle.Children(4).textContent
        .Range("F" & i).Value = htmlEle.Children(5).textContent
        .Range("G" & i).Value = htmlEle.Children(6).textContent
        .Range("H" & i).Value = htmlEle.Children(7).textContent
        .Range("I" & i).Value = htmlEle.Children(8).textContent
        .Range("J" & i).Value = htmlEle.Children(9).textContent
    End With

    i = i + 1
Next htmlEle

End Sub

但是,这一行有一个错误:
For Each htmlEle In ieObj.document.getElementsByClassName("tablesorter")(0).getElementsByTagName("tr")

它显示运行时错误 91:对象变量或未设置块变量。我该如何解决?非常感谢你!

标签: htmlexcelvbaweb-scraping

解决方案


我使用后期绑定,但这没关系。请阅读宏中的注释:

Sub PropertyTransactions()

  Dim url As String
  Dim browser As Object
  Dim nodeTransactionTab As Object
  Dim nodeTransactionTable As Object
  Dim nodesHeaderAll As Object
  Dim nodeHeaderOne As Object
  Dim nodesTrAll As Object
  Dim nodeTrOne As Object
  Dim nodesTdAll As Object
  Dim nodeTdOne As Object
  Dim currentRow As Long
  Dim currentColumn As Long

  'Initialize variables
  currentRow = 1
  currentColumn = 1
  url = "https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en"

  'Initialize Internet Explorer, set visibility,
  'call URL and wait until page is fully loaded
  Set browser = CreateObject("internetexplorer.application")
  browser.Visible = True 'You can set this to False to make the IE invisible
  browser.navigate url
  Do Until browser.ReadyState = 4: DoEvents: Loop

  'First we must click the 'All Transaction' tab to load the table you want
  'The tab works like a button and has the id 'tx_record_3'
  'The nature of an id is that it is unique
  'With the following lines you can try to get elements by them ids
  'The method getElementByID() doesn't build a node collection
  'We switch off all runtime errors when we use it because we get
  'a runtime error if there is nor element with that id
  On Error Resume Next
  Set nodeTransactionTab = browser.document.getElementByID("tx_record_3")
  On Error GoTo 0

  'Now we check if we have an html element 'nodeTransactionTab'
  If Not nodeTransactionTab Is Nothing Then
    'Click the tab and give a short break to load the transaction table
    nodeTransactionTab.Click
    Application.Wait Now + TimeValue("00:00:02")

    'Now we can get the transaction table
    'You try that with the css class 'tablesorter' but the table has also an id
    '(The CSS class 'tablesorter' can also only be found after the transaction
    'table has been loaded)
    On Error Resume Next
    Set nodeTransactionTable = browser.document.getElementByID("Tx_hist_table")
    On Error GoTo 0
  Else
    'The object nodeTransactionTab couldn't be build
    MsgBox "No transaction tab found"
  End If

  'To avoid deep if nesting, we can check at this point whether the transaction
  'table was found, because an object is nothing by default
  If Not nodeTransactionTable Is Nothing Then
    'We trust the header is present
    Set nodesHeaderAll = nodeTransactionTable.getElementsByTagName("th")

    'Get header of transaction table
    For Each nodeHeaderOne In nodesHeaderAll
      ActiveSheet.Cells(currentRow, currentColumn).Value = Trim(nodeHeaderOne.innertext)
      currentColumn = currentColumn + 1
    Next nodeHeaderOne

    'Prepare next line
    currentRow = currentRow + 1
    currentColumn = 1

    'We trust the values are present too
    'Get all value table rows
    Set nodesTrAll = nodeTransactionTable.getElementsByTagName("tbody")(0).getElementsByTagName("tr")

    'Get one row of values after the other from transaction table
    For Each nodeTrOne In nodesTrAll
      'Get all cells of next row
      Set nodesTdAll = nodeTrOne.getElementsByTagName("td")

      'Get values cell by cell
      For Each nodeTdOne In nodesTdAll
        ActiveSheet.Cells(currentRow, currentColumn).Value = Trim(nodeTdOne.innertext)
        currentColumn = currentColumn + 1
      Next nodeTdOne
    'Prepare next line
    currentRow = currentRow + 1
    currentColumn = 1
    Next nodeTrOne
  Else
    'The object nodeTransactionTable couldn't be build
    MsgBox "No transaction table found"
  End If

  'Clean up
  browser.Quit
  Set browser = Nothing
  Set nodeTransactionTab = Nothing
  Set nodesHeaderAll = Nothing
  Set nodeHeaderOne = Nothing
  Set nodesTrAll = Nothing
  Set nodeTrOne = Nothing
  Set nodesTdAll = Nothing
  Set nodeTdOne = Nothing
End Sub

推荐阅读