html - VBA - 从html中提取数据到excel
问题描述
我在编码时遇到问题。我使用excel VBA从网页中提取数据到excel。网页是https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en 点击“所有交易”,它会在第一个表格的底部显示第二个表格。我想从底部表格(不是顶部表格)中提取数据。
这是代码:
Sub PropertyTransactions()
Dim ieObj As InternetExplorer
Dim htmlEle As IHTMLElement
Dim i As Integer
i = 1
Set ieObj = New InternetExplorer
ieObj.Visible = True
ieObj.navigate "https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en"
Application.Wait Now + TimeValue("00:00:05")
For Each htmlEle In ieObj.document.getElementsByClassName("tablesorter")(0).getElementsByTagName("tr")
Set htmlEle = ActiveDocument.all.tags("head").Item(0)
With ActiveSheet
.Range("A" & i).Value = htmlEle.Children(0).textContent
.Range("B" & i).Value = htmlEle.Children(1).textContent
.Range("C" & i).Value = htmlEle.Children(2).textContent
.Range("D" & i).Value = htmlEle.Children(3).textContent
.Range("E" & i).Value = htmlEle.Children(4).textContent
.Range("F" & i).Value = htmlEle.Children(5).textContent
.Range("G" & i).Value = htmlEle.Children(6).textContent
.Range("H" & i).Value = htmlEle.Children(7).textContent
.Range("I" & i).Value = htmlEle.Children(8).textContent
.Range("J" & i).Value = htmlEle.Children(9).textContent
End With
i = i + 1
Next htmlEle
End Sub
但是,这一行有一个错误:
For Each htmlEle In ieObj.document.getElementsByClassName("tablesorter")(0).getElementsByTagName("tr")
它显示运行时错误 91:对象变量或未设置块变量。我该如何解决?非常感谢你!
解决方案
我使用后期绑定,但这没关系。请阅读宏中的注释:
Sub PropertyTransactions()
Dim url As String
Dim browser As Object
Dim nodeTransactionTab As Object
Dim nodeTransactionTable As Object
Dim nodesHeaderAll As Object
Dim nodeHeaderOne As Object
Dim nodesTrAll As Object
Dim nodeTrOne As Object
Dim nodesTdAll As Object
Dim nodeTdOne As Object
Dim currentRow As Long
Dim currentColumn As Long
'Initialize variables
currentRow = 1
currentColumn = 1
url = "https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en"
'Initialize Internet Explorer, set visibility,
'call URL and wait until page is fully loaded
Set browser = CreateObject("internetexplorer.application")
browser.Visible = True 'You can set this to False to make the IE invisible
browser.navigate url
Do Until browser.ReadyState = 4: DoEvents: Loop
'First we must click the 'All Transaction' tab to load the table you want
'The tab works like a button and has the id 'tx_record_3'
'The nature of an id is that it is unique
'With the following lines you can try to get elements by them ids
'The method getElementByID() doesn't build a node collection
'We switch off all runtime errors when we use it because we get
'a runtime error if there is nor element with that id
On Error Resume Next
Set nodeTransactionTab = browser.document.getElementByID("tx_record_3")
On Error GoTo 0
'Now we check if we have an html element 'nodeTransactionTab'
If Not nodeTransactionTab Is Nothing Then
'Click the tab and give a short break to load the transaction table
nodeTransactionTab.Click
Application.Wait Now + TimeValue("00:00:02")
'Now we can get the transaction table
'You try that with the css class 'tablesorter' but the table has also an id
'(The CSS class 'tablesorter' can also only be found after the transaction
'table has been loaded)
On Error Resume Next
Set nodeTransactionTable = browser.document.getElementByID("Tx_hist_table")
On Error GoTo 0
Else
'The object nodeTransactionTab couldn't be build
MsgBox "No transaction tab found"
End If
'To avoid deep if nesting, we can check at this point whether the transaction
'table was found, because an object is nothing by default
If Not nodeTransactionTable Is Nothing Then
'We trust the header is present
Set nodesHeaderAll = nodeTransactionTable.getElementsByTagName("th")
'Get header of transaction table
For Each nodeHeaderOne In nodesHeaderAll
ActiveSheet.Cells(currentRow, currentColumn).Value = Trim(nodeHeaderOne.innertext)
currentColumn = currentColumn + 1
Next nodeHeaderOne
'Prepare next line
currentRow = currentRow + 1
currentColumn = 1
'We trust the values are present too
'Get all value table rows
Set nodesTrAll = nodeTransactionTable.getElementsByTagName("tbody")(0).getElementsByTagName("tr")
'Get one row of values after the other from transaction table
For Each nodeTrOne In nodesTrAll
'Get all cells of next row
Set nodesTdAll = nodeTrOne.getElementsByTagName("td")
'Get values cell by cell
For Each nodeTdOne In nodesTdAll
ActiveSheet.Cells(currentRow, currentColumn).Value = Trim(nodeTdOne.innertext)
currentColumn = currentColumn + 1
Next nodeTdOne
'Prepare next line
currentRow = currentRow + 1
currentColumn = 1
Next nodeTrOne
Else
'The object nodeTransactionTable couldn't be build
MsgBox "No transaction table found"
End If
'Clean up
browser.Quit
Set browser = Nothing
Set nodeTransactionTab = Nothing
Set nodesHeaderAll = Nothing
Set nodeHeaderOne = Nothing
Set nodesTrAll = Nothing
Set nodeTrOne = Nothing
Set nodesTdAll = Nothing
Set nodeTdOne = Nothing
End Sub
推荐阅读
- swift - 在 Swift Realm 中删除带有子类的对象
- python - 减小 csv 数据框中的大小
- here-api - 路由 API 忽略封闭的道路
- cmake - 告诉 CMake 通过命令行构建清理?
- python-3.x - 根据熊猫中的匹配按3列排序并分配列属性
- c - 如何在动态结构变量中分配动态内存?
- vim - 在 VIM 退出时关闭所有 NERDtree 实例
- firebase - Dialogflow webhook 中未调用 Google Cloud 函数
- python - 从 bam 文件中提取行
- javascript - puppeteer 使用 evaluate() 后无法获取页面源