html - 试图从 url 中抓取 web 数据使用框架。获取 MSHTML.HTMLDocument 对象中的数据。我想将对象另存为 .xls 在硬盘上
问题描述
我想从 URL 中抓取数据。我在excel中使用VBA。我能够在 MSHTML.HTMLDocument iframeDoc 对象中获取数据。现在我想将 iframeDoc 对象中的数据保存为硬盘上的 excel 文件。iframeDoc.Documentelent.innerHTML 之类的属性都不起作用。它会产生运行时错误。对象不支持该属性。所以我需要帮助将对象转换为字符串类型或任何其他方法来保存到硬盘。谢谢。
我尝试将 MSHTML.HTMLDocument DocumentElemnt 和 Body 的属性保存到字符串。那些给出运行时错误。
Sub
Dim ie As SHDocVw.InternetExplorer
Dim doc As MSHTML.HTMLDocument
Dim url As String
url = "http://www.1line.williams.com/Transco/info-postings/notices/critical-notices.html"
Set ie = New SHDocVw.InternetExplorer
ie.Visible = True
ie.navigate url
While ie.Busy Or ie.readyState <> READYSTATE_COMPLETE
DoEvents
Wend
Set doc = ie.document
Dim iframeDoc As MSHTML.HTMLDocument
Set iframeDoc = doc.frames.Item("theiframe")
If iframeDoc Is Nothing Then
MsgBox "IFrame with name 'iframename' was not found."
ie.Quit
Exit Sub
Else
'*** to save iframeDoc object as .xls file on hard disc ***
Dim strframe As String
strframe = CStr(iframeDoc.DocumentElement.innerHTML) ' Run time error
'object does not support the property or method
End If
End Sub
我希望 iframedoc.DocumentElement.innerHTML 被转换为字符串。但是它给出了运行时错误对象不支持该属性或方法。
解决方案
您需要留出足够的时间让 javascript 在 iframe 中运行和填充表格。我为此使用了一个定时循环
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer, ws As Worksheet
Dim hTable As HTMLTable, t As Date
Const MAX_WAIT_SEC As Long = 10 '<==Adjust wait time
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 "http://www.1line.williams.com/Transco/info-postings/notices/critical-notices.html"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
With .document.getElementById("theiframe").contentDocument.getElementById("j_idt11")
Set hTable = .getElementsByTagName("table")(1)
End With
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While hTable Is Nothing
If Not hTable Is Nothing Then
WriteTable hTable, 1, ws
End If
.Quit
End With
End Sub
Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, td As Object, r As Long, c As Long
r = startRow
With ws
Dim headers As Object, header As Object, columnCounter As Long
Set headers = hTable.getElementsByTagName("th")
For Each header In headers
columnCounter = columnCounter + 1
.Cells(r, columnCounter) = header.innerText
Next header
For Each tRow In hTable.getElementsByTagName("tr")
r = r + 1: c = 1
For Each td In tRow.getElementsByTagName("td")
If td.classname = "ui-col-7" Then 'or you could use if c = 7
.Cells(r, c).Value = "http://www.1line.williams.com" & Replace$(Split(Split(td.outerhtml, "href=" & Chr$(34))(1), ">")(0), Chr$(34), vbNullString)
Else
.Cells(r, c).Value = td.innerText
End If
c = c + 1
Next td
Next tRow
End With
End Sub
推荐阅读
- javascript - 使用地图调整 IP 命中计数器的大小
- ruby-on-rails - 带有载波的更新请求的 Rspec 未将文件列入白名单
- laravel - Laravel:验证数组字段具有相同的大小
- reactjs - Styled Components / React - Fragment 元素上的样式
- python - 如何从python中的matplotlib艺术家路径中获取勾勒出形状的点?
- python - 将列添加到 pandas 数据框中的最有效方法是什么,其中包含来自每行周围 n 行的连接值?
- javascript - ajax 中的分页无法转到正确的页面
- ios - 打破父约束的子视图约束
- javascript - HTML Pre 并支持正确使用 CR (\r)
- android - 具有三个片段的 Android BottomNavigationView - 如何仅设置一个以具有带有选项的工具栏