html - 具有各种搜索参数的 HTML 网页查询
问题描述
我正在尝试从需要某些搜索参数的网站导入数据。网站上有大约 5 个选项可供选择。我正在尝试构建一个脚本来查询网站,选择特定的搜索参数并进行搜索。从那里将结果导入我的 excel 工作表。
我创建的代码不起作用。我是 VBA 新手,非常感谢您的帮助。
这就是我所拥有的:
Private Sub RegulatoryDataPull_Click()
Dim eRow As Long
Dim objIE As Object
Dim HDoc As HTMLDocument
Dim HEle As HTMLUListElement
Set objIE = CreateObject("InternetExplorer.Application") ' Create document object.
Set HDoc = objIE.document ' Create HTML element (<ul>) object.
Set HEle = HDoc.getElementById("dnn_ctr85406_StateNetDB_resultsCount") ' Get the element reference using its ID.
Set sht = Sheets("Sheet1")
eRow = Sheet1.Cells(Rows.Count, 7).End(x1Up.Offset(7, 0)).Row
With objIE
.Visible = True
.navigate "https://www.ncsl.org/research/energy/energy-legislation-tracking-database.aspx"
Do While .Busy Or _
.readyState <> 4
DoEvents
Loop
Var arr = [document.querySelectorAll('["name=dnn$ctr85406$StateNetDB$ckBxTopics$16"],[name="dnn$ctr85406$StateNetDB$ckBxTopics$5"],[name="dnn$ctr85406$StateNetDB$ckBxTopics$3"],[name="dnn$ctr85406$StateNetDB$ckBxTopics$8"]')]
Topics.Item(0).Value = Topicchoice
Set States = .document.getElementsByName("dnn$ctr85406$StateNetDB$ckBxAllStates")
States.Item(0).Value = Stateschoice
Set Status = .document.getElementsByName("dnn$ctr85406$StateNetDB$ddlStatus")
Status.Item(0).Value = Statuschoice
Set Year = .document.getElementsByName("dnn$ctr85406$StateNetDB$ddlYear")
Year.Item(0).Value = Yearchoice
.document.getElementById("dnn_ctr85406_StateNetDB_btnSearch").Click
Do While .Busy Or _
.readyState <> 4
DoEvents
Loop
Dim ele As Object
' Loop through elements inside the <ul> element and find <br>, which has the texts we want.
With HEle
For ele = 0 To .getElementsByTagName("br").Length - 1
Debug.Print .getElementsByTagName("br").Item(ele).getElementsByTagName("br").Item(0).innerHTML
End Select
Next ele
End With
Set objIE = Nothing
End Sub
解决方案
欢迎来到 SO!我将您的代码复制粘贴到 Excel-VBA 中,它确实崩溃了。在这种情况下,最简单的方法是使用 F8 单步执行(不要只使用 F5/a 按钮运行代码)。这确实有助于找到代码块/崩溃的行。经过一些修改后,我想出了可以在我的机器上运行的代码。它还没有结束,但应该给你一个好的开始。
Private Sub RegulatoryDataPullTWO()
Dim eRow As Long
Dim objIE As Object
Dim HDoc As HTMLDocument
Dim HEle As HTMLUListElement
Set objIE = CreateObject("InternetExplorer.Application") ' Create document object.
objIE.Visible = True
objIE.navigate "https://www.ncsl.org/research/energy/energy-legislation-tracking-database.aspx"
Do While objIE.Busy Or objIE.readyState <> 4
DoEvents
Loop
Set HDoc = objIE.document ' Create HTML element (<ul>) object.
Set Top1 = objIE.document.getElementsByName("dnn$ctr85406$StateNetDB$ckBxTopics$16")
Top1.Item(0).Value = True
Set States = objIE.document.getElementsByName("dnn$ctr85406$StateNetDB$ckBxAllStates")
States.Item(0).Value = True
Set Status = objIE.document.getElementsByName("dnn$ctr85406$StateNetDB$ddlStatus")
Status.Item(0).Value = "Adopted"
Set yr = objIE.document.getElementsByName("dnn$ctr85406$StateNetDB$ddlYear")
yr.Item(0).Value = "2019"
objIE.document.getElementById("dnn_ctr85406_StateNetDB_btnSearch").Click
Do While objIE.Busy Or objIE.readyState <> 4
DoEvents
Loop
Set HEle = HDoc.getElementById("dnn_ctr85406_StateNetDB_resultsCount") ' Get the element reference using its ID.
Set HList = HDoc.getElementById("dnn_ctr85406_StateNetDB_linkList")
Set Sht = Sheets("Sheet1")
Debug.Print HEle.outerText
Sht.Range("B2").Value = HEle.outerText
ResRw = 3
For e = 0 To HList.getElementsByTagName("a").Length - 1
Set lnk = HList.getElementsByTagName("a").Item(e)
'Debug.Print e1.outerText, e1.outerHTML
If lnk.outerText <> "Bill Text Lookup" And lnk.outerText <> "*" Then
Debug.Print Replace(Replace(lnk.ParentNode.innerText, Chr(10), ""), Chr(13), "")
Debug.Print lnk.ParentNode.NextSibling.NextSibling.innerText
Sht.Range("A" & ResRw).Value = Replace(Replace(lnk.ParentNode.innerText, Chr(10), ""), Chr(13), "")
Sht.Range("B" & ResRw).Value = lnk.ParentNode.NextSibling.NextSibling.innerText
ResRw = ResRw + 1
End If
Next e
Set objIE = Nothing
End Sub
推荐阅读
- applescript - 如何使用 AppleScript 访问子标题组下的子菜单项
- python - 语音识别安全循环中的错误
- data-structures - 为什么小负载因子哈希表的大 O 复杂度为 O(1)?
- audio - 是否可以通过标准 GET 请求将广告或消息动态拼接到 MP3 文件中?
- html - 如何对齐循环打印的 HTML 元素?
- c - 镜像音频帧
- google-apps-script - 如何使用谷歌脚本抓取带有分页的网站?
- c# - 如何在视图中获取 Ienumerable 的属性?
- asp.net - 使用新的“System.Text.Json”类对 System.Data.DataTable 进行序列化时出现异常(Asp.net core 3.0 preview 8)
- python - 回滚后“此事务已关闭”