首页 > 解决方案 > 具有各种搜索参数的 HTML 网页查询

问题描述

我正在尝试从需要某些搜索参数的网站导入数据。网站上有大约 5 个选项可供选择。我正在尝试构建一个脚本来查询网站,选择特定的搜索参数并进行搜索。从那里将结果导入我的 excel 工作表。

我创建的代码不起作用。我是 VBA 新手,非常感谢您的帮助。

这就是我所拥有的:

Private Sub RegulatoryDataPull_Click()

Dim eRow As Long
Dim objIE As Object
Dim HDoc As HTMLDocument
Dim HEle As HTMLUListElement

Set objIE = CreateObject("InternetExplorer.Application") ' Create document object.
Set HDoc = objIE.document ' Create HTML element (<ul>) object.
Set HEle = HDoc.getElementById("dnn_ctr85406_StateNetDB_resultsCount") ' Get the element reference using its ID.

Set sht = Sheets("Sheet1")

eRow = Sheet1.Cells(Rows.Count, 7).End(x1Up.Offset(7, 0)).Row

With objIE
.Visible = True
.navigate "https://www.ncsl.org/research/energy/energy-legislation-tracking-database.aspx"

Do While .Busy Or _
.readyState <> 4
DoEvents
Loop

Var arr = [document.querySelectorAll('["name=dnn$ctr85406$StateNetDB$ckBxTopics$16"],[name="dnn$ctr85406$StateNetDB$ckBxTopics$5"],[name="dnn$ctr85406$StateNetDB$ckBxTopics$3"],[name="dnn$ctr85406$StateNetDB$ckBxTopics$8"]')]
Topics.Item(0).Value = Topicchoice

Set States = .document.getElementsByName("dnn$ctr85406$StateNetDB$ckBxAllStates")
States.Item(0).Value = Stateschoice

Set Status = .document.getElementsByName("dnn$ctr85406$StateNetDB$ddlStatus")
Status.Item(0).Value = Statuschoice

Set Year = .document.getElementsByName("dnn$ctr85406$StateNetDB$ddlYear")
Year.Item(0).Value = Yearchoice

.document.getElementById("dnn_ctr85406_StateNetDB_btnSearch").Click

Do While .Busy Or _
.readyState <> 4
DoEvents
Loop

Dim ele As Object

' Loop through elements inside the <ul> element and find <br>, which has the texts we want.
With HEle
    For ele = 0 To .getElementsByTagName("br").Length - 1
        Debug.Print .getElementsByTagName("br").Item(ele).getElementsByTagName("br").Item(0).innerHTML
    End Select
    Next ele

End With

Set objIE = Nothing

End Sub

标签: htmlexcelvbatagsexcel-web-query

解决方案


欢迎来到 SO!我将您的代码复制粘贴到 Excel-VBA 中,它确实崩溃了。在这种情况下,最简单的方法是使用 F8 单步执行(不要只使用 F5/a 按钮运行代码)。这确实有助于找到代码块/崩溃的行。经过一些修改后,我想出了可以在我的机器上运行的代码。它还没有结束,但应该给你一个好的开始。

Private Sub RegulatoryDataPullTWO()

Dim eRow As Long
Dim objIE As Object
Dim HDoc As HTMLDocument
Dim HEle As HTMLUListElement

Set objIE = CreateObject("InternetExplorer.Application") ' Create document object.
objIE.Visible = True
objIE.navigate "https://www.ncsl.org/research/energy/energy-legislation-tracking-database.aspx"

Do While objIE.Busy Or objIE.readyState <> 4
    DoEvents
Loop

Set HDoc = objIE.document ' Create HTML element (<ul>) object.

Set Top1 = objIE.document.getElementsByName("dnn$ctr85406$StateNetDB$ckBxTopics$16")
Top1.Item(0).Value = True

Set States = objIE.document.getElementsByName("dnn$ctr85406$StateNetDB$ckBxAllStates")
States.Item(0).Value = True

Set Status = objIE.document.getElementsByName("dnn$ctr85406$StateNetDB$ddlStatus")
Status.Item(0).Value = "Adopted"

Set yr = objIE.document.getElementsByName("dnn$ctr85406$StateNetDB$ddlYear")
yr.Item(0).Value = "2019"

objIE.document.getElementById("dnn_ctr85406_StateNetDB_btnSearch").Click

Do While objIE.Busy Or objIE.readyState <> 4
    DoEvents
Loop

Set HEle = HDoc.getElementById("dnn_ctr85406_StateNetDB_resultsCount") ' Get the element reference using its ID.
Set HList = HDoc.getElementById("dnn_ctr85406_StateNetDB_linkList")
Set Sht = Sheets("Sheet1")

Debug.Print HEle.outerText
Sht.Range("B2").Value = HEle.outerText
ResRw = 3
For e = 0 To HList.getElementsByTagName("a").Length - 1
    Set lnk = HList.getElementsByTagName("a").Item(e)
    'Debug.Print e1.outerText, e1.outerHTML
    If lnk.outerText <> "Bill Text Lookup" And lnk.outerText <> "*" Then
        Debug.Print Replace(Replace(lnk.ParentNode.innerText, Chr(10), ""), Chr(13), "")
        Debug.Print lnk.ParentNode.NextSibling.NextSibling.innerText
        Sht.Range("A" & ResRw).Value = Replace(Replace(lnk.ParentNode.innerText, Chr(10), ""), Chr(13), "")
        Sht.Range("B" & ResRw).Value = lnk.ParentNode.NextSibling.NextSibling.innerText
        ResRw = ResRw + 1
    End If
Next e

Set objIE = Nothing

End Sub

推荐阅读