首页 > 解决方案 > 使用快递网站的 VBA 进行网页抓取

问题描述

我想在 excel A 列中记录具有跟踪号的包裹,并在其他列中记录可用字段的详细信息,以便每当我按下按钮运行模块时,它都会更新我从网站获取的包裹详细信息。我的目标网站是“ http://trackandtrace.courierpost.co.nz/Search/ ”。我已经制作了在该链接之后嵌入跟踪号并获取其他字段的代码,但该代码没有获取任何数据,它只是使用 Internet Explorer 打开链接。我得到的错误是

在此处输入图像描述

这是我的代码:

Sub Yellowcom()
    'Dim ieObj As InternetExplorer
    Dim htmlELe As IHTMLElement
    Dim HTML As HTMLDocument
    Dim i As Integer
    Dim x As Integer


    Dim URL As String
    Dim URLParameter As String
    Dim page As Long
    Dim links As Object
    Dim IE As Object


    i = 1

    Set IE = CreateObject("InternetExplorer.Application")
    'Set ieObj = New InternetExplorer
    IE.Visible = True
    URL = "http://trackandtrace.courierpost.co.nz/search/"
    'Application.Wait Now + TimeValue("00:00:05")
    x = 1

    For page = 2 To 10

        If page > 1 Then URLParameter = Sheet1.Cells(x, 1).Value

        IE.navigate URL & URLParameter

        ' Wait for the browser to load the page
        Do Until IE.readyState = 4

            DoEvents

        Loop

       Set HTML = IE.document
Set OrganicLinks = HTML.getElementsByClassName("search-results organic")
Set links = OrganicLinks.Item(0).getElementsByClassName("info")



    For Each htmlELe In links

        With ActiveSheet
            .Range("A" & i).Value = htmlELe.Children(0).textContent

            On Error Resume Next
.Range("B" & i).Value = htmlELe.getElementsByClassName("track-visit-website")(0).href
On Error GoTo 0


            On Error Resume Next
.Range("C" & i).Value = htmlELe.getElementsByClassName("info-section info-secondary")(0).href
On Error GoTo 0


            '.Range("B" & i).Value = htmlELe.getElementsByTagName("a")(0).href
            '.Range("C" & i).Value = htmlELe.Children(2).textContent
            .Range("D" & i).Value = htmlELe.Children(2).querySelector("a[href]")
             'links2 = htmlELe.getElementsByClassName("links")(1)
           ' .Range("D" & i).Value = links2.href


        End With
    i = i + 1

    x = x + 1
    Next htmlELe


    Next page

    IE.Quit
    Set IE = Nothing

    End Sub

标签: excelvba

解决方案


我强烈建议您使用背景对象向网站发送信息,例如以下 MSXML2 对象可用于发送 GET 和 POST 请求,在以下代码中,我使用搜索代码向您的网站发送请求(从值中提取A列)然后将您所需的交货状态和时间xml放在B列中

Sub demoMsxml2()
  Dim mySearchCode As String
  Dim myConnection As Object
  Dim Status As String
  Dim i As Long
  Dim lastRow As Long
  lastRow = Sheet1.Cells(Sheet1.Rows.Count, 1).End(xlUp).Row
  For i = 1 To lastRow
    mySearchCode = Sheet1.Range("A" & i).Value2
    Set myConnection = CreateObject("MSXML2.ServerXMLHTTP.6.0")
    Call myConnection.Open("GET", "http://trackandtrace.courierpost.co.nz/Search/" + mySearchCode)
    myConnection.send
    Sheet1.Range("B" & i).Value2 = ExtractString(Trim(Replace(myConnection.responseText, vbCrLf, "")), "<li class=""status""><span", "</li>")
  Next i
End Sub

Function ExtractString(parentString As String, beginsWith As String, endsWith As String) As String
  Dim a As Long: a = InStr(1, parentString, beginsWith)
  Dim b As Long: b = InStr(a, parentString, endsWith)
  If (a <> 0 And b <> 0) Then ExtractString = Trim(Mid(parentString, a, b - a)) Else ExtractString = ""
End Function

无需将文本放入 B 列,您只需从中刮取数据即可。使用此方法意味着您不必在屏幕上看到任何内容,无需创建 Internet Explorer 实例,无需等待页面加载等。这一切都是自动处理的。


推荐阅读