首页 > 解决方案 > VBA - 网络抓取找不到正确的 GET 请求

问题描述

我的问题与其他问题VBA-web scraping can not get HTMLElement innerText有关。我有一个类似的问题

网站网址 - https://www.hnb.hr/en/core-functions/monetary-policy/exchange-rate-list/exchange-rate-list

我需要获取货币参考日期和所选值。问题是我找不到GET最终生成这些值的正确请求。我发现它与POST请求有关:

POST/en/core-functions/monetary-policy/exchange-rate-list/exchange-rate-list?p_p_id=tecajnalistacontroller_WAR_hnbtecajnalistaportlet&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_resource_id=getTecajnaAjaxDataURL&p_p_col_id=cacheLevelPage&p_p_col_id=column-2&p_p_p

我想使用一种通过 id、class 或 tag 获取的技术——无论如何,只要GETURL 请求太快而无法检索所需的信息

标签: excelvbaweb-scrapingxmlhttprequest

解决方案


XMLHTTP 请求和 API:

我会使用他们的API,如下所示。我有一些辅助函数来帮助解析响应。在GetDict函数中可以设置你感兴趣的货币。在函数中GetRate你可以指定你感兴趣的汇率。如果不指定,则默认为"median_rate"

调用 API:

要获取特定日期的费率,请对以下 URL 进行 [n] HTTP 调用:

http://hnbex.eu/api/v1/rates/daily/?date=YYYY-MM-DD

日期参数是可选的。如果未设置,则使用当前日期(今天)。

您可以使用 a 解析JSON响应,JSON parser但我发现使用它从字符串Split中获取所需信息更简单。JSON如果您熟悉,JSON我会很高兴地更新一个 JSON 解析示例。

Option Explicit

Public Sub GetInfo()
    'http://hnbex.eu/api/v1/
    Dim strJSON As String, http As Object, json As Object
    Const URL As String = "http://hnbex.eu/api/v1/rates/daily/"

    Set http = CreateObject("MSXML2.XMLHTTP")
    With http
        .Open "GET", URL, False
        .send
        strJSON = .responseText
    End With
    'Set json = JsonConverter.ParseJson(strJSON) '<== You could parse the JSON using a JSON parse such as [JSONConverter][1]

    Dim currencyDict As Object
    Set currencyDict = GetDict

    Dim key As Variant, dictKeys As Variant, result As Variant
    For Each key In currencyDict.keys
        result = GetRate(strJSON, key)
        If Not IsError(result) Then currencyDict(key) = result
        result = vbNullString
    Next key

    PrintDictionary currencyDict

End Sub

Public Function GetDict() As Object '<== You could adapt to pass currencies as string arguments to the function. Or even a string array.
    Dim dict As Object
    Set dict = CreateObject("Scripting.Dictionary")
    dict.Add "EUR", vbNullString
    dict.Add "CZK", vbNullString
    dict.Add "HRK", vbNullString
    dict.Add "HUF", vbNullString
    dict.Add "PLN", vbNullString
    dict.Add "RON", vbNullString
    dict.Add "RSD", vbNullString
    Set GetDict = dict
End Function

Public Function GetRate(ByVal json As String, ByVal key As Variant, Optional ByVal rate As String = "median_rate") As Variant
    Dim arr() As String, tempString As String
    On Error GoTo Errhand
    arr = Split(json, """currency_code"": " & Chr$(34) & key & Chr$(34))
    tempString = arr(1)
    tempString = Split(arr(1), Chr$(34) & rate & Chr$(34) & ":")(1)
    tempString = Split(tempString, ",")(0)
    GetRate = tempString
    Exit Function
Errhand:
    GetRate = CVErr(xlErrNA)
End Function

Public Sub PrintDictionary(ByVal dict As Object)
    Dim key As Variant
    For Each key In dict.keys
        Debug.Print key & " : " & dict(key)
    Next
End Sub

IE浏览器:

您可以使用带有显式等待元素的循环出现在页面上(或填充)

Option Explicit
Public Sub GetInfo()
    Dim IE As New InternetExplorer, t As Date, hTable As HTMLTable, clipboard As Object
    Const WAIT_TIME_SECS As Long = 5
    t = Timer

    With IE
        .Visible = True
        .navigate "https://www.hnb.hr/en/core-functions/monetary-policy/exchange-rate-list/exchange-rate-list"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Do
            DoEvents
            On Error Resume Next
            Set hTable = .document.getElementById("records_table")
            On Error GoTo 0
            If Timer - t > WAIT_TIME_SECS Then Exit Do
        Loop While hTable Is Nothing

        If hTable Is Nothing Then
            .Quit
            Exit Sub
        End If
        Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
        clipboard.SetText hTable.outerHTML
        clipboard.PutInClipboard
        ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).PasteSpecial
        .Quit                                    '<== Remember to quit application
    End With
End Sub

推荐阅读