首页 > 解决方案 > WebScraping with VBA - change value of InputBox

问题描述

I'm experienced with VBA but really new with webscraping. So far I managed to extract some tables from other webpages but this one is giving me a hard time. The link is http://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=6&accion=consultarCuadro&idCuadro=CF102&locale=es

Basically, I click the arrow drop down list next to "Exportar Cuadro" button. After that, I need to change both dates that appear there to a specific one I will have into a variable.

How can I get to change that input boxes on webpage? My code so far is the next one:

Option Explicit

Sub test()

Dim URL As String, URL2 As String, URL3 As String, URL4 As String
Dim IE As Object, obj As Object, colTR As Object, doc As Object, tr As Object
Dim eleColtr As MSHTML.IHTMLElementCollection 'Element collection for tr tags
Dim eleColtd As MSHTML.IHTMLElementCollection 'Element collection for td tags
Dim eleRow As MSHTML.IHTMLElement 'Row elements
Dim eleCol As MSHTML.IHTMLElement 'Column elements
Dim objCollection As Object
Dim j As String, i As Integer


URL = "https://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=18&accion=consultarCuadroAnalitico&idCuadro=CA51&locale=es"
URL2 = "https://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=18&accion=consultarCuadroAnalitico&idCuadro=CA52&locale=es"
URL3 = "https://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=18&accion=consultarCuadroAnalitico&idCuadro=CA53&locale=es"
URL4 = "http://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=6&accion=consultarCuadro&idCuadro=CF102&locale=es"
'Tipos de cambio
Set IE = CreateObject("InternetExplorer.Application")

IE.Visible = True
IE.navigate URL4

Do While IE.Busy Or IE.readyState <> 4
    DoEvents
Loop

Application.Wait (Now + TimeValue("00:00:01"))

IE.document.getElementById("exportaCuadroToggle").Click

Set objCollection = IE.document.getElementsByTagName("ID")
i = 0
While i < objCollection.Length
    If objCollection(i).Value = "26/08/2019" Then
        ' Set text for search
        objCollection(i).Value = "01/08/2019"
    End If
    If objCollection(i).Name = "form-control form-control-sm fechaFin" Then
        ' Set text for search
        objCollection(i).Value = "01/08/2019"
    End If
Wend

End Sub

Note: URL, URL2 and URL3 are used in the complete code but I ommited that part for now because those links are already doing what I want.

标签: htmlexcelvba

解决方案


查看@StavrosJon 引用的 API 文档,您似乎可以执行以下操作。相关的 API 端点是:

获取系列/:idSerie/datos/:fechaI/:fechaF

您可以获得免费令牌。有关使用和限制的详细信息在此处

API 调用需要以逗号分隔的系列 ID 列表作为其参数之一。你可以硬编码这些,或者像我一样,简单地从你引用的现有网页中获取它们,然后传入后续的 API 调用。我正则表达式出必要的系列 ID。

响应是 json - 如此详述- 因此,您需要一个 json 解析器来处理响应。我使用 jsonconverter.bas。从这里下载原始代码并添加到名为 jsonConverter 的标准模块中。然后您需要转到 VBE > 工具 > 参考 > 添加对 Microsoft Scripting Runtime 的引用。

我使用一些辅助函数来确保我正确排序了日期输出,并且正确处理了丢失的信息。

titulo如果您想要项目配对,例如 Max / Min,请在列上对输出进行排序。否则,您可以实现自定义排序。


VBA:

Option Explicit
Public Sub GetData()
    '<  VBE > Tools > References > Microsoft Scripting Runtime
    Dim json As Object, re As Object, s As String, xhr As Object
    Dim startDate As String, endDate As String, ws As Worksheet, ids As String

    startDate = "2019-08-18"
    endDate = "2019-08-24"

    Dim datesDict As Object, headers(), results(), key As Variant, r As Long

    Set datesDict = GetDateDictionary(startDate, endDate)
    ReDim headers(1 To datesDict.Count + 2)
    headers(1) = "idSerie"
    headers(2) = "titulo"
    r = 3

    For Each key In datesDict.keys
        headers(r) = key
        r = r + 1
    Next

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set re = CreateObject("VBScript.RegExp")
    Set xhr = CreateObject("MSXML2.XMLHTTP")

    With xhr
        .Open "GET", "http://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=6&accion=consultarCuadro&idCuadro=CF102&locale=es", False
        .send
        s = .responseText
        ids = GetIds(re, s)
        If ids = "No match" Then Exit Sub
        .Open "GET", "https://www.banxico.org.mx/SieAPIRest/service/v1/series/" & ids & "/datos/" & startDate & "/" & endDate & "", False 'https://www.banxico.org.mx/SieAPIRest/service/v1/doc/consultaDatosSerieRango
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "Bmx-Token", "aa833b22ee2a350192df6962b1eb6d8ea569ac895862ecc31b79b46859c7e74c" 'https://www.banxico.org.mx/SieAPIRest/service/v1/token  ''<== Replace with your generated token
        .send
        s = .responseText
    End With

    Set json = JsonConverter.ParseJson(s)("bmx")("series")

    ReDim results(1 To json.Count, 1 To UBound(headers))

    WriteOutResults ws, re, startDate, endDate, json, results, headers
End Sub

Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal re As Object, ByVal startDate As String, ByVal endDate As String, ByVal json As Object, ByRef results(), ByRef headers())
    Dim item As Object, subItem As Object, key As Variant
    Dim r As Long, c As Long, datesDict As Object, nextKey As Variant
    re.Pattern = "\s{2,}"
    For Each item In json
        Set datesDict = GetDateDictionary(startDate, endDate)
        r = r + 1

        For Each key In item.keys
            Select Case key
            Case "idSerie"
                results(r, 1) = item(key)
            Case "titulo"
                results(r, 2) = re.Replace(item(key), Chr$(32))
            Case "datos"
                c = 3
                For Each subItem In item(key)
                    datesDict(subItem("fecha")) = subItem("dato")
                Next subItem
                For Each nextKey In datesDict.keys
                    results(r, c) = datesDict(nextKey)
                    c = c + 1
                Next
            End Select
        Next
    Next
    With ws
        .Cells(1, 1).Resize(1, UBound(headers)) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub

Public Function GetIds(ByVal re As Object, ByVal responseText As String) As String
    Dim matches As Object, i As Long, dict As Object
    Set dict = CreateObject("Scripting.Dictionary")
    With re
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "'(SF\d{5})'"                 'regex pattern to get json string
        If .test(responseText) Then
            Set matches = .Execute(responseText)
            For i = 0 To matches.Count - 1
                dict(matches(i).SubMatches(0)) = vbNullString
            Next
            GetIds = Join$(dict.keys, ",")
        Else
            GetIds = "No match"
        End If
    End With
End Function

Public Function GetDateDictionary(ByVal startDate As String, ByVal endDate As String) As Object
    Dim sDate As Long, eDate As Long
    Dim dateDict As Object, i As Long

    Set dateDict = CreateObject("Scripting.Dictionary")
    sDate = CDate(startDate)
    eDate = CDate(endDate)
    For i = sDate To eDate
        dateDict(Format$(i, "dd/mm/yyyy")) = vbNullString
    Next
    Set GetDateDictionary = dateDict
End Function

示例结果:

在此处输入图像描述


正则表达式:

在此处输入图像描述


推荐阅读