html - WebScraping with VBA - change value of InputBox
问题描述
I'm experienced with VBA but really new with webscraping. So far I managed to extract some tables from other webpages but this one is giving me a hard time. The link is http://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=6&accion=consultarCuadro&idCuadro=CF102&locale=es
Basically, I click the arrow drop down list next to "Exportar Cuadro" button. After that, I need to change both dates that appear there to a specific one I will have into a variable.
How can I get to change that input boxes on webpage? My code so far is the next one:
Option Explicit
Sub test()
Dim URL As String, URL2 As String, URL3 As String, URL4 As String
Dim IE As Object, obj As Object, colTR As Object, doc As Object, tr As Object
Dim eleColtr As MSHTML.IHTMLElementCollection 'Element collection for tr tags
Dim eleColtd As MSHTML.IHTMLElementCollection 'Element collection for td tags
Dim eleRow As MSHTML.IHTMLElement 'Row elements
Dim eleCol As MSHTML.IHTMLElement 'Column elements
Dim objCollection As Object
Dim j As String, i As Integer
URL = "https://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=18&accion=consultarCuadroAnalitico&idCuadro=CA51&locale=es"
URL2 = "https://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=18&accion=consultarCuadroAnalitico&idCuadro=CA52&locale=es"
URL3 = "https://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=18&accion=consultarCuadroAnalitico&idCuadro=CA53&locale=es"
URL4 = "http://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=6&accion=consultarCuadro&idCuadro=CF102&locale=es"
'Tipos de cambio
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate URL4
Do While IE.Busy Or IE.readyState <> 4
DoEvents
Loop
Application.Wait (Now + TimeValue("00:00:01"))
IE.document.getElementById("exportaCuadroToggle").Click
Set objCollection = IE.document.getElementsByTagName("ID")
i = 0
While i < objCollection.Length
If objCollection(i).Value = "26/08/2019" Then
' Set text for search
objCollection(i).Value = "01/08/2019"
End If
If objCollection(i).Name = "form-control form-control-sm fechaFin" Then
' Set text for search
objCollection(i).Value = "01/08/2019"
End If
Wend
End Sub
Note: URL
, URL2
and URL3
are used in the complete code but I ommited that part for now because those links are already doing what I want.
解决方案
查看@StavrosJon 引用的 API 文档,您似乎可以执行以下操作。相关的 API 端点是:
获取系列/:idSerie/datos/:fechaI/:fechaF
API 调用需要以逗号分隔的系列 ID 列表作为其参数之一。你可以硬编码这些,或者像我一样,简单地从你引用的现有网页中获取它们,然后传入后续的 API 调用。我正则表达式出必要的系列 ID。
响应是 json - 如此处详述- 因此,您需要一个 json 解析器来处理响应。我使用 jsonconverter.bas。从这里下载原始代码并添加到名为 jsonConverter 的标准模块中。然后您需要转到 VBE > 工具 > 参考 > 添加对 Microsoft Scripting Runtime 的引用。
我使用一些辅助函数来确保我正确排序了日期输出,并且正确处理了丢失的信息。
titulo
如果您想要项目配对,例如 Max / Min,请在列上对输出进行排序。否则,您可以实现自定义排序。
VBA:
Option Explicit
Public Sub GetData()
'< VBE > Tools > References > Microsoft Scripting Runtime
Dim json As Object, re As Object, s As String, xhr As Object
Dim startDate As String, endDate As String, ws As Worksheet, ids As String
startDate = "2019-08-18"
endDate = "2019-08-24"
Dim datesDict As Object, headers(), results(), key As Variant, r As Long
Set datesDict = GetDateDictionary(startDate, endDate)
ReDim headers(1 To datesDict.Count + 2)
headers(1) = "idSerie"
headers(2) = "titulo"
r = 3
For Each key In datesDict.keys
headers(r) = key
r = r + 1
Next
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set re = CreateObject("VBScript.RegExp")
Set xhr = CreateObject("MSXML2.XMLHTTP")
With xhr
.Open "GET", "http://www.banxico.org.mx/SieInternet/consultarDirectorioInternetAction.do?sector=6&accion=consultarCuadro&idCuadro=CF102&locale=es", False
.send
s = .responseText
ids = GetIds(re, s)
If ids = "No match" Then Exit Sub
.Open "GET", "https://www.banxico.org.mx/SieAPIRest/service/v1/series/" & ids & "/datos/" & startDate & "/" & endDate & "", False 'https://www.banxico.org.mx/SieAPIRest/service/v1/doc/consultaDatosSerieRango
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Bmx-Token", "aa833b22ee2a350192df6962b1eb6d8ea569ac895862ecc31b79b46859c7e74c" 'https://www.banxico.org.mx/SieAPIRest/service/v1/token ''<== Replace with your generated token
.send
s = .responseText
End With
Set json = JsonConverter.ParseJson(s)("bmx")("series")
ReDim results(1 To json.Count, 1 To UBound(headers))
WriteOutResults ws, re, startDate, endDate, json, results, headers
End Sub
Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal re As Object, ByVal startDate As String, ByVal endDate As String, ByVal json As Object, ByRef results(), ByRef headers())
Dim item As Object, subItem As Object, key As Variant
Dim r As Long, c As Long, datesDict As Object, nextKey As Variant
re.Pattern = "\s{2,}"
For Each item In json
Set datesDict = GetDateDictionary(startDate, endDate)
r = r + 1
For Each key In item.keys
Select Case key
Case "idSerie"
results(r, 1) = item(key)
Case "titulo"
results(r, 2) = re.Replace(item(key), Chr$(32))
Case "datos"
c = 3
For Each subItem In item(key)
datesDict(subItem("fecha")) = subItem("dato")
Next subItem
For Each nextKey In datesDict.keys
results(r, c) = datesDict(nextKey)
c = c + 1
Next
End Select
Next
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers)) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetIds(ByVal re As Object, ByVal responseText As String) As String
Dim matches As Object, i As Long, dict As Object
Set dict = CreateObject("Scripting.Dictionary")
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "'(SF\d{5})'" 'regex pattern to get json string
If .test(responseText) Then
Set matches = .Execute(responseText)
For i = 0 To matches.Count - 1
dict(matches(i).SubMatches(0)) = vbNullString
Next
GetIds = Join$(dict.keys, ",")
Else
GetIds = "No match"
End If
End With
End Function
Public Function GetDateDictionary(ByVal startDate As String, ByVal endDate As String) As Object
Dim sDate As Long, eDate As Long
Dim dateDict As Object, i As Long
Set dateDict = CreateObject("Scripting.Dictionary")
sDate = CDate(startDate)
eDate = CDate(endDate)
For i = sDate To eDate
dateDict(Format$(i, "dd/mm/yyyy")) = vbNullString
Next
Set GetDateDictionary = dateDict
End Function
示例结果:
正则表达式:
推荐阅读
- java - 如何与 bin、conf 等少数目录一起构建战争
- javascript - 无法在 Angular 中访问对象属性
- r - 在 Rscript 中使用从 ShinyApp 获取的变量
- sql-server - Azure 数据同步可以用作主到主数据库复制吗?
- php - 我的前端不会与我的 Heroku 后端交互
- python - guvectorize 在 nopython 模式下不解析类型
- c++ - 虚拟模板成员方法!一个实用的解决方法
- excel - 如何在后台启动浏览器,同时在用户窗体中运行宏的 Excel VBA 在前台保持活动状态
- mysql - SELECT,从 JOIN 上的 WHERE 获取反转结果
- python - Pandas - 滚动获取最后一个非 Nan 值加上列中非 Nan 的计数