vba - 如何使用 VBA 从 Internet Explorer 中的绘图/图形中提取信息
问题描述
<body id="ctl00_Body1" class="pBack"><div class="tipsy tipsy-w" style="top: 3279.47px; left: 1096.63px; visibility: visible; display: block; opacity: 0.8; z-index: 100000;"><div class="tipsy-arrow tipsy-arrow-w"></div><div class="tipsy-inner">Mon Sep 03 2018<br>357,938 Units</div></div>
我对 VBA 编程和网络抓取都很陌生,所以如果我的编码有点破旧,请原谅我。
项目详情
我正在开展一个项目,该项目涉及抓取网站以获取大量数据。
到目前为止我做了什么
目前我已经设法使用用户名和密码登录网站,输入搜索参数,单击按钮显示结果。在结果上,我能够提取表格中的数据。我在下面给出了我迄今为止开发的代码。
问题陈述
现在我需要一个显示在图表中的信息,当我单击绘图点时会显示该信息(我附上了图表的屏幕截图/链接以及 html 代码)。我想在单位文本之前获取数字。该链接是公司内部的,所以我无法分享
这是我到目前为止开发的代码:
Sub MultiCpn_Div_Class()
Application.ScreenUpdating = False
Sheets("Sheet1").Activate
'Active sheet where the search parameter will be
Dim rc As Long
'loop variable
Dim CPN As String
'search parameter from active sheet
Dim MyHTML_Element As IHTMLElement
Dim MyURL As String
Dim HTMLTABL As MSHTML.IHTMLElement
Dim HTMLTABLC As MSHTML.IHTMLElementCollection
Dim HTMLDivc As MSHTML.IHTMLElementCollection
Dim HTMLDIV As MSHTML.IHTMLElement
Dim HTMLROW As MSHTML.IHTMLElement
Dim HTMLCELL As MSHTML.IHTMLElement
Dim Rownum, Colnum, Sheetnum As Long
MyURL = "XXXXXXXXX"
Set ie = New InternetExplorerMedium
'Had to declare ie as Medium because it kept on failing if i didnt
'ie.Silent = True
ie.Visible = False
ie.navigate MyURL
Do
Loop Until ie.readyState = READYSTATE_COMPLETE
Set HTMLDOC = ie.document
HTMLDOC.all.ctl00_PageContent_UserName.Value = "username"
HTMLDOC.all.ctl00_PageContent_Password.Value = "password"
HTMLDOC.all.ctl00_PageContent_OKButton__Button.Click
'Providing user name, password and then signing in
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
rc = 2
'starting from second row of data
For rc = 2 To 5
Sheets("Sheet1").Activate
CPN = Range("A" & rc).Value
'assigning cell value to variable
ie.navigate "YYYY"
'After logging in i had to navigate to a specific link-YYY otherwise it kept showing an error
Do
Loop Until ie.readyState = READYSTATE_COMPLETE
Set IEDOC = ie.document
'reassigned the HTML document to the newly loaded page
IEDOC.all.cpnval.Value = CPN
IEDOC.all.run_Button.Click
'entering the search parameter and then clicking the search button
Do
Loop Until ie.readyState = READYSTATE_COMPLETE
Application.Wait (Now + TimeValue("0:00:02"))
'had to include a wait for 2 seconds to cater to network delay issues
Set HTMLDOC = ie.document
'again reassigned the HTML document to the newly loaded page which had all necessary data
Set HTMLDivc = HTMLDOC.getElementsByTagName("div")
'the page has multiple div classes inside which multiple tables are located, so getting a
'div collection and then looping over all tables data to search for particular tables that i want
For Each HTMLDIV In HTMLDivc
If HTMLDIV.ID = "attributes_table" Then
Set HTMLTABLC = HTMLDIV.Children
For Each HTMLTABL In HTMLTABLC
If HTMLTABL.className = "table table-striped temp" Then
Sheets.Add.Name = "T1" & CPN
Range("A1").Value = HTMLTABL.className
Range("B1").Value = Now
Rownum = 2
'Once required table is found i'm copying that table data and pasting it into new sheet as required for each CPN
For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
Colnum = 1
For Each HTMLCELL In HTMLROW.Children
Cells(Rownum, Colnum) = HTMLCELL.innerText
Colnum = Colnum + 1
Next HTMLCELL
Rownum = Rownum + 1
Next HTMLROW
Sheets("Sheet1").Range("F" & rc).Value = Sheets("T1" & CPN).Range("B4")
Sheets("Sheet1").Range("G" & rc).Value = Sheets("T1" & CPN).Range("B16")
Sheets("Sheet1").Range("M" & rc).Value = Sheets("T1" & CPN).Range("B5")
Sheets("Sheet1").Range("J" & rc).Value = Sheets("T1" & CPN).Range("B25")
'After extracting the data to new sheet for each search parameter i'm copying that data and pasting into my active sheet
End If
Next HTMLTABL
ElseIf HTMLDIV.ID = "award_table" Then
'same as above creating new sheet for another table information
Set HTMLTABLC = HTMLDIV.Children
For Each HTMLTABL In HTMLTABLC
If HTMLTABL.className = "table table-striped temp" Then
Sheets.Add.Name = "T2" & CPN
Range("A1").Value = HTMLTABL.className
Range("B1").Value = Now
Rownum = 2
For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
Colnum = 1
For Each HTMLCELL In HTMLROW.Children
Cells(Rownum, Colnum) = HTMLCELL.innerText
Colnum = Colnum + 1
Next HTMLCELL
Rownum = Rownum + 1
Next HTMLROW
Dim i As Long
Dim sht As Worksheet
Dim LastRow As Long
Dim Moq, Moq2, Ven, Ven2 As Variant
Moq = ""
Ven = ""
Set sht = Sheets("T2" & CPN)
LastRow = sht.Cells(sht.Rows.Count, "A").End(xlUp).Row
For i = 3 To LastRow
Moq2 = Sheets("T2" & CPN).Cells(i, 9).Value & vbCrLf
Moq = Moq + Moq2
Ven2 = Sheets("T2" & CPN).Cells(i, 1).Value & vbCrLf
Ven = Ven + Ven2
Next i
Sheets("Sheet1").Range("E" & rc).Value = Moq
Sheets("Sheet1").Range("K" & rc).Value = Ven
End If
Next HTMLTABL
Else
If HTMLDIV.ID = "full_avl_table" Then
'same as above creating new sheet for another table information
Set HTMLTABLC = HTMLDIV.Children
For Each HTMLTABL In HTMLTABLC
If HTMLTABL.className = "table table-striped temp" Then
Sheets.Add.Name = "T3" & CPN
Range("A1").Value = HTMLTABL.className
Range("B1").Value = Now
Rownum = 2
For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
Colnum = 1
For Each HTMLCELL In HTMLROW.Children
Cells(Rownum, Colnum) = HTMLCELL.innerText
Colnum = Colnum + 1
Next HTMLCELL
Rownum = Rownum + 1
Next HTMLROW
Dim i2 As Long
Dim sht2 As Worksheet
Dim LR As Long
Dim AVL, AVL2 As Variant
AVL = ""
Set sht2 = Sheets("T3" & CPN)
LR = sht2.Cells(sht2.Rows.Count, "A").End(xlUp).Row
For i2 = 3 To LR
Cells(i2, 7).Value = Cells(i2, 1) & " - " & Cells(i2, 2) & " - " & Cells(i2, 4) & " - " & Cells(i2, 6)
AVL2 = Sheets("T3" & CPN).Cells(i2, 7).Value & vbCrLf
AVL = AVL2 + AVL
Next i2
Sheets("Sheet1").Range("N" & rc).Value = AVL
Columns("N").AutoFit
End If
Next HTMLTABL
End If
End If
Next HTMLDIV
Next
ie.Quit
Sheets("Sheet1").Activate
Application.ScreenUpdating = True
End Sub
关闭 HTML 代码 更新的图像: 调试页面片段
解决方案
很难从那个小图像中分辨出来。请参阅我重新插入 HTML 的评论。
根据我所看到的,您可以尝试以下方法:
Debug.Print Split(Split(HMLDoc.querySelector(".tipsy-inner").innerText, Chr(10))(1), Chr$(32))(0)
这一点,querySelector(".tipsy-inner")
按类定位页面上类名为tipsy-inner 的第一个元素。然后我解析.innerText
我猜想是存在的分隔符,以尝试拆分返回字符串的数字部分。
如果没有找到,可以尝试定时循环查找:
Dim t As Date, ele As Object
Const WAIT_TIME_SECS As Long = 10
t = Timer
Do
DoEvents
If Timer - t > WAIT_TIME_SECS Then Exit Do
On Error Resume Next
Set ele = HMLDoc.querySelector(".tipsy-inner")
On Error GoTo 0
Loop While ele Is Nothing
If ele Is Nothing Then Exit Sub
Debug.Print Split(Split(ele.innerText, Chr(10))(1), Chr$(32))(0)
推荐阅读
- javascript - 如何在html表格中改变颜色一半和一半
- kubernetes - 尝试使用自定义服务帐户创建 Pod 时出现 403 错误
- amazon-cloudwatch - 当值超过阈值时未触发基于 Kinesis 指标的 Cloudwatch 警报
- c# - 是否可以在没有 Firestore 或 FCM 的情况下在我的 Flutter 聊天应用程序中通知用户新消息?
- r - 在 R 中如何处理 Stan 中的二进制数据?
- laravel-5 - 如何切换 VueJs 组件的可见性?
- graphql - GraphQL:如何通过游乐场传递查询变量?
- javascript - 根据数组值显示 UL 元素
- javascript - 我在使用 REACT 制作计算器时遇到问题
- javascript - 如何正确销毁cropper.js,以便下一次裁剪获得新图像?