首页 > 解决方案 > 如何使用 VBA 从 Internet Explorer 中的绘图/图形中提取信息

问题描述

<body id="ctl00_Body1" class="pBack"><div class="tipsy tipsy-w" style="top: 3279.47px; left: 1096.63px; visibility: visible; display: block; opacity: 0.8; z-index: 100000;"><div class="tipsy-arrow tipsy-arrow-w"></div><div class="tipsy-inner">Mon Sep 03 2018<br>357,938 Units</div></div>

我对 VBA 编程和网络抓取都很陌生,所以如果我的编码有点破旧,请原谅我。

项目详情

我正在开展一个项目,该项目涉及抓取网站以获取大量数据。

到目前为止我做了什么

目前我已经设法使用用户名和密码登录网站,输入搜索参数,单击按钮显示结果。在结果上,我能够提取表格中的数据。我在下面给出了我迄今为止开发的代码。

问题陈述

现在我需要一个显示在图表中的信息,当我单击绘图点时会显示该信息(我附上了图表的屏幕截图/链接以及 html 代码)。我想在单位文本之前获取数字。该链接是公司内部的,所以我无法分享

在此处输入图像描述

这是我到目前为止开发的代码:

Sub MultiCpn_Div_Class()

Application.ScreenUpdating = False
Sheets("Sheet1").Activate
'Active sheet where the search parameter will be
Dim rc As Long
'loop variable
Dim CPN As String
'search parameter from active sheet

Dim MyHTML_Element As IHTMLElement
Dim MyURL As String
Dim HTMLTABL As MSHTML.IHTMLElement
Dim HTMLTABLC As MSHTML.IHTMLElementCollection
Dim HTMLDivc As MSHTML.IHTMLElementCollection
Dim HTMLDIV As MSHTML.IHTMLElement
Dim HTMLROW As MSHTML.IHTMLElement
Dim HTMLCELL As MSHTML.IHTMLElement
Dim Rownum, Colnum, Sheetnum As Long


MyURL = "XXXXXXXXX"

Set ie = New InternetExplorerMedium
'Had to declare ie as Medium because it kept on failing if i didnt
'ie.Silent = True
ie.Visible = False
ie.navigate MyURL
Do
Loop Until ie.readyState = READYSTATE_COMPLETE
Set HTMLDOC = ie.document
HTMLDOC.all.ctl00_PageContent_UserName.Value = "username"
HTMLDOC.all.ctl00_PageContent_Password.Value = "password"
HTMLDOC.all.ctl00_PageContent_OKButton__Button.Click
'Providing user name, password and then signing in
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
rc = 2
'starting from second row of data
For rc = 2 To 5
Sheets("Sheet1").Activate

CPN = Range("A" & rc).Value
'assigning cell value to variable
ie.navigate "YYYY"
'After logging in i had to navigate to a specific link-YYY otherwise it kept showing an error
   
Do
Loop Until ie.readyState = READYSTATE_COMPLETE

Set IEDOC = ie.document
'reassigned the HTML document to the newly loaded page
IEDOC.all.cpnval.Value = CPN
IEDOC.all.run_Button.Click
'entering the search parameter and then clicking the search button

Do
Loop Until ie.readyState = READYSTATE_COMPLETE
Application.Wait (Now + TimeValue("0:00:02"))
'had to include a wait for 2 seconds to cater to network delay issues

Set HTMLDOC = ie.document
 'again reassigned the HTML document to the newly loaded page which had all necessary data
Set HTMLDivc = HTMLDOC.getElementsByTagName("div")
'the page has multiple div classes inside which multiple tables are located, so getting a
'div collection and then looping over all tables data to search for particular tables that i want
   
For Each HTMLDIV In HTMLDivc
    
    If HTMLDIV.ID = "attributes_table" Then
        Set HTMLTABLC = HTMLDIV.Children
 
        For Each HTMLTABL In HTMLTABLC
            If HTMLTABL.className = "table table-striped temp" Then
                Sheets.Add.Name = "T1" & CPN
                Range("A1").Value = HTMLTABL.className
                Range("B1").Value = Now
                Rownum = 2
                'Once required table is found i'm copying that table data and pasting it into new sheet as required for each CPN

                For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
                Colnum = 1
         
                    For Each HTMLCELL In HTMLROW.Children
                    Cells(Rownum, Colnum) = HTMLCELL.innerText
                    Colnum = Colnum + 1
                    Next HTMLCELL
                    
                Rownum = Rownum + 1
                Next HTMLROW

            Sheets("Sheet1").Range("F" & rc).Value = Sheets("T1" & CPN).Range("B4")
            Sheets("Sheet1").Range("G" & rc).Value = Sheets("T1" & CPN).Range("B16")
            Sheets("Sheet1").Range("M" & rc).Value = Sheets("T1" & CPN).Range("B5")
            Sheets("Sheet1").Range("J" & rc).Value = Sheets("T1" & CPN).Range("B25")
            'After extracting the data to new sheet for each search parameter i'm copying that data and pasting into my active sheet
            End If
            
        Next HTMLTABL
        
    ElseIf HTMLDIV.ID = "award_table" Then
    'same as above creating new sheet for another table information
        Set HTMLTABLC = HTMLDIV.Children
        
        For Each HTMLTABL In HTMLTABLC
                If HTMLTABL.className = "table table-striped temp" Then
                    Sheets.Add.Name = "T2" & CPN
                    Range("A1").Value = HTMLTABL.className
                    Range("B1").Value = Now
                    Rownum = 2
        
                     For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
                     Colnum = 1
                     
                         For Each HTMLCELL In HTMLROW.Children
                         Cells(Rownum, Colnum) = HTMLCELL.innerText
                         Colnum = Colnum + 1
            
                         Next HTMLCELL
                         Rownum = Rownum + 1
            
                     Next HTMLROW
                     Dim i As Long
                     Dim sht As Worksheet
                     Dim LastRow As Long
                     Dim Moq, Moq2, Ven, Ven2 As Variant
                     Moq = ""
                     Ven = ""
                     Set sht = Sheets("T2" & CPN)
                     LastRow = sht.Cells(sht.Rows.Count, "A").End(xlUp).Row
                    
                            For i = 3 To LastRow
                                Moq2 = Sheets("T2" & CPN).Cells(i, 9).Value & vbCrLf
                                Moq = Moq + Moq2
                                Ven2 = Sheets("T2" & CPN).Cells(i, 1).Value & vbCrLf
                                Ven = Ven + Ven2
                            Next i
             
                    Sheets("Sheet1").Range("E" & rc).Value = Moq
                    Sheets("Sheet1").Range("K" & rc).Value = Ven
                              
                End If
        Next HTMLTABL
        
        Else
            If HTMLDIV.ID = "full_avl_table" Then
             'same as above creating new sheet for another table information
                Set HTMLTABLC = HTMLDIV.Children
                For Each HTMLTABL In HTMLTABLC
                    If HTMLTABL.className = "table table-striped temp" Then
                        Sheets.Add.Name = "T3" & CPN
                        Range("A1").Value = HTMLTABL.className
                        Range("B1").Value = Now
                        Rownum = 2
                
                         For Each HTMLROW In HTMLTABL.getElementsByTagName("tr")
                         Colnum = 1
                         
                             For Each HTMLCELL In HTMLROW.Children
                             Cells(Rownum, Colnum) = HTMLCELL.innerText
                             Colnum = Colnum + 1
                        
                             Next HTMLCELL
                             Rownum = Rownum + 1
                        
                         Next HTMLROW
                         Dim i2 As Long
                         Dim sht2 As Worksheet
                         Dim LR As Long
                         Dim AVL, AVL2 As Variant
                         AVL = ""
                         Set sht2 = Sheets("T3" & CPN)
                         LR = sht2.Cells(sht2.Rows.Count, "A").End(xlUp).Row
                         For i2 = 3 To LR
                            Cells(i2, 7).Value = Cells(i2, 1) & " - " & Cells(i2, 2) & " - " & Cells(i2, 4) & " - " & Cells(i2, 6)
                            AVL2 = Sheets("T3" & CPN).Cells(i2, 7).Value & vbCrLf
                            AVL = AVL2 + AVL
                         Next i2

                        Sheets("Sheet1").Range("N" & rc).Value = AVL
                        Columns("N").AutoFit
                                                    
                       End If
                Next HTMLTABL
            End If
    End If
    Next HTMLDIV

    Next
   ie.Quit
   Sheets("Sheet1").Activate
   Application.ScreenUpdating = True
End Sub

关闭 HTML 代码 更新的图像: 调试页面片段

标签: vbaexcelweb-scraping

解决方案


很难从那个小图像中分辨出来。请参阅我重新插入 HTML 的评论。

根据我所看到的,您可以尝试以下方法:

Debug.Print Split(Split(HMLDoc.querySelector(".tipsy-inner").innerText, Chr(10))(1), Chr$(32))(0)

这一点,querySelector(".tipsy-inner")按类定位页面上类名为tipsy-inner 的第一个元素。然后我解析.innerText我猜想是存在的分隔符,以尝试拆分返回字符串的数字部分。

如果没有找到,可以尝试定时循环查找:

Dim t As Date, ele As Object
Const WAIT_TIME_SECS As Long = 10

t = Timer
Do
    DoEvents
    If Timer - t > WAIT_TIME_SECS Then Exit Do
    On Error Resume Next
    Set ele = HMLDoc.querySelector(".tipsy-inner")
    On Error GoTo 0
Loop While ele Is Nothing

If ele Is Nothing Then Exit Sub

Debug.Print Split(Split(ele.innerText, Chr(10))(1), Chr$(32))(0)

推荐阅读