首页 > 解决方案 > 如何使用 VBA 改进数据抓取?

问题描述

我有以下代码,它从 Intranet 获取数据。但是获取数据需要更多时间。有人可以帮我修改代码以提高性能。提前致谢

注意-我没有发布 URL,因为它是客户网站。对于那个很抱歉。

Sub FetchData() 
Dim IE As Object
Dim Doc As HTMLDocument
Dim myStr As String
On Error Resume Next

  Set IE = CreateObject("InternetExplorer.Application") 'SetBrowser
  IE.Visible = False

IE.navigate "URL" 'Open website
Do While IE.Busy Or IE.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop

Set Doc = IE.Document

Doc.getElementById("tbxUserID").Value = InputBox("Please Enter Your ID")
Doc.getElementById("txtPassword").Value = InputBox("Please Enter Your                     
Password")
Doc.getElementById("BtnLogin").Click
Do While IE.Busy Or IE.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop

IE.navigate "URL"
Do While IE.Busy Or IE.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop

Dim LastRow As Long

Set wks = ActiveSheet
LastRow = wks.Cells(wks.Rows.Count, "A").End(xlUp).Row
Set rowNo = wks.Range("A1:A" & LastRow)
  For rowNo = 2 To LastRow
Doc.getElementById("txtField1").Value =         
ThisWorkbook.Sheets("Sheet1").Range("A" & rowNo).Value
Doc.getElementById("CtrlQuickSearch1_imgBtnSumbit").Click
Do While IE.Busy Or IE.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop

strVal1 = Doc.querySelectorAll("span")(33).innerText
ThisWorkbook.Sheets("Sheet1").Range("B" & rowNo).Value = strVal1
strVal2 = Doc.querySelectorAll("span")(35).innerText
ThisWorkbook.Sheets("Sheet1").Range("C" & rowNo).Value = strVal2

Next

End Sub 

标签: excelvbaweb-scraping

解决方案


不能保证这会运行。注意事项:

  1. Worksheets收藏品的使用
  2. 使用Option Explicit- 这意味着您必须始终使用正确的数据类型。目前您有未声明的变量,例如,rowNo 用作 Long 和范围。
  3. 删除On Error Resume Next
  4. 将所有工作表放入变量中
  5. 将值放入数组和循环数组以获取 id 值。循环片很贵
  6. 使用早期绑定和添加类到 InternetExplorer
  7. 假设登录后存在一个新的 url,并且您需要在每个新循环值之前导航回该 URL
  8. 删除匈牙利符号
  9. ID 是最快的选择器方法,因此没有改进
  10. 使用您的 css 类型选择器,例如.document.querySelectorAll("span")(33),您可能会寻找是否有可以使用的单节点短选择器,而不是使用 nodeList

VBA:

Option Explicit  
Public Sub FetchData()
    Dim ie As Object, ie As InternetExplorer
    Dim lastRow As Long, wks As Worksheet, i As Long, ws As Worksheet

    Set ie = New SHDocVw.InternetExplorer        'SetBrowser
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set wks = ActiveSheet                        '<==use explicit sheet name if possible
    lastRow = wks.Cells(wks.rows.Count, "A").End(xlUp).Row
    loopvalues = Application.Transpose(wks.Range("A2:A" & lastRow).Value)

    With ie

        .Visible = False
        .Navigate2 "URL"                         'Open website

        While .Busy Or .readyState < 4: DoEvents: Wend

        .document.getElementById("tbxUserID").Value = InputBox("Please Enter Your ID")
        .document.getElementById("txtPassword").Value = InputBox("Please Enter Your Password")
        .document.getElementById("BtnLogin").Click

        While .Busy Or ie.readyState < 4: DoEvents: Wend

        Dim newURL As String, val1 As String, val2 As String
        newURL = .document.URL

        For i = LBound(loopvalues) To UBound(loopvalues)

            .document.getElementById("txtField1").Value = loopvalues(i)
            .document.getElementById("CtrlQuickSearch1_imgBtnSumbit").Click

            While .Busy Or .readyState < 4: DoEvents: Wend

            val1 = .document.querySelectorAll("span")(33).innerText
            ws.Range("B" & i).Value = val1
            val2 = .document.querySelectorAll("span")(35).innerText
            ws.Range("C" & i).Value = val2

            .Navigate2 newURL

            While .Busy Or ie.readyState < 4: DoEvents: Wend
        Next
        .Quit
    End With
End Sub

推荐阅读