首页 > 解决方案 > 使用宏将数据从网站抓取到 Excel...丢失

问题描述

我对此完全陌生,我的需求如下。我正在运行一个宏,从业务系统中提取数据。提取这些信息后,我希望再用一个宏取出其中某些字段,填入网站表单,单击提交,然后把返回结果中的部分数据抓取并粘贴回 Excel。除了抓取结果并粘贴回 Excel 这一步之外,其他一切正常。

请帮忙!

我已经搜遍了 Stack Overflow,也看了不少视频,想弄清楚需要怎么做,但我一定是误解了什么。

'Question code as posted (not working). It opens Internet Explorer on the
'order-tracking page, fills in the order number and postal code, submits a
'form, waits, and then tries to copy table cells into the first worksheet.
Sub Track()
Range("B2").Select

'This should call to PT and deliver tracking info

Dim IE As Object
'NOTE(review): tbl and td are declared but not used anywhere in this procedure.
Dim tbl As Object, td As Object



 Set IE = CreateObject("InternetExplorer.Application") 'Set IEapp = 
 InternetExplorer
 IE.Visible = True

      IE.Navigate "https://www.partstown.com/track-my-order"
      'NOTE(review): IEapp is never assigned in this procedure; every call inside
      'this With block goes through IE directly.
      With IEapp
          'Yield to the host until the browser reports readyState 4.
          Do
          DoEvents
          Loop Until IE.readyState = 4



'Input PO and zip
 Call IE.Document.getElementById("orderNo").SetAttribute("value", 
 "4500969111")
'ActiveCell.Offset(0, 2).Select
 Call IE.Document.getElementById("postalCode").SetAttribute("value", 
 "37040")
 'Submits the form at index 7 of the document's forms collection.
 IE.Document.forms(7).Submit

 'Fixed 9-second pause before reading the page.
 Application.Wait Now + TimeValue("00:00:09")

'this is where i am stuck. I know this isnt right but tried to piece it 
 together
 'NOTE(review): the method name below is spelled getelElementsByTagname, and it
 'is passed a class-style selector string rather than a tag name.
 Set elemCollection = IE.Document.getelElementsByTagname("table.account- 
 table details _tc_table_highlighted")

 'Walk every table (t), row (r) and cell (c) and write the cell text to
 'worksheet 1 at (r + 1, c + 1).
 'NOTE(review): the assignment below reads .Rows.Cells(c) without applying the
 'row index r.
 For t = 0 To (elemCollection.Length - 1)
 For r = 0 To (elemCollection(t).Rows.Length - 1)
    For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
 ThisWorkbook.Worksheets(1).Cells(r + 1, c + 1) = 
 elemCollection(t).Rows.Cells(c).innertext
 Next c
 Next r
 Next t

 End With


 End Sub

这是我希望它提取的内容:Shipping、QTY ordered、QTY shipping、Product 这几列,并在同一行中依次显示:Shipping, QTY ordered, QTY shipping, Product

标签: htmlexcelvbaweb-scraping

解决方案


IE浏览器:

我写得比平时更详细一些,这样你就可以看到每一步。

关键事项:

1) 使用正确的页面加载等待:While .Busy Or .readyState < 4: DoEvents: Wend

2) 尽可能按 id 选择元素。# 是 CSS 的 id 选择器。CSS 选择器通过 .document 的 querySelector 方法应用,该方法返回页面中与指定模式匹配的第一个元素

3) 需要一个定时循环来等待结果出现

4)订单数量等信息是换行符分割的字符串。在这些换行符上拆分似乎最容易,然后按索引从结果数组中访问单个项目

5) 我按照您的规范订购了一个数组中的结果,然后将该数组一次性写到工作表中

6) “.” 是类选择器,例如 .order-history__item-descript--min 返回第一个 class 为 order-history__item-descript--min 的元素

7) [x=y] 是“属性 = 值”选择器,例如 [data-label=Shipping] 返回第一个 data-label 属性值为 Shipping 的元素

8) 组合选择器 .details-table a 使用后代组合器(即空格 " ")来指定:我想要的是位于 class 为 details-table 的元素之内的 a 标签元素

VBA:

Option Explicit

'VBE > Tools > References:
' Microsoft Internet Controls
'Looks up one order on the Parts Town "track my order" page by automating
'Internet Explorer, then writes Shipping, Qty Ordered, Qty Shipped and Product
'to worksheet "Sheet1" starting at A1 (one row, four columns).
'
'The browser is always closed before the procedure returns, including when the
'results never appear within MAX_WAIT_SEC or an expected element is missing.
'In those cases nothing is written to the sheet.
Public Sub RetrieveInfo()
    Const MAX_WAIT_SEC As Long = 5
    Const SECONDS_PER_DAY As Long = 86400

    Dim ie As InternetExplorer
    Dim ele As Object, orderEle As Object, productEle As Object
    Dim t As Single, elapsed As Single
    Dim shipping As String, order As String, items() As String
    Dim qtyOrdered As Long, qtyShipped As Long, product As String
    Dim results()

    Set ie = New InternetExplorer

    With ie
        .Visible = True
        .Navigate2 "https://www.partstown.com/track-my-order"

        While .Busy Or .readyState < 4: DoEvents: Wend

        With .document
            .querySelector("#orderNo").Value = "4500969111"
            .querySelector("#postalCode").Value = "37040"
            .querySelector("#orderLookUpForm").submit
        End With

        While .Busy Or .readyState < 4: DoEvents: Wend

        'Poll until the Shipping cell shows up or the time budget is spent.
        'DoEvents keeps Excel responsive while waiting.
        t = Timer
        Do
            DoEvents
            On Error Resume Next
            Set ele = .document.querySelector("[data-label=Shipping]")
            On Error GoTo 0
            elapsed = Timer - t
            'Timer restarts from 0 at midnight; correct a negative difference.
            If elapsed < 0 Then elapsed = elapsed + SECONDS_PER_DAY
            If elapsed > MAX_WAIT_SEC Then Exit Do
        Loop While ele Is Nothing

        If Not ele Is Nothing Then
            Set orderEle = .document.querySelector(".order-history__item-descript--min")
            Set productEle = .document.querySelector(".details-table a")
        End If

        'Close the browser on every exit path so no IE process is left behind.
        If ele Is Nothing Or orderEle Is Nothing Or productEle Is Nothing Then
            .Quit
            Exit Sub
        End If

        shipping = ele.innerText
        order = orderEle.innerText
        product = productEle.Title

        'Everything needed has been read from the page.
        .Quit
    End With

    'The description text holds one "label: value" pair per line.
    items = Split(order, vbNewLine)
    If UBound(items) < 1 Then Exit Sub

    qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
    qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))

    results = Array(shipping, qtyOrdered, qtyShipped, product)
    ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(1, UBound(results) + 1) = results
End Sub

如果是 HTML 新手,请查看:

https://developer.mozilla.org/en-US/docs/Web/HTML

如果是 CSS 选择器的新手,请查看:

https://flukeout.github.io/


XMLHTTP:

整个事情也可以用XHR完成。这比打开浏览器要快得多。

XHR:

使用 XMLHttpRequest (XHR) 对象与服务器交互。您可以从 URL 中检索数据,而无需执行整个页面 [render]

在这种情况下,我先对首页发出一个初始 GET 请求,以获取 CSRFToken;当您手动输入数据并按下提交时,页面会向服务器发出一个 POST 请求,我在重现这个 POST 请求时要用到该令牌。您所需的数据就在服务器的响应中。我在 POST 的 send 行的请求正文中传递了一个查询字符串:.send "orderNo=4500969111&postalCode=37040&CSRFToken=" & csrft;你可以在其中看到你的参数。

Option Explicit
'Looks up one order via XMLHTTP instead of a browser and writes Shipping,
'Qty Ordered, Qty Shipped and Product to worksheet "Sheet1" starting at A1.
'
'Steps: GET the home page to read the CSRFToken field, then POST the order
'number, postal code and token to the track-my-order URL and parse the reply.
'If the token or any expected result element is missing (for example the order
'was not found), the procedure returns without writing anything.
'
'Requires: VBE > Tools > References > Microsoft HTML Object Library
Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String
    Dim tokenEle As Object, shippingEle As Object, orderEle As Object, productEle As Object
    Dim shipping As String, order As String, items() As String
    Dim qtyOrdered As Long, qtyShipped As Long, product As String
    Dim results()

    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send

        html.body.innerHTML = .responseText

        Set tokenEle = html.querySelector("[name=CSRFToken]")
        If tokenEle Is Nothing Then Exit Sub   'no token, so the POST cannot be built
        csrft = tokenEle.Value

        .Open "POST", "https://www.partstown.com/track-my-order", False
        .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
        .setRequestHeader "Accept-Encoding", "gzip, deflate"
        .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
        .send "orderNo=4500969111&postalCode=37040&CSRFToken=" & csrft

        html.body.innerHTML = .responseText
    End With

    Set shippingEle = html.querySelector("[data-label=Shipping]")
    Set orderEle = html.querySelector(".order-history__item-descript--min")
    Set productEle = html.querySelector(".details-table a")

    'querySelector returns Nothing when there is no match; stop here rather
    'than failing with run-time error 91 on the property reads below.
    If shippingEle Is Nothing Or orderEle Is Nothing Or productEle Is Nothing Then Exit Sub

    shipping = shippingEle.innerText
    order = orderEle.innerText
    product = productEle.Title

    'The description text holds one "label: value" pair per line.
    items = Split(order, vbNewLine)
    If UBound(items) < 1 Then Exit Sub

    qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
    qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))

    results = Array(shipping, qtyOrdered, qtyShipped, product)
    ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(1, UBound(results) + 1) = results
End Sub

循环示例:

Option Explicit

'Batch version: reads order numbers from column B and postal codes from
'column D of worksheet "Sheet4" (row 2 downwards), looks each order up via
'XMLHTTP, and writes Shipping, Qty Ordered, Qty Shipped and Product to
'columns E:H of the same row.
'
'Rows with a blank or error value in B or D, and rows whose response lacks
'the expected elements, are left empty in the output; they do not abort
'the batch.
'
'Requires: VBE > Tools > References > Microsoft HTML Object Library
Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String, lastRow As Long, sourceValues()
    Dim ws As Worksheet, i As Long
    Dim tokenEle As Object, shippingEle As Object, orderEle As Object, productEle As Object
    Dim order As String, items() As String
    Dim results()

    Set html = New HTMLDocument
    Set ws = ThisWorkbook.Worksheets("Sheet4")

    lastRow = ws.Cells(ws.Rows.Count, "B").End(xlUp).Row
    'With no data below the header, "B2:D1" would address the wrong cells.
    If lastRow < 2 Then Exit Sub

    sourceValues = ws.Range("B2:D" & lastRow).Value
    ReDim results(1 To UBound(sourceValues, 1), 1 To 4)

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send
        html.body.innerHTML = .responseText

        Set tokenEle = html.querySelector("[name=CSRFToken]")
        If tokenEle Is Nothing Then Exit Sub   'no token, so no POST can be built
        csrft = tokenEle.Value

        For i = LBound(sourceValues, 1) To UBound(sourceValues, 1)
            DoEvents
            If Not IsError(sourceValues(i, 1)) And Not IsError(sourceValues(i, 3)) Then
                If sourceValues(i, 1) <> vbNullString And sourceValues(i, 3) <> vbNullString Then
                    .Open "POST", "https://www.partstown.com/track-my-order", False
                    .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
                    .setRequestHeader "User-Agent", "Mozilla/5.0"
                    .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                    .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
                    .setRequestHeader "Accept-Encoding", "gzip, deflate"
                    .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
                    .send "orderNo=" & sourceValues(i, 1) & "&postalCode=" & sourceValues(i, 3) & "&CSRFToken=" & csrft

                    html.body.innerHTML = .responseText

                    Set shippingEle = html.querySelector("[data-label=Shipping]")
                    Set orderEle = html.querySelector(".order-history__item-descript--min")
                    Set productEle = html.querySelector(".details-table a")

                    'Only fill the row when every expected element is present.
                    If Not (shippingEle Is Nothing Or orderEle Is Nothing Or productEle Is Nothing) Then
                        order = orderEle.innerText
                        'The description text holds one "label: value" pair per line.
                        items = Split(order, vbNewLine)
                        If UBound(items) >= 1 Then
                            results(i, 1) = shippingEle.innerText
                            results(i, 2) = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
                            results(i, 3) = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
                            results(i, 4) = productEle.Title
                        End If
                    End If
                End If
            End If
            'Application.Wait Now + TimeSerial(0, 0, 1)
        Next
    End With

    'results written out from row 2 column E
    ws.Cells(2, 5).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

推荐阅读