首页 > 解决方案 > VBA网页抓取更新

问题描述

我有以下代码:

  1. 打开一个网页(在这种情况下是亚马逊)
  2. 单击页面上出现的所有产品(并在新标签中打开每个产品)
  3. 浏览每个打开的选项卡(从步骤 2 开始),复制“产品标题”并将其粘贴到 A 列

你能帮我更新代码以包含一个循环吗:

  1. 它通过每个打开的选项卡(从步骤 2 开始)并复制价格元素并将其粘贴到与产品标题相对应的 B 列中

HTML 元素的价格是“649”

Sub launch_product()
Dim IE As SHDocVw.InternetExplorer
Dim idoc As MSHTML.HTMLDocument
Dim doc_ele As MSHTML.IHTMLElement
Dim doc_eles As MSHTML.IHTMLElementCollection

Dim startoftitle As Integer, endoftitle As Integer, rownum As Long
Dim vouterHTML As String, ProductTitle As String

Set IE = New SHDocVw.InternetExplorer
IE.Visible = True
IE.Navigate "https://www.amazon.in/s?k=rudra+fashion&rh=p_n_size_two_browse-vebin%3A1975333031&dc&crid=2AKWK100N33Q9&qid=1574534623&rnid=1974754031&sprefix=rudra+fas%2Caps%2C287&ref=sr_nr_p_n_size_two_browse-vebin_8"

Do While IE.ReadyState <> READYSTATE_COMPLETE
   Application.StatusBar = "Loading"
Loop
Set idoc = IE.Document

Set doc_eles = idoc.getElementsByTagName("img")
rownum = 1

For Each doc_ele In doc_eles
    If doc_ele.className = "s-image" Then
       doc_ele.Click

       vouterHTML = doc_ele.outerHTML
       startoftitle = InStr(1, vouterHTML, "alt=") + 5
       endoftitle = InStr(startoftitle, vouterHTML, """") - 1
       ProductTitle = Mid(vouterHTML, startoftitle, endoftitle - startoftitle + 1)
       ActiveSheet.Cells(rownum, 1).Value = ProductTitle
       rownum = rownum + 1
    End If
Next doc_ele

ActiveSheet.Columns(1).EntireColumn.AutoFit
IE.Quit

结束子

标签: htmlvbawebweb-scrapingscreen-scraping

解决方案


我会从类和价格匹配的节点的 alt 属性中获取标题,假设你想要当前的,从两个类名匹配节点之一。您不需要浏览器,因为内容是响应一个更快的简单 xmlhttp 请求而出现的。

由于并非所有价格节点都存在卢比符号,因此我将其删除。

Option Explicit

Public Sub WriteOutProductInfo()
    'VBE>Tools>References> Microsoft HTML Object Library
    Dim html As MSHTML.HTMLDocument

    Set html = New MSHTML.HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.amazon.in/s?k=kuki+fashion&rh=p_72%3A1318476031&dc&qid=1574617862&rnid=1318475031&ref=sr_nr_p_72_1", False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        html.body.innerHTML = .responseText
    End With

    Dim headers(), titles As Object, prices As Object
    headers = Array("Title", "Price", "Img url")

    With html
        Set titles = .querySelectorAll(".s-image")
        Set prices = .querySelectorAll(".a-price-whole,.a-color-price")
    End With

    Dim results(), r As Long, priceInfo As String

    ReDim results(1 To titles.Length, 1 To UBound(headers) + 1)

    For r = 0 To titles.Length - 1
        results(r + 1, 1) = titles.Item(r).alt
        results(r + 1, 2) = Replace$(prices.Item(r).innerText, ChrW(8377), vbNullString)
        results(r + 1, 3) = titles.Item(r).src
    Next

    Dim ws As Worksheet

    Set ws = ThisWorkbook.Worksheets("Sheet1")

    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub

推荐阅读