首页 > 解决方案 > 使用 XMLHTTP 抓取会在特定类名处引发错误



Sub Test()
    ' Downloads the Zillow agent-review listing page with a late-bound
    ' MSXML2.XMLHTTP request and loads the returned markup into a late-bound
    ' "htmlfile" document. Shows a message box and exits if the request does
    ' not come back with HTTP status 200.
    Dim htmlDoc         As Object
    Dim dns             As String
    Dim pageSource      As String

    dns = "https://www.zillow.com/detroit-mi/real-estate-agent-reviews/"

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", dns, True
        ' The request must actually be sent; without this call readyState
        ' never reaches 4 and the wait loop below never terminates.
        .send

        ' Asynchronous request: keep the UI responsive until it completes.
        While .readyState <> 4: DoEvents: Wend

        ' Compare the numeric status code rather than the reason phrase,
        ' which is free text supplied by the server.
        If .Status <> 200 Then
            MsgBox "ERROR" & .Status & " - " & .statusText, vbExclamation
            Exit Sub
        End If

        pageSource = .responseText
    End With

    Set htmlDoc = CreateObject("htmlfile")
    htmlDoc.body.innerHTML = pageSource

    Dim xx
    ' The original post marks the following call as the line where the error
    ' occurs on this late-bound document, so it is left disabled here.
    ' NOTE(review): presumably an early-bound HTMLDocument is needed for
    ' getElementsByClassName - confirm before enabling.
    ' Set xx = htmlDoc.getElementsByClassName("ldb-contact-summary")

    Set htmlDoc = Nothing
End Sub


Set xx = htmlDoc.getElementsByClassName("ldb-contact-summary")


标签: excel, vba, web-scraping



Sub GetProfileInfo()
    ' Walks the first pages of the Zillow agent-review listing and writes each
    ' agent's name (column 1) and phone number (column 2) to the active sheet,
    ' one agent per row starting at row 1.
    '
    ' Uses early binding (XMLHTTP60, HTMLDocument), so the project needs the
    ' corresponding XML and HTML object library references.
    Const URL$ = "https://www.zillow.com/detroit-mi/real-estate-agent-reviews/?page="
    Dim Http As New XMLHTTP60, Html As New HTMLDocument
    Dim post As HTMLDivElement, R&, P&

    For P = 1 To 3 'put here the highest number you wanna traverse
        With Http
            .Open "GET", URL & P, False
            ' Synchronous request: send it before reading the response,
            ' otherwise responseText is not available.
            .send
            Html.body.innerHTML = .responseText
        End With

        For Each post In Html.getElementsByClassName("ldb-contact-summary")
            With post.querySelectorAll(".ldb-contact-name a")
                If .Length Then
                    ' Only advance to a new row when a name was found.
                    R = R + 1
                    Cells(R, 1) = .item(0).innerText

                    ' Write the phone only for a card that produced a row, so
                    ' it can never land on row 0 or on the previous agent's row.
                    With post.getElementsByClassName("ldb-phone-number")
                        If .Length Then Cells(R, 2) = .item(0).innerText
                    End With
                End If
            End With
        Next post
    Next P
End Sub


Microsoft XML, v6.0
Microsoft HTML Object Library
