首页 > 解决方案 > I'm getting stuck at vba runtime error 424

问题描述

I'm getting

run-time error 424

in 68th row (line)

request.Open "GET", Url, False

and I don't know how to fix it.

My previous question I posted ; How to scrape specific part of online english dictionary?

My final goal is to get result like this;

    A          B
beginning   bɪˈɡɪnɪŋ
behalf      bɪˈhæf
behave      bɪˈheɪv
behaviour   bɪˈheɪvjər
belong      bɪˈlɔːŋ
below       bɪˈloʊ
bird        bɜːrd
biscuit     ˈbɪskɪt

Here's code I wrote, and it's mostly based on someone else's code I found on internet.

'   Microsoft ActiveX Data Objects x.x Library
'   Microsoft XML, v3.0
'   Microsoft VBScript Regular Expressions

Sub ParseHelp()

    ' Word reference from
    Dim Url As String
    Url = "https://www.oxfordlearnersdictionaries.com/definition/english/" & Cells(ActiveCell.Row, "B").Value
   
    ' Get dictionary's html
    Dim Html As String
    Html = GetHtml(Url)
    
    ' Check error
    If InStr(Html, "<TITLE>Not Found</Title>") > 0 Then
        MsgBox "404"
        Exit Sub
    End If
    
    ' Extract phonetic alphabet from HTML
    Dim wrapPattern As String
    wrapPattern = "<span class='name' (.*?)</span>"
    Set wrapCollection = FindRegexpMatch(Html, wrapPattern)
    ' MsgBox StripHtml(CStr(wrapCollection(1)))
    
    ' Fill phonetic alphabet into cell
    If Not wrapCollection Is Nothing Then
        Dim wrap As String
        
        On Error Resume Next
            wrap = StripHtml(CStr(wrapCollection(1)))
        If Err.Number <> 0 Then
            wrap = ""
        End If
        Cells(ActiveCell.Row, "C").Value = wrap
    Else
        MsgBox "not found"
    End If

End Sub

Public Function StripHtml(Html As String) As String
    Dim RegEx As New RegExp
    Dim sOut As String
    
    Html = Replace(Html, "</li>", vbNewLine)
    Html = Replace(Html, "&nbsp;", " ")
    
    With RegEx
        .Global = True
        .IgnoreCase = True
        .MultiLine = True
        .Pattern = "<[^>]+>"
    End With
    
    sOut = RegEx.Replace(Html, "")
    StripHtml = sOut
    Set RegEx = Nothing
End Function

Public Function GetHtml(Url As String) As String
    Dim xmlhttp As Object
    Set xmlhttp = CreateObject("MSXML2.serverXMLHTTP")
    Dim converter As New ADODB.stream
    
    ' Get
    request.Open "GET", Url, False
    request.send
    
    ' raw bytes
    converter.Open
    converter.Type = adTypeBinary
    converter.Write request.responseBody
    
    ' read
    converter.Position = 0
    converter.Type = adTypeText
    converter.Charset = "utf-8"
    
    ' close
    GetHtml = converter.ReadText
    converter.Close

End Function

Public Function FindRegexpMatch(txt As String, pat As String) As Collection
    Set FindRegexpMatch = New Collection

    Dim rx As New RegExp
    Dim matcol As MatchCollection
    Dim mat As Match
    Dim ret As String
    Dim delimiter As String
    
    txt = Replace(txt, Chr(10), "")
    txt = Replace(txt, Chr(13), "")

    rx.Global = True
    rx.IgnoreCase = True
    rx.MultiLine = True
    rx.Pattern = pat
    Set matcol = rx.Execute(txt)
    'MsgBox "Match:" & matcol.Count
    
    On Error GoTo ErrorHandler
    For Each mat In matcol
        'FindRegexpMatch.Add mat.SubMatches(0)
        FindRegexpMatch.Add mat.Value

    Next mat
    Set rx = Nothing
  
      
   ' Insert code that might generate an error here
   Exit Function
ErrorHandler:
   ' Insert code to handle the error here
   MsgBox "FindRegexpMatch. " & Err.GetException()
   Resume Next
    
End Function

Any kind of help would be greatly appreciated.

标签: htmlexcelvbaweb-scrapingweb-crawler

解决方案


以下是如何从 A 列读入值并将发音写到 B 列的示例。它使用 css 选择器匹配子节点,然后逐步上升到 parentNode 以确保抓取整个发音。您可以通过多种方式在父节点上匹配以获得第二个发音。请注意,我使用父节点,Replace因为发音可能跨越多个子节点。

如果这样做是为了大量查找,请做一个好网民,并在代码中放置一些等待,以免用请求轰炸网站。

Option Explicit

Public Sub WriteOutPronounciations()
    Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
    Dim data As String, lastRow As Long, urls()

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row 'you need at least two words in column A or change the redim.
    urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)

    ReDim results(1 To UBound(urls))

    Set html = New MSHTML.HTMLDocument

    With CreateObject("MSXML2.ServerXMLHTTP")
        For i = LBound(urls) To UBound(urls)
            .Open "GET", "https://www.oxfordlearnersdictionaries.com/definition/english/" & urls(i), False
            .send
            html.body.innerHTML = .responseText
            data = Replace$(Replace$(html.querySelector(".name ~ .wrap").ParentNode.innerText, "/", vbNullString), Chr$(10), Chr$(32))
            results(i) = Right$(data, Len(data) - 4)
        Next
    End With

    With ThisWorkbook.Worksheets(1)
        .Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
    End With
End Sub

必需的参考资料(VBE>工具>参考资料):

  1. Microsoft HTML 对象库

如果你沿着API 路线走,那么这里是一个小例子。使用 Prototype 帐户,您可以在一个月内拨打 1000 次免费电话。下一个最好的,取决于您希望拨打多少电话,看起来像 10,001 次电话(额外的一次 PAYG 电话会使价格减半)。# 调用将受单词是头词还是首先需要引理查找调用的影响。您需要的端点构造GET /entries/{source_lang}/{word_id}?fields=pronunciations虽然似乎没有进行大规模过滤。您将需要一个 json 解析器来处理返回的 json,例如 github.com/VBA-tools/VBA-JSON/blob/master/JsonConverter.bas。从那里下载原始代码并添加到名为 JsonConverter 的标准模块中。然后您需要转到 VBE > 工具 > 参考 > 添加对 Microsoft Scripting Runtime 的引用。从复制的代码中删除顶部的属性行。

Option Explicit

Public Sub WriteOutPronounciations()
    Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
    Dim data As String, lastRow As Long, words()

    'If not performing lemmas lookup then must be head word e.g. behave, behalf
    Const appId As String = "yourAppId"
    Const appKey As String = "yourAppKey"

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row
    words = Application.Transpose(ws.Range("A1:A" & lastRow).Value)

    ReDim results(1 To UBound(words))

    Set html = New MSHTML.HTMLDocument

    Dim json As Object

    With CreateObject("MSXML2.ServerXMLHTTP")
        For i = LBound(words) To UBound(words)
            .Open "GET", "https://od-api.oxforddictionaries.com/api/v2/entries/en-us/" & LCase$(words(i)) & "?fields=pronunciations", False
            .setRequestHeader "app_id", appId
            .setRequestHeader "app_key", appKey
            .setRequestHeader "ContentType", "application/json"
            .send
            Set json = JsonConverter.ParseJson(.responseText)
            results(i) = IIf(json("results")(1)("type") = "headword", json("results")(1)("lexicalEntries")(1)("pronunciations")(2)("phoneticSpelling"), "lemmas lookup required")
            Set json = Nothing
        Next
    End With

    With ThisWorkbook.Worksheets(1)
        .Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
    End With
End Sub

推荐阅读