html - I'm getting stuck at vba runtime error 424
问题描述
I'm getting
run-time error 424
in 68th row (line)
request.Open "GET", Url, False
and I don't know how to fix it.
My previous question I posted ; How to scrape specific part of online english dictionary?
My final goal is to get result like this;
A B
beginning bɪˈɡɪnɪŋ
behalf bɪˈhæf
behave bɪˈheɪv
behaviour bɪˈheɪvjər
belong bɪˈlɔːŋ
below bɪˈloʊ
bird bɜːrd
biscuit ˈbɪskɪt
Here's code I wrote, and it's mostly based on someone else's code I found on internet.
' Microsoft ActiveX Data Objects x.x Library
' Microsoft XML, v3.0
' Microsoft VBScript Regular Expressions
Sub ParseHelp()
' Word reference from
Dim Url As String
Url = "https://www.oxfordlearnersdictionaries.com/definition/english/" & Cells(ActiveCell.Row, "B").Value
' Get dictionary's html
Dim Html As String
Html = GetHtml(Url)
' Check error
If InStr(Html, "<TITLE>Not Found</Title>") > 0 Then
MsgBox "404"
Exit Sub
End If
' Extract phonetic alphabet from HTML
Dim wrapPattern As String
wrapPattern = "<span class='name' (.*?)</span>"
Set wrapCollection = FindRegexpMatch(Html, wrapPattern)
' MsgBox StripHtml(CStr(wrapCollection(1)))
' Fill phonetic alphabet into cell
If Not wrapCollection Is Nothing Then
Dim wrap As String
On Error Resume Next
wrap = StripHtml(CStr(wrapCollection(1)))
If Err.Number <> 0 Then
wrap = ""
End If
Cells(ActiveCell.Row, "C").Value = wrap
Else
MsgBox "not found"
End If
End Sub
Public Function StripHtml(Html As String) As String
Dim RegEx As New RegExp
Dim sOut As String
Html = Replace(Html, "</li>", vbNewLine)
Html = Replace(Html, " ", " ")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "<[^>]+>"
End With
sOut = RegEx.Replace(Html, "")
StripHtml = sOut
Set RegEx = Nothing
End Function
Public Function GetHtml(Url As String) As String
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.serverXMLHTTP")
Dim converter As New ADODB.stream
' Get
request.Open "GET", Url, False
request.send
' raw bytes
converter.Open
converter.Type = adTypeBinary
converter.Write request.responseBody
' read
converter.Position = 0
converter.Type = adTypeText
converter.Charset = "utf-8"
' close
GetHtml = converter.ReadText
converter.Close
End Function
Public Function FindRegexpMatch(txt As String, pat As String) As Collection
Set FindRegexpMatch = New Collection
Dim rx As New RegExp
Dim matcol As MatchCollection
Dim mat As Match
Dim ret As String
Dim delimiter As String
txt = Replace(txt, Chr(10), "")
txt = Replace(txt, Chr(13), "")
rx.Global = True
rx.IgnoreCase = True
rx.MultiLine = True
rx.Pattern = pat
Set matcol = rx.Execute(txt)
'MsgBox "Match:" & matcol.Count
On Error GoTo ErrorHandler
For Each mat In matcol
'FindRegexpMatch.Add mat.SubMatches(0)
FindRegexpMatch.Add mat.Value
Next mat
Set rx = Nothing
' Insert code that might generate an error here
Exit Function
ErrorHandler:
' Insert code to handle the error here
MsgBox "FindRegexpMatch. " & Err.GetException()
Resume Next
End Function
Any kind of help would be greatly appreciated.
解决方案
以下是如何从 A 列读入值并将发音写到 B 列的示例。它使用 css 选择器匹配子节点,然后逐步上升到 parentNode 以确保抓取整个发音。您可以通过多种方式在父节点上匹配以获得第二个发音。请注意,我使用父节点,Replace
因为发音可能跨越多个子节点。
如果这样做是为了大量查找,请做一个好网民,并在代码中放置一些等待,以免用请求轰炸网站。
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, urls()
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row 'you need at least two words in column A or change the redim.
urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(urls))
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(urls) To UBound(urls)
.Open "GET", "https://www.oxfordlearnersdictionaries.com/definition/english/" & urls(i), False
.send
html.body.innerHTML = .responseText
data = Replace$(Replace$(html.querySelector(".name ~ .wrap").ParentNode.innerText, "/", vbNullString), Chr$(10), Chr$(32))
results(i) = Right$(data, Len(data) - 4)
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub
必需的参考资料(VBE>工具>参考资料):
- Microsoft HTML 对象库
如果你沿着API 路线走,那么这里是一个小例子。使用 Prototype 帐户,您可以在一个月内拨打 1000 次免费电话。下一个最好的,取决于您希望拨打多少电话,看起来像 10,001 次电话(额外的一次 PAYG 电话会使价格减半)。# 调用将受单词是头词还是首先需要引理查找调用的影响。您需要的端点构造GET /entries/{source_lang}/{word_id}?fields=pronunciations
虽然似乎没有进行大规模过滤。您将需要一个 json 解析器来处理返回的 json,例如 github.com/VBA-tools/VBA-JSON/blob/master/JsonConverter.bas。从那里下载原始代码并添加到名为 JsonConverter 的标准模块中。然后您需要转到 VBE > 工具 > 参考 > 添加对 Microsoft Scripting Runtime 的引用。从复制的代码中删除顶部的属性行。
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, words()
'If not performing lemmas lookup then must be head word e.g. behave, behalf
Const appId As String = "yourAppId"
Const appKey As String = "yourAppKey"
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row
words = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(words))
Set html = New MSHTML.HTMLDocument
Dim json As Object
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(words) To UBound(words)
.Open "GET", "https://od-api.oxforddictionaries.com/api/v2/entries/en-us/" & LCase$(words(i)) & "?fields=pronunciations", False
.setRequestHeader "app_id", appId
.setRequestHeader "app_key", appKey
.setRequestHeader "ContentType", "application/json"
.send
Set json = JsonConverter.ParseJson(.responseText)
results(i) = IIf(json("results")(1)("type") = "headword", json("results")(1)("lexicalEntries")(1)("pronunciations")(2)("phoneticSpelling"), "lemmas lookup required")
Set json = Nothing
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub
推荐阅读
- java - 在使用 GSON 的 JSON 序列化中包含某个瞬态字段
- flutter - Flutter : PlatformException(no_available_camera, 没有可用于拍照的相机。, null, null)
- apache-kafka - 如何在 Spring Kafka 中使用确认
- google-cloud-platform - Java Cloud Function 访问预设环境变量
- c++ - 如何忽略该行的其余部分?
- pine-script - 尝试将 pinescript 代码转换为版本 4 时出现无法使用参数调用“abs”错误
- html - 使用 CSS 打印半张 A4 页面
- reactjs - 将 Route 中的多个参数与 WordPress 中的 React Router Dom 匹配
- laravel - 如何在 Laravel 中使用主管处理作业?
- r - 根据条件使用其他列的值创建列