html - 从网站上刮桌子
问题描述
我有以下代码导航到一个网站,输入两个名字(例如,这里使用的真实姓名将从电子表格中提取 10 个姓名的列表),然后搜索他们的记录。我正在尝试将生成的结果表拉到电子表格中。我已经尝试了几种方法,但似乎无法使其正常工作。在“Scrape Table Here”评论下寻找代码。我知道这涉及访问网站的 HTML,我也可以这样做,但我对 HTML 不够熟悉,无法自己解决这个问题。奖励问题:我还想将每个人的 ID# 添加到电子表格中。在 HTML 中,它列在“MP_Details?”之后。例如,对于“罗伯特·琼斯”,我要抓取的是“36481”。
Sub Input_And_Return()
'Create new instance of Internet Explorer
Dim ieApp As Object: Set ieApp = New InternetExplorer
Dim ieDoc As Object
Dim html As HTMLDocument
ieApp.Visible = True
ieApp.navigate "https://hdmaster.net/MP/MP_Public"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.readyState = READYSTATE_COMPLETE: DoEvents: Loop
Set ieDoc = ieApp.document
Set html = ieApp.document
'Enter names into search box and click search
With ieDoc.forms(0)
.SearchFor.Value = "Anderson, Kelly" & Chr(10) & "Jones, Robert"
.submit
End With
'Scrape Table Here
'Close down IE and reset status bar
Set ieApp = Nothing
Application.StatusBar = ""
End Sub
解决方案
您可以将表 outerHTML 复制到剪贴板并将其粘贴到 Excel。这很好,简单快捷。
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer
Dim nameList As String
nameList = "Anderson, Kelly" & Chr$(10) & "Jones, Robert"
With IE
.Visible = True
.navigate "https://hdmaster.net/MP/MP_Public"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("[name=SearchFor]").Value = nameList
.querySelector("#search").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
Dim clipboard As Object
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
clipboard.SetText .document.querySelector(".newTable").outerHTML
clipboard.PutInClipboard
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).PasteSpecial
.Quit
End With
End Sub
参考资料(VBE > 工具 > 参考资料):
- Microsoft HTML 对象库
- 微软互联网控制
您的上述代码版本:
Public Sub Input_And_Return()
Dim ieApp As Object: Set ieApp = New InternetExplorer
Dim ieDoc As Object
With ieApp
.Visible = True
.navigate "https://hdmaster.net/MP/MP_Public"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document.forms(0)
.SearchFor.Value = "Anderson, Kelly" & Chr$(10) & "Jones, Robert"
.submit
Dim clipboard As Object
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
clipboard.SetText .getElementsByClassName("newTable")(0).outerHTML
clipboard.PutInClipboard
End With
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).PasteSpecial
.Quit
End With
End Sub
或者通过循环表格的行和列:
Public Sub Input_And_Return()
Dim ieApp As Object, ieDoc As Object
Set ieApp = New InternetExplorer
With ieApp
.Visible = True
.navigate "https://hdmaster.net/MP/MP_Public"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document.forms(0)
.SearchFor.Value = "Anderson, Kelly" & Chr$(10) & "Jones, Robert"
.submit
Dim r As Long, c As Long, tr As Object, td As Object
With .getElementsByClassName("newTable")(0)
For Each tr In .getElementsByTagName("tr")
r = r + 1: c = 1
For Each td In tr.getElementsByTagName("td")
Cells(r, c).Value = td.innerText
c = c + 1
Next td
Next tr
End With
End With
.Quit
End With
End Sub
输出:
编辑:
一些丑陋的代码来获取短ID
Option Explicit
Public Sub Input_And_Return()
Dim ieApp As Object, ieDoc As Object
Set ieApp = New InternetExplorer
With ieApp
.Visible = True
.navigate "https://hdmaster.net/MP/MP_Public"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document.forms(0)
.SearchFor.Value = "Anderson, Kelly" & Chr$(10) & "Jones, Robert"
.submit
Dim r As Long, c As Long, tr As Object, td As Object, hTable As Object, aNodeList As Object
Set hTable = .getElementsByClassName("newTable")(0)
Set aNodeList = .getElementsByClassName("newTable")(0).querySelectorAll("[align=center][onclick*='javascript:rowClick']")
Dim idDict As Object, i As Long, tempVal As Long
Set idDict = CreateObject("Scripting.Dictionary")
For i = 0 To aNodeList.Length - 1
tempVal = Split(Split(aNodeList.Item(i).onclick, "id=")(1), Chr$(39))(0)
If Not idDict.exists(tempVal) Then idDict.Add tempVal, vbNullString
Next i
With hTable
For Each tr In .getElementsByTagName("tr")
r = r + 1: c = 1
For Each td In tr.getElementsByTagName("td")
Cells(r, c).Value = td.innerText
c = c + 1
Next td
Next tr
If idDict.Count = r - 1 Then Cells(2, c).Resize(idDict.Count, 1) = Application.WorksheetFunction.Transpose(idDict.keys)
End With
End With
.Quit
End With
End Sub
推荐阅读
- unity3d - Unity WebGL 在加载和不播放视频时构建缺少 sharedassets1.resource
- intellij-idea - Intellij 测试用例未退出
- swift - SwiftUI - 前提条件失败:输入索引无效:键盘通知上的 4
- python - 计算需要多少行才能加起来一个值
- angular - 关于排序选项卡对象(ANGULAR,TS)显示“冠军分类”的问题
- javascript - 隐藏 div,两个动态状态 javascript
- python - 使用 Python 基于特定模式进行标记
- redirect - 如何在 nuxt.js 中从 www 重定向到非 www
- android - 是否可以将 cameraX 或 Camera2 缩放到 Api 最大缩放限制之外
- php - 使用PDO记录sql查询时的运行函数?