excel - Excel VBA 拉取网站数据
问题描述
我想通过输入卷号“217449”将网站“ http://result.biselahore.com/ ”中的任何数据提取到 Excel 表。输入卷号后,它会进入带有详细主题标记的结果卡页面。
要从下一页获取主题标记并将其粘贴到 excel 上,以下代码不起作用,它给出错误号 91,“未设置块变量的对象变量”。
这是我的整个代码:
Sub WData()
Do Until ActiveCell.Value = "100000"
Dim IE As New InternetExplorer
Dim DOCS As HTMLDocument
Dim str, str1, str2, str3, str4, str5 As String
IE.navigate "http://result.biselahore.com/"
IE.Visible = True
Do
DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
IE.document.getElementById("rollNum").Value = ActiveCell.Value
IE.document.forms(0).submit
Do While IE.Busy
DoEvents
Loop
Set DOCS = IE.document
Do While DOCS.readyState <> "complete"
DoEvents
Loop
str = IE.document.getElementsByTagName("td")(4).innerText
str1 = IE.document.getElementsByTagName("td")(7).innerText
str2 = IE.document.getElementsByTagName("td")(9).innerText
str3 = IE.document.getElementsByTagName("td")(20).innerText
str4 = IE.document.getElementsByTagName("td")(23).innerText
str5 = IE.document.getElementsByTagName("td")(25).innerText
str6 = IE.document.getElementsByTagName("td")(27).innerText
str7 = IE.document.getElementsByTagName("td")(37).innerText
str8 = IE.document.getElementsByTagName("td")(38).innerText
str9 = IE.document.getElementsByTagName("td")(42).innerText
str10 = IE.document.getElementsByTagName("td")(43).innerText
str11 = IE.document.getElementsByTagName("td")(47).innerText
str12 = IE.document.getElementsByTagName("td")(48).innerText
str13 = IE.document.getElementsByTagName("td")(52).innerText
str14 = IE.document.getElementsByTagName("td")(53).innerText
str15 = IE.document.getElementsByTagName("td")(57).innerText
str16 = IE.document.getElementsByTagName("td")(58).innerText
str17 = IE.document.getElementsByTagName("td")(62).innerText
str18 = IE.document.getElementsByTagName("td")(63).innerText
str19 = IE.document.getElementsByTagName("td")(71).innerText
Dim lastrow As Integer
lastrow = Worksheets(1).Range("b" & Worksheets(1).Rows.Count).End(xlUp).Row + 1
Cells(lastrow, 2).Value = Trim(str)
Cells(lastrow, 3).Value = Trim(str1)
Cells(lastrow, 4).Value = Trim(str2)
Cells(lastrow, 5).Value = Trim(str3)
Cells(lastrow, 6).Value = Trim(str4)
Cells(lastrow, 7).Value = Trim(str5)
Cells(lastrow, 8).Value = Trim(str6)
Cells(lastrow, 9).Value = Trim(str7)
Cells(lastrow, 10).Value = Trim(str8)
Cells(lastrow, 11).Value = Trim(str9)
Cells(lastrow, 12).Value = Trim(str10)
Cells(lastrow, 13).Value = Trim(str11)
Cells(lastrow, 14).Value = Trim(str12)
Cells(lastrow, 15).Value = Trim(str13)
Cells(lastrow, 16).Value = Trim(str14)
Cells(lastrow, 17).Value = Trim(str15)
Cells(lastrow, 18).Value = Trim(str16)
Cells(lastrow, 19).Value = Trim(str17)
Cells(lastrow, 20).Value = Trim(str18)
Cells(lastrow, 21).Value = Trim(str19)
IE.Quit
Set IE = Nothing
Selection.Offset(1, 0).Select
Loop
End Sub
我想要的输出:
Subject Marks Subject Marks Subject Marks Subject Marks
URDU 68 62 ENGLISH 75 70 ISLAMIAT 50 49 MATHEMATICS 75 75
PHYSICS 60 59 CHEMISTRY 60 60 BIOLOGY 58 59
解决方案
网络“表”是一团糟。我正在跳过具有“合并单元格”的 2 个标题。
我添加了一个循环检查,直到@PeterAlbert用超时功能设置表,在设定的时间后退出循环,停止无限循环。
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, html As HTMLDocument, hTable As Object, ele As Object
With IE
.Visible = True
.navigate "http://result.biselahore.com/"
While .Busy Or .readyState < 4: DoEvents: Wend
.document.querySelector("#rollNum").innerText = 217449
.document.forms(0).submit
Dim dblStart As Double
Dim tmp As Long
Const cDblMaxTimeInSeconds As Double = 5 '<==Second to wait until timeout
dblStart = Timer
While .Busy Or .readyState < 4: DoEvents: Wend
Do
DoEvents
On Error Resume Next
Set hTable = .document.getElementsByTagName("table")(1)
On Error GoTo 0
If TimerDiff(dblStart, Timer) > cDblMaxTimeInSeconds Then Exit Do
Loop While hTable Is Nothing
Dim list As Object, list2 As Object
Set list = hTable.getElementsByTagName("tr")
Dim i As Long, j As Long, r As Long, c As Long
Application.ScreenUpdating = False
For i = 13 To list.Length - 1
Set list2 = list.item(i).getElementsByTagName("td")
r = r + 1: c = 0
For j = 0 To list2.Length - 1
c = c + 1
Cells(r, c) = list2.item(j).innerText
Next j
Next i
Application.ScreenUpdating = True
End With
End Sub
Public Function TimerDiff(ByVal dblTimerStart As Double, ByVal dblTimerEnd As Double) As Double
Dim dblTemp As Double
dblTemp = dblTimerEnd - dblTimerStart
If dblTemp < -43200 Then 'half a day
dblTemp = dblTemp + 86400
End If
TimerDiff = dblTemp
End Function
版本 2(使用上面的计时器功能)
Public Sub GetInfo()
Dim IE As New InternetExplorer, html As HTMLDocument, hTable As Object, ele As Object
With IE
.Visible = True
.navigate "http://result.biselahore.com/"
While .Busy Or .readyState < 4: DoEvents: Wend
.document.querySelector("#rollNum").innerText = 217449
.document.forms(0).submit
Dim dblStart As Double, tmp As Long, clipboard As Object
Const cDblMaxTimeInSeconds As Double = 5 '<==Second to wait until timeout
dblStart = Timer
While .Busy Or .readyState < 4: DoEvents: Wend
Do
DoEvents
On Error Resume Next
Set hTable = .document.getElementsByTagName("table")(1)
On Error GoTo 0
If TimerDiff(dblStart, Timer) > cDblMaxTimeInSeconds Then Exit Do
Loop While hTable Is Nothing
Application.ScreenUpdating = False
Set clipboard = New MSForms.DataObject
clipboard.SetText hTable.outerHTML
clipboard.PutInClipboard
ActiveSheet.Cells(1, 1).PasteSpecial
Application.ScreenUpdating = True
End With
End Sub
推荐阅读
- hyperparameters - GPflow 中超参数的初步猜测
- java - 表示方法名称和形式参数的 Java 类
- memory - 如何获取对象的运行时内存大小?
- javascript - mySQL“UPDATE table SET ...”不更新列
- android - Flutter:图像到 Base64 的转换输出被截断
- sql - 日期之间的频率
- python - MySQL 查询在 Django 上没有提供足够的参数错误
- image - Xamarin Forms 选择多个图像,取消并重新排列
- visual-studio-code - 如何更改 vscode-server 目录
- javascript - 比较 2 个对象数组并找到匹配的颜色 ID,然后创建一个新数组 Javascript