html - Excel VBA 登录网站并获取 TR 和 TD 元素
问题描述
我正在测试下面的代码。我认为这非常接近,但由于某种原因我似乎无法登录该站点。
Sub Website_Login_Test()
Dim oHTML_Element As IHTMLElement
Dim sURL As String
On Error GoTo Err_Clear
sURL = "https://login.my_site_here.jsp?"
Set oBrowser = New InternetExplorer
oBrowser.Silent = True
oBrowser.timeout = 60
oBrowser.navigate sURL
oBrowser.Visible = True
Do
' Wait till the Browser is loaded
Loop Until oBrowser.readyState = READYSTATE_COMPLETE
Set HTMLDoc = oBrowser.document
For Each oHTML_Element In HTMLDoc
Debug.Print oHTML_Element
Next
HTMLDoc.all.UserId.Value = "my_id"
HTMLDoc.all.Password.Value = "my_pass"
For Each oHTML_Element In HTMLDoc.getElementsByTagName("Button")
Debug.Print oHTML_Element.Name
'oHTML_Element.Click: Exit For
'Debug.Print oHTML_Element.Type
'If oHTML_Element.Type = "submit" Then oHTML_Element.Click: Exit For
Next
' oBrowser.Refresh ' Refresh If Needed
Err_Clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
Call Test
End Sub
Sub Test()
Dim ie As Object, i As Long, strText As String
Dim doc As Object, hTable As Object, hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, tr As Object, td As Object
Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
ie.navigate "https://after_login_move_to_page_for_scraping.jsp"
Do While ie.busy: DoEvents: Loop
Do While ie.readyState <> 4: DoEvents: Loop
Set doc = ie.document
Set hTable = doc.getElementsByTagName("table")
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each tr In hTR
Set hTD = tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each td In hTD
ws.Cells(z, y).Value = td.innertext
y = y + 1
Next td
DoEvents
z = z + 1
Next tr
Exit For
Next bb
Exit For
Next tb
End Sub
我无法登录该站点,因此无法进行抓取,但我认为代码非常接近。这是 id 对象、密码对象和按钮对象的 HTML。我究竟做错了什么?
解决方案
我认为您必须触发输入字段的按键事件。如果还有其他必须触发的事件,请查看此处,如何找到它们:
Automate IE via Excel to fill down a dropdown and continue
Sub WebsiteLogin()
Const url As String = "https://login.my_site_here.jsp"
Const userName As String = "Here Your LogIn Name"
Const passWord As String = "Here Your Password"
Dim ie As Object
Dim htmlDoc As Object
Dim nodeInputUserName As Object
Dim nodeInputPassWord As Object
'Initialize Internet Explorer, set visibility,
'call URL and wait until page is fully loaded
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.navigate url
Do Until ie.readyState = 4: DoEvents: Loop
Set htmlDoc = ie.document
'Set the log in name
Set nodeInputUserName = htmlDoc.getElementById("USERID")
nodeInputUserName.Value = userName
Call TriggerEvent(htmlDoc, nodeInputUserName, "onkeypress")
'Set the password
Set nodeInputPassWord = htmlDoc.getElementById("PASSWORD")
nodeInputPassWord.Value = passWord
Call TriggerEvent(htmlDoc, nodeInputPassWord, "onkeypress")
'Click submit button
htmlDoc.querySelector("a[role='button']").Click
End Sub
这是触发事件的过程:
Private Sub TriggerEvent(htmlDocument As Object, htmlElementWithEvent As Object, eventType As String)
Dim theEvent As Object
htmlElementWithEvent.Focus
Set theEvent = htmlDocument.createEvent("HTMLEvents")
theEvent.initEvent eventType, True, False
htmlElementWithEvent.dispatchEvent theEvent
End Sub
推荐阅读
- typescript - 如何在方法中使用“this”创建类的新实例?
- python - 从python中的xml文件中提取特定对象
- parallel-processing - 为什么并行处理比普通代码花费更长的时间?
- laravel - 在生产 Laravel Livewire 中出现错误
- python - np.dot 错误地从矩阵乘法更改为元素乘积
- python - python中针对特定情况的变量变量
- python - python 'AttributeError' 与 can.BLFReader
- docker - 如何将自定义环境变量添加到 docker-ejabberd
- django-rest-framework - 序列化程序中未读取嵌套关系的属性
- amazon-route53 - Route53:2个A记录到1个负载均衡器