首页 > 解决方案 > Excel VBA 登录网站并获取 TR 和 TD 元素

问题描述

我正在测试下面的代码。我认为这非常接近,但由于某种原因我似乎无法登录该站点。

Sub Website_Login_Test()

Dim oHTML_Element As IHTMLElement
Dim sURL As String

On Error GoTo Err_Clear
sURL = "https://login.my_site_here.jsp?"
Set oBrowser = New InternetExplorer
oBrowser.Silent = True
oBrowser.timeout = 60
oBrowser.navigate sURL
oBrowser.Visible = True

Do
' Wait till the Browser is loaded
Loop Until oBrowser.readyState = READYSTATE_COMPLETE

Set HTMLDoc = oBrowser.document


For Each oHTML_Element In HTMLDoc
    Debug.Print oHTML_Element
Next

HTMLDoc.all.UserId.Value = "my_id"
HTMLDoc.all.Password.Value = "my_pass"

For Each oHTML_Element In HTMLDoc.getElementsByTagName("Button")
Debug.Print oHTML_Element.Name
'oHTML_Element.Click: Exit For
'Debug.Print oHTML_Element.Type
'If oHTML_Element.Type = "submit" Then oHTML_Element.Click: Exit For
Next

' oBrowser.Refresh ' Refresh If Needed
Err_Clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If

Call Test
End Sub

Sub Test()

Dim ie As Object, i As Long, strText As String

Dim doc As Object, hTable As Object, hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, tr As Object, td As Object

Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet

     Set wb = Excel.ActiveWorkbook
     Set ws = wb.ActiveSheet

     Set ie = CreateObject("InternetExplorer.Application")
     ie.Visible = True

      y = 1   'Column A in Excel
      z = 1   'Row 1 in Excel

     ie.navigate "https://after_login_move_to_page_for_scraping.jsp"

     Do While ie.busy: DoEvents: Loop
     Do While ie.readyState <> 4: DoEvents: Loop

     Set doc = ie.document
     Set hTable = doc.getElementsByTagName("table")


     For Each tb In hTable

        Set hBody = tb.getElementsByTagName("tbody")
        For Each bb In hBody

            Set hTR = bb.getElementsByTagName("tr")
            For Each tr In hTR


                 Set hTD = tr.getElementsByTagName("td")
                 y = 1 ' Resets back to column A
                 For Each td In hTD
                   ws.Cells(z, y).Value = td.innertext
                   y = y + 1
                 Next td
                 DoEvents
                 z = z + 1
            Next tr
            Exit For
        Next bb
    Exit For
  Next tb

End Sub

我无法登录该站点,因此无法进行抓取,但我认为代码非常接近。这是 id 对象、密码对象和按钮对象的 HTML。我究竟做错了什么?

在此处输入图像描述

在此处输入图像描述

在此处输入图像描述

标签: htmlexcelvba

解决方案


我认为您必须触发输入字段的按键事件。如果还有其他必须触发的事件,请查看此处,如何找到它们:
Automate IE via Excel to fill down a dropdown and continue

Sub WebsiteLogin()

Const url As String = "https://login.my_site_here.jsp"
Const userName As String = "Here Your LogIn Name"
Const passWord As String = "Here Your Password"

Dim ie As Object
Dim htmlDoc As Object
Dim nodeInputUserName As Object
Dim nodeInputPassWord As Object

  'Initialize Internet Explorer, set visibility,
  'call URL and wait until page is fully loaded
  Set ie = CreateObject("InternetExplorer.Application")
  ie.Visible = True
  ie.navigate url
  Do Until ie.readyState = 4: DoEvents: Loop
  Set htmlDoc = ie.document
  
  'Set the log in name
  Set nodeInputUserName = htmlDoc.getElementById("USERID")
  nodeInputUserName.Value = userName
  Call TriggerEvent(htmlDoc, nodeInputUserName, "onkeypress")
  
  'Set the password
  Set nodeInputPassWord = htmlDoc.getElementById("PASSWORD")
  nodeInputPassWord.Value = passWord
  Call TriggerEvent(htmlDoc, nodeInputPassWord, "onkeypress")
  
  'Click submit button
  htmlDoc.querySelector("a[role='button']").Click
  
End Sub

这是触发事件的过程:

Private Sub TriggerEvent(htmlDocument As Object, htmlElementWithEvent As Object, eventType As String)

  Dim theEvent As Object

  htmlElementWithEvent.Focus
  Set theEvent = htmlDocument.createEvent("HTMLEvents")
  theEvent.initEvent eventType, True, False
  htmlElementWithEvent.dispatchEvent theEvent
End Sub

推荐阅读