首页 > 解决方案 > VBA Excel 数据抓取

问题描述

我想用 Excel 中的 VBA 刮掉这个网站,它可以工作,它可以连接到 IE 上的网站,但没有写在工作表上,我该如何解决?

它:https ://www.betfair.it/exchange/plus/it/calcio-scommesse-1/today

通讯:https ://www.betfair.com/exchange/plus/en/football-betting-1/today

    Private Sub Test()

   Dim ie As Object, i As Long, strText As String

   Dim doc As Object, hTable As Object, hBody As Object, hTR As Object, hTD As Object
   Dim tb As Object, bb As Object, tr As Object, td As Object

   Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet

     Set wb = Excel.ActiveWorkbook
     Set ws = wb.ActiveSheet

     Set ie = CreateObject("InternetExplorer.Application")
     ie.Visible = True

      y = 1   'Column A in Excel
      z = 1   'Row 1 in Excel

     ie.navigate "https://www.betfair.it/exchange/plus/inplay/football" 

     Do While ie.busy: DoEvents: Loop
     Do While ie.ReadyState <> 4: DoEvents: Loop

     Set doc = ie.document
     Set hTable = doc.GetElementsByTagName("table")


     For Each tb In hTable

        Set hBody = tb.GetElementsByTagName("tbody")
        For Each bb In hBody

            Set hTR = bb.GetElementsByTagName("tr")
            For Each tr In hTR


                 Set hTD = tr.GetElementsByTagName("td")
                 y = 1 ' Resets back to column A
                 For Each td In hTD
                   ws.Cells(z, y).Value = td.innertext
                   y = y + 1
                 Next td
                 DoEvents
                 z = z + 1
            Next tr
            Exit For
        Next bb
    Exit For
  Next tb

End Sub

标签: htmlexcelvbaweb-scraping

解决方案


请阅读我在代码中的评论:

Private Sub Test()

   Dim ie As Object, i As Long 'strText As String (not used)

   Dim main As Object, hTable As Object, hBody As Object, hTR As Object, hTD As Object
   Dim tb As Object, bb As Object, tr As Object, td As Object

   Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet

     Set wb = Excel.ActiveWorkbook
     Set ws = wb.ActiveSheet

     Set ie = CreateObject("InternetExplorer.Application")
     ie.Visible = True

      y = 1   'Column A in Excel
      z = 1   'Row 1 in Excel

     ie.navigate "https://www.betfair.it/exchange/plus/inplay/football"

     'Do While ie.busy: DoEvents: Loop 'not needed
     Do While ie.ReadyState <> 4: DoEvents: Loop
     'There are AJAX data to load in the Tables after ReadyState is set to 4 (which means 'complete')
     'So you need a manuell break to give the data time to come in
     Application.Wait (Now + TimeSerial(0, 0, 5))

     'Set doc = ie.document 'Not needed

     'At first you must grab the HTML area which includes the tables you want
     'Without this you get only the first table in the whole HTML document
     'which lays in the header of the page
     Set main = ie.document.GetElementsByTagName("main")(0)

     'From here it's your code
     'I think you will soon see that it is not enough just to read the inner text of the TD tags
     'You will have to split it further. But that is your next planned step I think.
     Set hTable = main.GetElementsByTagName("table")


     For Each tb In hTable

        Set hBody = tb.GetElementsByTagName("tbody")
        For Each bb In hBody

            Set hTR = bb.GetElementsByTagName("tr")
            For Each tr In hTR


                 Set hTD = tr.GetElementsByTagName("td")
                 y = 1 ' Resets back to column A
                 For Each td In hTD
                   ws.Cells(z, y).Value = td.innertext
                   y = y + 1
                 Next td
                 DoEvents
                 z = z + 1
            Next tr
            Exit For
        Next bb
    Exit For
  Next tb

End Sub

推荐阅读