首页 > 解决方案 > VBA网页抓取英超联赛表格

问题描述

我正在尝试抓取表单数据(最近 6 场比赛),但它并没有按照我想要的方式出现。表单列显示绿色(赢)、橙色(平局)和输(红色)。我想先看看球队如下形式:

例如,利物浦 WWWWDL

下面是我用过的代码

Sub ELPForm()
 
    'dimension (set aside memory for) our variables
    Dim objIE As InternetExplorer
    Dim ele As Object
    Dim y As Integer
 
    'start a new browser instance
    Set objIE = New InternetExplorer
    'make browser visible
    objIE.Visible = False
 
    'navigate to page with needed data
    objIE.navigate "https://www.soccerstats.com/latest.asp?league=england"
    'wait for page to load
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
 
    'we will output data to excel, starting on row 1
    y = 1
 
  
    For Each ele In objIE.document.getElementById("btable").getElementsByTagName("tr")
        'show the text content of 'tr' element being looked at
        Debug.Print ele.textContent
        'each 'tr' (table row) element contains 4 children ('td') elements
        Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
        Sheets("Sheet1").Range("B" & y).Value = ele.Children(1).textContent
        Sheets("Sheet1").Range("D" & y).Value = ele.Children(10).textContent
        'increment row counter by 1
        y = y + 1
    'repeat until last ele has been evaluated
    Next
 
     
End Sub

标签: excelvba

解决方案


在德国已经很晚了,但是您可以通过以下宏获得所需的内容。如果您不需要结果字符串的颜色,请将这些行注释掉。

宏代码并不长,但有很多注释和 HTML 代码示例可以理解宏在做什么:

Sub ELPForm()

Dim browser As Object
Dim url As String
Dim nodesAllOdd As Object
Dim nodeSingleOdd As Object
Dim nodeAllCellsOfTheGotTableRow As Object
Dim nodeTableCellWithLastSixResults As Object
Dim nodeLastSixResults As Object
Dim nodesColors As Object
Dim nodeSingleColor As Object

Dim targetTable As String
Dim indexOdd As Long
Dim indexOddLowerBound As Long
Dim indexOddUpperBound As Long
Dim tableRow As Long
Dim lastSixResults As String
Dim charIndex As Byte

  'Table for imported data
  targetTable = "Sheet1"

  'First row for output
  tableRow = 1

  'Page with needed data
  url = "https://www.soccerstats.com/latest.asp?league=england"

  'Css index "odd" from bottom to top for the relevant table
  'Adjust the boundaries when the table is moved
  'That is the reason I prefered the "odd" class and not the
  'table ID you used
  indexOddLowerBound = 0
  indexOddUpperBound = 19

  'Initialize Internet Explorer, set visibility,
  'call URL and wait until page is fully loaded
  Set browser = CreateObject("internetexplorer.application")
  browser.Visible = False
  browser.navigate url
  Do Until browser.ReadyState = 4: DoEvents: Loop

  'Get all html nodes with the css class odd
  Set nodesAllOdd = browser.document.getElementsByClassName("odd")

  'Check if there is a node collection
  If Not nodesAllOdd Is Nothing Then
    'The firt 20 elements of the node collection are relevant at this time
    'In the future it can be the tables will be other ordered. All you have
    'to do then is to set the borders for the the new order
    'Now we go for the first 20 elements by a loop from 0 to 19 (by node
    'collection index) if there are more than 19 elements (by real count
    'of elements by the property "length")
    If nodesAllOdd.Length > 19 Then
      For indexOdd = indexOddLowerBound To indexOddUpperBound
        'Get the current "odd" node
        'All 20 single nodes looks like this example:
        '
        '<tr class='odd' height='24'>
        '  <td bgcolor='#12bc3b' height='22' align='center'>
        '    <b>1</b>
        '  </td>
        '  <td align='left' width='110'> &nbsp;<a href='team.asp?league=england&stats=u40-liverpool' title='Liverpool stats' target='_top'>Liverpool</a>
        '  </td>
        '  <td align='center'>
        '    <font color='green'>11</font>
        '  </td>
        '  <td align='center'> 10 </td>
        '  <td align='center'> 1 </td>
        '  <td align='center'> 0 </td>
        '  <td align='center'>
        '    <font color='blue'>25</font>
        '  </td>
        '  <td align='center'>
        '    <font color='red'>9</font>
        '  </td>
        '  <td align='center'>
        '    <font color='gray' size='1'>+16</font>
        '  </td>
        '  <td align='center' bgcolor='#dedede'>
        '    <b>31</b>
        '  </td>
        '  <td style='padding-left:10px;'>
        '    <a href='#' class='tooltip2'>
        '      <div>
        '        <div class='dgreen'></div>
        '        <div class='dgreen'></div>
        '        <div class='dgreen'></div>
        '        <div class='dorange'></div>
        '        <div class='dgreen'></div>
        '        <div class='dgreen'></div>
        '      </div>
        '      <span>
        '        <div style='text-align:left;'>
        '          <table width='100%' border='0' cellspacing='0' cellpadding='1'>
        '            <tr>
        '              <td align='right' height='18' style='background-color: #D7EFBE;'>
        '                <font color='gray' size='1'>22 Sep</font>
        '              </td>
        '              <td style='background-color: #D7EFBE;'> Chelsea - <b> Liverpool </b>
        '              </td>
        '              <td align='center' style='background-color: #D7EFBE;'>
        '                <b>1-2</b>
        '              </td>
        '            </tr>
        '            <tr>
        '              <td align='right' height='18' style='background-color: #D7EFBE;'>
        '                <font color='gray' size='1'>28 Sep</font>
        '              </td>
        '              <td style='background-color: #D7EFBE;'> Sheffield Utd - <b> Liverpool </b>
        '              </td>
        '              <td align='center' style='background-color: #D7EFBE;'>
        '                <b>0-1</b>
        '              </td>
        '            </tr>
        '            <tr>
        '              <td align='right' height='18' style='background-color: #D7EFBE;'>
        '                <font color='gray' size='1'>5 Oct</font>
        '              </td>
        '              <td style='background-color: #D7EFBE;'>
        '                <b> Liverpool </b> - Leicester City </td>
        '              <td align='center' style='background-color: #D7EFBE;'>
        '                <b>2-1</b>
        '              </td>
        '            </tr>
        '            <tr>
        '              <td align='right' height='18' style='background-color: #FFFFBF;'>
        '                <font color='gray' size='1'>20 Oct</font>
        '              </td>
        '              <td style='background-color: #FFFFBF;'> Manchester Utd - <b> Liverpool </b>
        '              </td>
        '              <td align='center' style='background-color: #FFFFBF;'>
        '                <b>1-1</b>
        '              </td>
        '            </tr>
        '            <tr>
        '              <td align='right' height='18' style='background-color: #D7EFBE;'>
        '                <font color='gray' size='1'>27 Oct</font>
        '              </td>
        '              <td style='background-color: #D7EFBE;'>
        '                <b> Liverpool </b> - Tottenham </td>
        '              <td align='center' style='background-color: #D7EFBE;'>
        '                <b>2-1</b>
        '              </td>
        '            </tr>
        '            <tr>
        '              <td align='right' height='18' style='background-color: #D7EFBE;'>
        '                <font color='gray' size='1'>2 Nov</font>
        '              </td>
        '              <td style='background-color: #D7EFBE;'> Aston Villa - <b> Liverpool </b>
        '              </td>
        '              <td align='center' style='background-color: #D7EFBE;'>
        '                <b>1-2</b>
        '              </td>
        '            </tr>
        '          </table>
        '        </div>
        '      </span>
        '    </a>
        '  </td>
        '  <td align='center'>
        '    <font color='black'>2.82</font>
        '  </td>
        '  <td align='center'>
        '    <font color='red'>2.75</font>
        '  </td>
        '  <td align='center'>
        '    <font color='green'>18%</font>
        '  </td>
        '  <td align='center'>
        '    <font color='red'>0%</font>
        '  </td>
        '</tr>
        Set nodeSingleOdd = nodesAllOdd(indexOdd)

        'The information we need are in certain cells of the got table row
        'So at first we need all cells of the got table row
        Set nodeAllCellsOfTheGotTableRow = nodeSingleOdd.getElementsByTagName("td")

        'We need the first table cell for the team place in the table of the league
        '<td bgcolor='#12bc3b' height='22' align='center'>
        '  <b>1</b>
        '</td>
        '
        'Node collection index for the first element is 0
        'We place the place of the team in thie first column of the current row in the table
        Sheets(targetTable).Cells(tableRow, 1).Value = nodeAllCellsOfTheGotTableRow(0).innertext

        'We need the second table cell for the team name
        '  <td align='left' width='110'> &nbsp;<a href='team.asp?league=england&stats=u40-liverpool' title='Liverpool stats' target='_top'>Liverpool</a>
        '  </td>
        '
        'Node collection index for the second element is 1
        'We place the team name in the second column of the current row in the table
        Sheets(targetTable).Cells(tableRow, 2).Value = nodeAllCellsOfTheGotTableRow(1).innertext

        'And our patient. The 11th cell of the current table row
        '<td style='padding-left:10px;'>
        '  <a href='#' class='tooltip2'>
        '    <div>
        '      <div class='dgreen'></div>
        '      <div class='dgreen'></div>
        '      <div class='dgreen'></div>
        '      <div class='dorange'></div>
        '      <div class='dgreen'></div>
        '      <div class='dgreen'></div>
        '    </div>
        '    <span>
        '      <div style='text-align:left;'>
        '        <table width='100%' border='0' cellspacing='0' cellpadding='1'>
        '          <tr>
        '            <td align='right' height='18' style='background-color: #D7EFBE;'>
        '              <font color='gray' size='1'>22 Sep</font>
        '            </td>
        '            <td style='background-color: #D7EFBE;'> Chelsea - <b> Liverpool </b>
        '            </td>
        '            <td align='center' style='background-color: #D7EFBE;'>
        '              <b>1-2</b>
        '            </td>
        '          </tr>
        '          <tr>
        '            <td align='right' height='18' style='background-color: #D7EFBE;'>
        '              <font color='gray' size='1'>28 Sep</font>
        '            </td>
        '            <td style='background-color: #D7EFBE;'> Sheffield Utd - <b> Liverpool </b>
        '            </td>
        '            <td align='center' style='background-color: #D7EFBE;'>
        '              <b>0-1</b>
        '            </td>
        '          </tr>
        '          <tr>
        '            <td align='right' height='18' style='background-color: #D7EFBE;'>
        '              <font color='gray' size='1'>5 Oct</font>
        '            </td>
        '            <td style='background-color: #D7EFBE;'>
        '              <b> Liverpool </b> - Leicester City </td>
        '            <td align='center' style='background-color: #D7EFBE;'>
        '              <b>2-1</b>
        '            </td>
        '          </tr>
        '          <tr>
        '            <td align='right' height='18' style='background-color: #FFFFBF;'>
        '              <font color='gray' size='1'>20 Oct</font>
        '            </td>
        '            <td style='background-color: #FFFFBF;'> Manchester Utd - <b> Liverpool </b>
        '            </td>
        '            <td align='center' style='background-color: #FFFFBF;'>
        '              <b>1-1</b>
        '            </td>
        '          </tr>
        '          <tr>
        '            <td align='right' height='18' style='background-color: #D7EFBE;'>
        '              <font color='gray' size='1'>27 Oct</font>
        '            </td>
        '            <td style='background-color: #D7EFBE;'>
        '              <b> Liverpool </b> - Tottenham </td>
        '            <td align='center' style='background-color: #D7EFBE;'>
        '              <b>2-1</b>
        '            </td>
        '          </tr>
        '          <tr>
        '            <td align='right' height='18' style='background-color: #D7EFBE;'>
        '              <font color='gray' size='1'>2 Nov</font>
        '            </td>
        '            <td style='background-color: #D7EFBE;'> Aston Villa - <b> Liverpool </b>
        '            </td>
        '            <td align='center' style='background-color: #D7EFBE;'>
        '              <b>1-2</b>
        '            </td>
        '          </tr>
        '        </table>
        '      </div>
        '    </span>
        '  </a>
        '</td>
        '
        'Node collection index for the 11th element is 10
        Set nodeTableCellWithLastSixResults = nodeAllCellsOfTheGotTableRow(10)

        'The desired information are the results of the last six games in form of win, draw or lost
        'We see a lot of information in the HTML code obove. For the needed one, we need only the first div tag
        '<div>
        '  <div class='dgreen'></div>
        '  <div class='dgreen'></div>
        '  <div class='dgreen'></div>
        '  <div class='dorange'></div>
        '  <div class='dgreen'></div>
        '  <div class='dgreen'></div>
        '</div>
        '
        'The needed div Tag is the first one
        'We get it by the tag name and get the first div element by the index 0 of the node collecton
        Set nodeLastSixResults = nodeTableCellWithLastSixResults.getElementsByTagName("div")(0)

        'There is no text but three different colors for win, draw or lost
        'dgreen  = win
        'dorange = draw
        'dred    = lost
        '
        'We make a node collection of the div tags inside the last got div tag
        Set nodesColors = nodeLastSixResults.getElementsByTagName("div")

        'This is what we have now in our node collection
        '<div class='dgreen'></div>
        '<div class='dgreen'></div>
        '<div class='dgreen'></div>
        '<div class='dorange'></div>
        '<div class='dgreen'></div>
        '<div class='dgreen'></div>
        '
        'We get all colors in a loop
        For Each nodeSingleColor In nodesColors
          Select Case nodeSingleColor.getAttribute("class")
            Case "dgreen": lastSixResults = lastSixResults & "W"
            Case "dorange": lastSixResults = lastSixResults & "D"
            Case "dred": lastSixResults = lastSixResults & "L"
          End Select
        Next nodeSingleColor

        'Write the result string to the table
        Sheets(targetTable).Cells(tableRow, 3).Value = lastSixResults

        'Coloring the result string with the same colors like on the page
        'and set the proportional font Courier New for better eye traking
        For charIndex = 1 To Len(lastSixResults)
          Select Case Mid(lastSixResults, charIndex, 1)
            Case "W" 'Green 51, 153, 51
              Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Color = RGB(51, 153, 51)
              Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Name = "Courier New"
            Case "D" 'Orange 255, 181, 108
              Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Color = RGB(255, 181, 108)
              Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Name = "Courier New"
            Case "L" 'Red 255, 0, 0
              Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Color = RGB(255, 0, 0)
              Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Name = "Courier New"
          End Select
        Next charIndex

        'Reset result string
        lastSixResults = ""

        'Next Excel table row for the next team
        tableRow = tableRow + 1
      Next indexOdd
    Else
      MsgBox "No node collection with at least 20 css odd elements."
    End If
  Else
    MsgBox "Wrong page. No relevant tables found"
  End If

  'Clean up
  browser.Quit
  Set browser = Nothing
  Set nodesAllOdd = Nothing
  Set nodeSingleOdd = Nothing
  Set nodeAllCellsOfTheGotTableRow = Nothing
  Set nodeTableCellWithLastSixResults = Nothing
  Set nodeLastSixResults = Nothing
  Set nodesColors = Nothing
  Set nodeSingleColor = Nothing
End Sub

此致

兹文


推荐阅读