excel - VBA网页抓取英超联赛表格
问题描述
我正在尝试抓取表单数据(最近 6 场比赛),但它并没有按照我想要的方式出现。表单列显示绿色(赢)、橙色(平局)和输(红色)。我想先看看球队如下形式:
例如,利物浦 WWWWDL
下面是我用过的代码
Sub ELPForm()
'dimension (set aside memory for) our variables
Dim objIE As InternetExplorer
Dim ele As Object
Dim y As Integer
'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = False
'navigate to page with needed data
objIE.navigate "https://www.soccerstats.com/latest.asp?league=england"
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
'we will output data to excel, starting on row 1
y = 1
For Each ele In objIE.document.getElementById("btable").getElementsByTagName("tr")
'show the text content of 'tr' element being looked at
Debug.Print ele.textContent
'each 'tr' (table row) element contains 4 children ('td') elements
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
Sheets("Sheet1").Range("B" & y).Value = ele.Children(1).textContent
Sheets("Sheet1").Range("D" & y).Value = ele.Children(10).textContent
'increment row counter by 1
y = y + 1
'repeat until last ele has been evaluated
Next
End Sub
解决方案
在德国已经很晚了,但是您可以通过以下宏获得所需的内容。如果您不需要结果字符串的颜色,请将这些行注释掉。
宏代码并不长,但有很多注释和 HTML 代码示例可以理解宏在做什么:
Sub ELPForm()
Dim browser As Object
Dim url As String
Dim nodesAllOdd As Object
Dim nodeSingleOdd As Object
Dim nodeAllCellsOfTheGotTableRow As Object
Dim nodeTableCellWithLastSixResults As Object
Dim nodeLastSixResults As Object
Dim nodesColors As Object
Dim nodeSingleColor As Object
Dim targetTable As String
Dim indexOdd As Long
Dim indexOddLowerBound As Long
Dim indexOddUpperBound As Long
Dim tableRow As Long
Dim lastSixResults As String
Dim charIndex As Byte
'Table for imported data
targetTable = "Sheet1"
'First row for output
tableRow = 1
'Page with needed data
url = "https://www.soccerstats.com/latest.asp?league=england"
'Css index "odd" from bottom to top for the relevant table
'Adjust the boundaries when the table is moved
'That is the reason I prefered the "odd" class and not the
'table ID you used
indexOddLowerBound = 0
indexOddUpperBound = 19
'Initialize Internet Explorer, set visibility,
'call URL and wait until page is fully loaded
Set browser = CreateObject("internetexplorer.application")
browser.Visible = False
browser.navigate url
Do Until browser.ReadyState = 4: DoEvents: Loop
'Get all html nodes with the css class odd
Set nodesAllOdd = browser.document.getElementsByClassName("odd")
'Check if there is a node collection
If Not nodesAllOdd Is Nothing Then
'The firt 20 elements of the node collection are relevant at this time
'In the future it can be the tables will be other ordered. All you have
'to do then is to set the borders for the the new order
'Now we go for the first 20 elements by a loop from 0 to 19 (by node
'collection index) if there are more than 19 elements (by real count
'of elements by the property "length")
If nodesAllOdd.Length > 19 Then
For indexOdd = indexOddLowerBound To indexOddUpperBound
'Get the current "odd" node
'All 20 single nodes looks like this example:
'
'<tr class='odd' height='24'>
' <td bgcolor='#12bc3b' height='22' align='center'>
' <b>1</b>
' </td>
' <td align='left' width='110'> <a href='team.asp?league=england&stats=u40-liverpool' title='Liverpool stats' target='_top'>Liverpool</a>
' </td>
' <td align='center'>
' <font color='green'>11</font>
' </td>
' <td align='center'> 10 </td>
' <td align='center'> 1 </td>
' <td align='center'> 0 </td>
' <td align='center'>
' <font color='blue'>25</font>
' </td>
' <td align='center'>
' <font color='red'>9</font>
' </td>
' <td align='center'>
' <font color='gray' size='1'>+16</font>
' </td>
' <td align='center' bgcolor='#dedede'>
' <b>31</b>
' </td>
' <td style='padding-left:10px;'>
' <a href='#' class='tooltip2'>
' <div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
' <div class='dorange'></div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
' </div>
' <span>
' <div style='text-align:left;'>
' <table width='100%' border='0' cellspacing='0' cellpadding='1'>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>22 Sep</font>
' </td>
' <td style='background-color: #D7EFBE;'> Chelsea - <b> Liverpool </b>
' </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>1-2</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>28 Sep</font>
' </td>
' <td style='background-color: #D7EFBE;'> Sheffield Utd - <b> Liverpool </b>
' </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>0-1</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>5 Oct</font>
' </td>
' <td style='background-color: #D7EFBE;'>
' <b> Liverpool </b> - Leicester City </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>2-1</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #FFFFBF;'>
' <font color='gray' size='1'>20 Oct</font>
' </td>
' <td style='background-color: #FFFFBF;'> Manchester Utd - <b> Liverpool </b>
' </td>
' <td align='center' style='background-color: #FFFFBF;'>
' <b>1-1</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>27 Oct</font>
' </td>
' <td style='background-color: #D7EFBE;'>
' <b> Liverpool </b> - Tottenham </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>2-1</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>2 Nov</font>
' </td>
' <td style='background-color: #D7EFBE;'> Aston Villa - <b> Liverpool </b>
' </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>1-2</b>
' </td>
' </tr>
' </table>
' </div>
' </span>
' </a>
' </td>
' <td align='center'>
' <font color='black'>2.82</font>
' </td>
' <td align='center'>
' <font color='red'>2.75</font>
' </td>
' <td align='center'>
' <font color='green'>18%</font>
' </td>
' <td align='center'>
' <font color='red'>0%</font>
' </td>
'</tr>
Set nodeSingleOdd = nodesAllOdd(indexOdd)
'The information we need are in certain cells of the got table row
'So at first we need all cells of the got table row
Set nodeAllCellsOfTheGotTableRow = nodeSingleOdd.getElementsByTagName("td")
'We need the first table cell for the team place in the table of the league
'<td bgcolor='#12bc3b' height='22' align='center'>
' <b>1</b>
'</td>
'
'Node collection index for the first element is 0
'We place the place of the team in thie first column of the current row in the table
Sheets(targetTable).Cells(tableRow, 1).Value = nodeAllCellsOfTheGotTableRow(0).innertext
'We need the second table cell for the team name
' <td align='left' width='110'> <a href='team.asp?league=england&stats=u40-liverpool' title='Liverpool stats' target='_top'>Liverpool</a>
' </td>
'
'Node collection index for the second element is 1
'We place the team name in the second column of the current row in the table
Sheets(targetTable).Cells(tableRow, 2).Value = nodeAllCellsOfTheGotTableRow(1).innertext
'And our patient. The 11th cell of the current table row
'<td style='padding-left:10px;'>
' <a href='#' class='tooltip2'>
' <div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
' <div class='dorange'></div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
' </div>
' <span>
' <div style='text-align:left;'>
' <table width='100%' border='0' cellspacing='0' cellpadding='1'>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>22 Sep</font>
' </td>
' <td style='background-color: #D7EFBE;'> Chelsea - <b> Liverpool </b>
' </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>1-2</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>28 Sep</font>
' </td>
' <td style='background-color: #D7EFBE;'> Sheffield Utd - <b> Liverpool </b>
' </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>0-1</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>5 Oct</font>
' </td>
' <td style='background-color: #D7EFBE;'>
' <b> Liverpool </b> - Leicester City </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>2-1</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #FFFFBF;'>
' <font color='gray' size='1'>20 Oct</font>
' </td>
' <td style='background-color: #FFFFBF;'> Manchester Utd - <b> Liverpool </b>
' </td>
' <td align='center' style='background-color: #FFFFBF;'>
' <b>1-1</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>27 Oct</font>
' </td>
' <td style='background-color: #D7EFBE;'>
' <b> Liverpool </b> - Tottenham </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>2-1</b>
' </td>
' </tr>
' <tr>
' <td align='right' height='18' style='background-color: #D7EFBE;'>
' <font color='gray' size='1'>2 Nov</font>
' </td>
' <td style='background-color: #D7EFBE;'> Aston Villa - <b> Liverpool </b>
' </td>
' <td align='center' style='background-color: #D7EFBE;'>
' <b>1-2</b>
' </td>
' </tr>
' </table>
' </div>
' </span>
' </a>
'</td>
'
'Node collection index for the 11th element is 10
Set nodeTableCellWithLastSixResults = nodeAllCellsOfTheGotTableRow(10)
'The desired information are the results of the last six games in form of win, draw or lost
'We see a lot of information in the HTML code obove. For the needed one, we need only the first div tag
'<div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
' <div class='dorange'></div>
' <div class='dgreen'></div>
' <div class='dgreen'></div>
'</div>
'
'The needed div Tag is the first one
'We get it by the tag name and get the first div element by the index 0 of the node collecton
Set nodeLastSixResults = nodeTableCellWithLastSixResults.getElementsByTagName("div")(0)
'There is no text but three different colors for win, draw or lost
'dgreen = win
'dorange = draw
'dred = lost
'
'We make a node collection of the div tags inside the last got div tag
Set nodesColors = nodeLastSixResults.getElementsByTagName("div")
'This is what we have now in our node collection
'<div class='dgreen'></div>
'<div class='dgreen'></div>
'<div class='dgreen'></div>
'<div class='dorange'></div>
'<div class='dgreen'></div>
'<div class='dgreen'></div>
'
'We get all colors in a loop
For Each nodeSingleColor In nodesColors
Select Case nodeSingleColor.getAttribute("class")
Case "dgreen": lastSixResults = lastSixResults & "W"
Case "dorange": lastSixResults = lastSixResults & "D"
Case "dred": lastSixResults = lastSixResults & "L"
End Select
Next nodeSingleColor
'Write the result string to the table
Sheets(targetTable).Cells(tableRow, 3).Value = lastSixResults
'Coloring the result string with the same colors like on the page
'and set the proportional font Courier New for better eye traking
For charIndex = 1 To Len(lastSixResults)
Select Case Mid(lastSixResults, charIndex, 1)
Case "W" 'Green 51, 153, 51
Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Color = RGB(51, 153, 51)
Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Name = "Courier New"
Case "D" 'Orange 255, 181, 108
Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Color = RGB(255, 181, 108)
Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Name = "Courier New"
Case "L" 'Red 255, 0, 0
Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Color = RGB(255, 0, 0)
Sheets(targetTable).Cells(tableRow, 3).Characters(Start:=charIndex, Length:=1).Font.Name = "Courier New"
End Select
Next charIndex
'Reset result string
lastSixResults = ""
'Next Excel table row for the next team
tableRow = tableRow + 1
Next indexOdd
Else
MsgBox "No node collection with at least 20 css odd elements."
End If
Else
MsgBox "Wrong page. No relevant tables found"
End If
'Clean up
browser.Quit
Set browser = Nothing
Set nodesAllOdd = Nothing
Set nodeSingleOdd = Nothing
Set nodeAllCellsOfTheGotTableRow = Nothing
Set nodeTableCellWithLastSixResults = Nothing
Set nodeLastSixResults = Nothing
Set nodesColors = Nothing
Set nodeSingleColor = Nothing
End Sub
此致
兹文
推荐阅读
- react-native - 从 google api 获取无法正常工作?
- python - python 3中的单行“编织”连接
- angular - Angular Parse 错误:在预期表达式的位置得到插值 ({{}})
- html - 对齐两个 div 以使第二个 div 获得所需的所有空间
- networking - 为什么设置静态ip时需要DNS服务器
- ruby - RSpec:断言模拟是正确的
- android - Android 图标大小应遵循哪个 Google 文档?
- javascript - 从javascript代码访问json函数变量到Angular6
- javascript - 如何使用扩展语法或解构到 `this` 值?
- android - 当用户启用新键盘时做某事