r - 使用 RSelenium 抓取网页并保存为数据框时,如何编写 for 循环
问题描述
我最近在尝试使用 RSelenium 从一个 URL 列表中抓取网页数据。自然,我想把从每个 URL 抓取到的数据合并到一个数据框中。但这样做之后,返回的数据框除了数据之外,还包含“checkStatus”、“statusClass”等杂项。这很难用语言描述,希望下面的代码能更好地说明问题。
# Create a remote driver for the Selenium server at localhost:4444 and open
# a Chrome session on it.
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4444,
  browserName = "chrome"
)
remDr$open()

# Player stats pages to scrape.
URL_list <- c(
  "https://www.premierleague.com/players/4183/Ahmed-El-Mohamady/stats?co=1&se=363"
)
# Webscrape function
#
# Reads the stat cells of the page currently loaded in `remDr` and returns
# them (invisibly) as a data frame with one column per stat plus `Position`.
#
# link_element: accepted so the function can be used as an lapply() callback;
#   the value passed in is not used.
#
# Relies on two objects defined outside the function: `remDr` (the browser
# session) and `Text` (the position label read by the caller).
ScrapeDF <- function(link_element) {
  # Output column name -> css selector, grouped by section of the stats page.
  general <- c(
    Appearance = ".statappearances",
    Wins = ".statwins",
    Losses = ".statlosses"
  )
  defence <- c(
    CleanSheet = ".statclean_sheet",
    Conceded = ".statgoals_conceded",
    Tackles = ".stattotal_tackle",
    SuccessTackle = ".stattackle_success",
    LastManTackle = ".statlast_man_tackle",
    BlockedShots = ".statblocked_scoring_att",
    Interceptions = ".statinterception",
    Clearances = ".stattotal_clearance",
    HeadedClearance = ".stateffective_head_clearance",
    OffLineClearance = ".statclearance_off_line",
    Recoveries = ".statball_recovery",
    DuelsWon = ".statduel_won",
    DuelsLost = ".statduel_lost",
    Successful50_50 = ".statwon_contest",
    AerialWon = ".stataerial_won",
    AerialLost = ".stataerial_lost",
    OwnGoal = ".statown_goals",
    ErrorsToGoal = ".staterror_lead_to_goal"
  )
  team_play <- c(
    Assist = ".statgoal_assist",
    Passes = ".stattotal_pass",
    PassperMatch = ".stattotal_pass_per_game",
    BigChanceCreated = ".statbig_chance_created",
    Crosses = ".stattotal_cross",
    CrossAcc = ".statcross_accuracy",
    ThroughBall = ".stattotal_through_ball",
    AccLongBall = ".stataccurate_long_balls"
  )
  discipline <- c(
    YellowCard = ".statyellow_card",
    RedCard = ".statred_card",
    Fouls = ".statfouls",
    Offside = ".stattotal_offside"
  )
  attack <- c(
    Goals = ".statgoals",
    HeadedGoals = ".statatt_hd_goal",
    RightFootGoal = ".statatt_rf_goal",
    LeftFootGoal = ".statatt_lf_goal",
    Woodwork = ".stathit_woodwork"
  )

  # Look up one element by css selector and return its text as character.
  read_stat <- function(css) {
    node <- remDr$findElement(using = "css selector", css)
    as.character(node$getElementText())
  }

  # Elements are queried section by section in this order.
  stats <- lapply(
    c(general, defence, team_play, discipline, attack),
    read_stat
  )

  # The data frame lists the sections in a different order than they are read.
  column_order <- c(
    names(general), names(attack), names(discipline),
    names(team_play), names(defence)
  )
  DF_Compiled <- do.call(
    data.frame,
    c(list(Position = Text), stats[column_order])
  )
  invisible(DF_Compiled)
}
## For loop to webscrape
# Start from an empty data frame; each iteration binds its result onto it.
CompletePlayerData <- data.frame(matrix(nrow = 0,ncol = 0))
# Visit every URL in URL_list and scrape the stats when the page's position
# label reads "Defender".
for (url in URL_list) {
remDr$navigate(url)
# Fixed 4-second pause after navigation, presumably to let the page render.
Sys.sleep(4)
# Text of the first element matching ".info".
Position <- remDr$findElement(using = "css selector",".info")
Text <- as.character(Position$getElementText())
if(Text == "Defender"){
# NOTE(review): lapply() applies ScrapeDF to each member of `Position`
# instead of calling it once. This looks like the origin of the
# checkError.* / checkStatus.* prefixed columns in the result -- confirm.
saved_list <- lapply(Position, ScrapeDF)
} else {
# Otherwise retry with an ".info" element that follows a sibling ".info".
Position <- remDr$findElement(using = "css selector",".info~ .info")
Text <- as.character(Position$getElementText())
if(Text == "Defender"){
saved_list <- lapply(Position, ScrapeDF)
}
}
# NOTE(review): saved_list is not reset per iteration, so when neither check
# matches, the value from an earlier iteration (if any) is bound again.
CompletePlayerData <- bind_rows(CompletePlayerData, saved_list)
}
这将返回一个包含 900 列的数据框,如下所示:
checkError.Position ... checkStatus.Position ... nativeEvents.Appearance ...
Defender ... Defender ... 14 ...
所以我的问题是:
- 为什么它返回这么多列
- 有没有办法合并数据,使各列分别对应 “Position”、“Appearance”、“Goals” 等?
代码比较长,先在此致歉。
解决方案
library(RSelenium)
library(tidyverse)
# Create a remote driver for the Selenium server at localhost:4444 and open
# a Chrome session on it.
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4444,
  browserName = "chrome"
)
remDr$open()

# Player stats pages to scrape.
URL_list <- c(
  "https://www.premierleague.com/players/4183/Ahmed-El-Mohamady/stats?co=1&se=363"
)
# Webscrape function
#
# Collects the stat values shown on the page currently open in `remDr` and
# returns them (invisibly) as a data frame: `Position` first, then one column
# per stat.
#
# link_element: kept so the function can be passed to lapply(); the value
#   supplied by the caller is ignored.
#
# Uses `remDr` (browser session) and `Text` (position label) from outside the
# function.
ScrapeDF <- function(link_element) {
  # Column name -> css selector, listed in the order the page is queried.
  css_by_column <- c(
    # General stats
    Appearance = ".statappearances",
    Wins = ".statwins",
    Losses = ".statlosses",
    # Defence stats
    CleanSheet = ".statclean_sheet",
    Conceded = ".statgoals_conceded",
    Tackles = ".stattotal_tackle",
    SuccessTackle = ".stattackle_success",
    LastManTackle = ".statlast_man_tackle",
    BlockedShots = ".statblocked_scoring_att",
    Interceptions = ".statinterception",
    Clearances = ".stattotal_clearance",
    HeadedClearance = ".stateffective_head_clearance",
    OffLineClearance = ".statclearance_off_line",
    Recoveries = ".statball_recovery",
    DuelsWon = ".statduel_won",
    DuelsLost = ".statduel_lost",
    Successful50_50 = ".statwon_contest",
    AerialWon = ".stataerial_won",
    AerialLost = ".stataerial_lost",
    OwnGoal = ".statown_goals",
    ErrorsToGoal = ".staterror_lead_to_goal",
    # Team play stats
    Assist = ".statgoal_assist",
    Passes = ".stattotal_pass",
    PassperMatch = ".stattotal_pass_per_game",
    BigChanceCreated = ".statbig_chance_created",
    Crosses = ".stattotal_cross",
    CrossAcc = ".statcross_accuracy",
    ThroughBall = ".stattotal_through_ball",
    AccLongBall = ".stataccurate_long_balls",
    # Discipline stats
    YellowCard = ".statyellow_card",
    RedCard = ".statred_card",
    Fouls = ".statfouls",
    Offside = ".stattotal_offside",
    # Attack stats
    Goals = ".statgoals",
    HeadedGoals = ".statatt_hd_goal",
    RightFootGoal = ".statatt_rf_goal",
    LeftFootGoal = ".statatt_lf_goal",
    Woodwork = ".stathit_woodwork"
  )

  # Fetch the text of each element, one lookup per selector.
  values <- list()
  for (column in names(css_by_column)) {
    cell <- remDr$findElement(
      using = "css selector",
      css_by_column[[column]]
    )
    values[[column]] <- as.character(cell$getElementText())
  }

  # Column layout of the resulting data frame (after the leading Position).
  column_order <- c(
    "Appearance", "Wins", "Losses", "Goals",
    "HeadedGoals", "RightFootGoal", "LeftFootGoal", "Woodwork",
    "YellowCard", "RedCard", "Fouls", "Offside",
    "Assist", "Passes", "PassperMatch", "BigChanceCreated",
    "Crosses", "CrossAcc", "ThroughBall", "AccLongBall",
    "CleanSheet", "Conceded", "Tackles", "SuccessTackle",
    "LastManTackle", "BlockedShots", "Interceptions", "Clearances",
    "HeadedClearance", "OffLineClearance", "Recoveries",
    "DuelsWon", "DuelsLost", "Successful50_50",
    "AerialWon", "AerialLost", "OwnGoal", "ErrorsToGoal"
  )

  DF_Compiled <- do.call(
    data.frame,
    c(list(Position = Text), values[column_order])
  )
  invisible(DF_Compiled)
}
## For loop to webscrape
#
# Visits each URL in URL_list, scrapes the stats of pages whose position label
# is "Defender", and stacks the per-page data frames into CompletePlayerData.
#
# ScrapeDF is called exactly once per page. Passing the webElement to lapply()
# would instead apply ScrapeDF to every member of that object, which scrapes
# the same page repeatedly and yields the checkError.* / checkStatus.* style
# duplicates that then have to be unnested and de-duplicated.
#
# This must stay a top-level loop: ScrapeDF reads `Text` from the global
# environment, so `Text` has to be assigned there.

# One slot per URL; slots of skipped or failed pages stay NULL and are
# dropped by bind_rows().
player_rows <- vector("list", length(URL_list))

for (i in seq_along(URL_list)) {
  url <- URL_list[[i]]
  remDr$navigate(url)
  # Give the page time to render before querying it.
  Sys.sleep(4)

  Position <- remDr$findElement(using = "css selector", ".info")
  Text <- as.character(Position$getElementText())

  # Only defenders are scraped; any other page contributes no row (rather
  # than re-using the result left over from a previous iteration).
  if (identical(Text, "Defender")) {
    scraped <- tryCatch(
      ScrapeDF(Position),
      error = function(e) {
        # Report the failure instead of silently swallowing it, then skip.
        warning(
          "Failed to scrape ", url, ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
    # Assigning NULL with [[<- would delete the slot, so guard explicitly.
    if (!is.null(scraped)) {
      player_rows[[i]] <- scraped
    }
  }
}

# Bind once at the end instead of growing the result inside the loop.
CompletePlayerData <- as_tibble(bind_rows(player_rows))

CompletePlayerData %>%
  distinct(Position, Goals, Appearance)
输出:
# A tibble: 1 x 3
Position Appearance Goals
<fct> <fct> <fct>
1 Defender 14 0
推荐阅读
- r - 无法在 Azure DSVM 上安装 AzureStor,因为它不会检测 Rtools
- r - 在R上使用“chull”后如何创建凸面区域(由线连接)
- bash - 你能解释一下 bash shell 中的数学语法吗?
- java - 已解决 - Java ShakeDetector 多次触发 - 如何获得最后一个输出?
- c# - 如何从 .net Core 3.1 dll (WPF) 访问 Dispatcher
- javascript - 在 p5.js 中添加多个对象
- python - 在使用 Django/DRF 时需要有关如何处理和/或避免循环导入的架构建议
- mysql - 节点 MySQL 连接池 - 等待数据库启动
- amazon-s3 - 从 Terraform 在 Ansible 中运行时,AWS s3 副本在大量传输后挂起
- android - 导航架构组件 - BottomNavigationView 避免片段重新加载