首页 > 解决方案 > for 循环中的变量未在另一个循环中显示

问题描述

我一直在用 nba.com 的球员列表练习网络抓取,但遇到了一个问题:我在一个 for 循环中抓取到的链接,在另一个 for 循环中使用时却没有出现。

我已经尝试在原来的 for 循环以及我希望使用该变量的 for 循环中创建更多变量,但链接仍然没有出现。我想使用抓取到的链接(它不是完整的链接,只是链接的末尾部分,我打算把它与基础 URL 拼接起来)。

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
class Player():
    """Record for one scraped player: name, profile link, and PPG/RPG text."""
    def __init__(self):
        # Every field starts as an empty string; the scraping functions
        # assign the real values afterwards.
        self.name = self.link = ""
        self.PPG = self.RPG = ""
def get_player_list():
    """Scrape the NBA stats player index and return one Player per link.

    Loads https://stats.nba.com/players/list/ in headless Chrome, parses the
    rendered page with BeautifulSoup, and builds a Player from the text and
    ``href`` of every ``<a>`` inside the player-list container. Each name and
    link is printed as a debugging aid.

    Returns:
        list: Player objects with ``name`` and ``link`` filled in.
    """
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    browser = webdriver.Chrome(options=opt)
    try:
        browser.get('https://stats.nba.com/players/list/')
        soup = BeautifulSoup(browser.page_source, 'lxml')
    finally:
        # Shut Chrome down even if loading or parsing the page fails.
        browser.quit()
    names = soup.find('div',class_='stats-player-list players-list')
    player_list = []

    # NOTE: the loop and the return below must be indented inside the
    # function; at column 0 they are not part of it and the file fails to
    # parse.
    for name in names.find_all('a'):
        new_play = Player()
        new_play.name = name.text
        # The href is stored exactly as found, so an anchor with an empty
        # href yields a Player whose link is "".
        new_play.link = name["href"]
        player_list.append(new_play)

    for one_player in player_list:
        print (one_player.name)
        print (one_player.link)

    return player_list
def get_player_stats(player_list):
    """Fetch PPG and RPG for the first two players in ``player_list``.

    For each of the first two players, loads ``'https://stats.nba.com' +
    p.link`` in headless Chrome and reads the elements that follow the
    ``<div>`` inside the points and rebounds links.

    Args:
        player_list: Player objects whose ``link`` is a path relative to
            https://stats.nba.com.

    Returns:
        The same list object, with ``PPG`` and ``RPG`` set on the players
        that were processed.
    """
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    browser = webdriver.Chrome(options=opt)
    try:
        for p in player_list[0:2]:

            browser.get('https://stats.nba.com'+p.link)

            soup = BeautifulSoup(browser.page_source, 'lxml')

            # The browser must stay open here: quitting inside the loop
            # would make the next iteration's get() run on a closed session.
            PPG = ""
            points1 = soup.find('a',href = '/players/traditional/?sort=PTS&dir=-1')
            points = points1.div
            # Each sibling overwrites the previous one, so the last wins.
            for point in points.findNextSiblings():
                PPG = "PPG" + point.text
            RPG = ""
            rebounds1 = soup.find('a',href = '/players/traditional/?sort=REB&dir=-1')
            rebounds = rebounds1.div
            for rebound in rebounds.findNextSiblings():
                RPG = "RPG" + rebound.text

            p.PPG = PPG
            p.RPG = RPG
    finally:
        # Single shutdown point, reached on success and on error alike.
        browser.quit()
    return player_list
# Scrape the player index, then attach per-player stats to the result.
player_list = get_player_stats(
    get_player_list()
)

如以 for name in names.find_all('a'): 开头的那段缩进代码所示,一切正常,链接被传递过来并按预期格式打印出来(例如 Abrines, Alex /player/203518/)。但是当执行到 for p in player_list[0:2]: 时,p.link 并没有被传递过来;当我尝试打印 p.link 时,什么都没有打印出来。任何帮助都将不胜感激,因为我已经反复测试了很久!

标签: python, selenium, web-scraping, chrome-web-driver

解决方案


这是因为第一个 href 为空(null)。在这种情况下,您需要先加一个条件判断,再将其添加到 list 中。我已经添加了这一步,请现在再检查一下。

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
class Player():
    """One scraped NBA player.

    All attributes are plain strings and default to ``""``:

    * ``name`` - link text taken from the player index.
    * ``link`` - the anchor's ``href``, later appended to the site base URL.
    * ``PPG`` / ``RPG`` - points / rebounds text filled in by the stats
      scraper.
    """

    def __init__(self, name="", link="", PPG="", RPG=""):
        # Defaults keep the original no-argument construction working.
        self.name = name
        self.link = link
        self.PPG = PPG
        self.RPG = RPG

    def __repr__(self):
        # Readable form so printing a Player (or a list of them) shows the
        # scraped values instead of an object address.
        return "Player(name={!r}, link={!r}, PPG={!r}, RPG={!r})".format(
            self.name, self.link, self.PPG, self.RPG)

def get_player_list():
    """Scrape the NBA stats player index and return the linked players.

    Loads https://stats.nba.com/players/list/ in headless Chrome, parses the
    rendered page with BeautifulSoup, and builds a Player from every ``<a>``
    inside the player-list container that has a non-empty ``href``.

    Returns:
        list: Player objects with ``name`` and ``link`` filled in. Empty if
        the player-list container is not present in the page.
    """
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    browser = webdriver.Chrome(options=opt)
    try:
        browser.get('https://stats.nba.com/players/list/')
        soup = BeautifulSoup(browser.page_source, 'lxml')
    finally:
        # Shut Chrome down even if loading or parsing the page fails.
        browser.quit()

    names = soup.find('div', class_='stats-player-list players-list')
    if names is None:
        # Container not found: nothing to collect.
        return []

    player_list = []
    for name in names.find_all('a'):
        # .get() returns None when the attribute is absent, so anchors with
        # a missing or an empty href are both skipped instead of raising
        # KeyError or yielding a Player with an unusable link.
        href = name.get("href")
        if not href:
            continue
        new_play = Player()
        new_play.name = name.text
        new_play.link = href
        player_list.append(new_play)

    return player_list
def _last_sibling_stat(soup, href, label):
    """Return ``label`` + text of the last element after the stat link's div.

    Looks up the ``<a>`` whose ``href`` equals ``href`` and walks the
    siblings that follow its first ``<div>``. Returns ``""`` when the link,
    its ``<div>``, or any following sibling is missing.
    """
    anchor = soup.find('a', href=href)
    if anchor is None or anchor.div is None:
        return ""
    value = ""
    # Each sibling overwrites the previous one, so the last one wins.
    for sibling in anchor.div.find_next_siblings():
        value = label + sibling.text
    return value


def get_player_stats(player_list):
    """Fetch PPG and RPG for the first two players in ``player_list``.

    For each of the first two players, prints and loads
    ``'https://stats.nba.com' + p.link`` in headless Chrome, then reads the
    points and rebounds values from the rendered page.

    Args:
        player_list: Player objects whose ``link`` is a path relative to
            https://stats.nba.com.

    Returns:
        The same list object, with ``PPG`` and ``RPG`` set on the players
        that were processed (``""`` when a value could not be found).
    """
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    browser = webdriver.Chrome(options=opt)
    try:
        for p in player_list[0:2]:
            print('https://stats.nba.com'+p.link)
            browser.get('https://stats.nba.com'+p.link)

            soup = BeautifulSoup(browser.page_source, 'lxml')

            # The browser stays open between iterations: quitting inside the
            # loop would make the next get() run on a closed session.
            p.PPG = _last_sibling_stat(
                soup, '/players/traditional/?sort=PTS&dir=-1', "PPG")
            p.RPG = _last_sibling_stat(
                soup, '/players/traditional/?sort=REB&dir=-1', "RPG")
    finally:
        # Single shutdown point, reached on success and on error alike.
        browser.quit()
    return player_list

# Build the player list first, then enrich it with per-player statistics.
player_list = get_player_stats(
    get_player_list()
)

推荐阅读