首页 > 解决方案 > 将 For 循环变成多处理循环

问题描述

我已经在这段代码上工作了一段时间,一切运行正常,但我希望按下按钮时它能更新得更快。我正在研究多处理(multiprocessing),但我是 Python 新手,所以对于如何把 def click() 中的 for 循环转换为多处理版本、让各次迭代同时运行,我有点困惑。为了让代码更易于阅读,我删减了部分代码和 GUI 设置,这也是某些变量可能看起来没有必要的原因。任何建议都会很棒。谢谢。

import tkinter as tk
import time

from selenium import webdriver
# Configure a headless Chrome session with audio muted. This single browser
# instance is created at start-up and is the one click() drives.
options = webdriver.ChromeOptions()
options.add_argument("--mute-audio")
options.headless = True
# NOTE(review): the chromedriver path is hard-coded to one machine.
browser = webdriver.Chrome('/Users/Matt/Downloads/chromedriver', options=options)

# Main application window.
app = tk.Tk()
app.title("Over / Under")

# Entry fields where the user types the player names to search for.
# click() resolves p<N> and P<N>CurrentStatText by name, so the numbered
# naming pattern of these module-level variables must be kept.

p1 = tk.StringVar()
p2 = tk.StringVar()
p3 = tk.StringVar()

Player1Name = tk.Entry(app, textvariable = p1, width=20)
Player1Name.grid(column=1, row=0, padx=10, pady=5, sticky=tk.N)
Player2Name = tk.Entry(app, textvariable = p2, width=20)
Player2Name.grid(column=1, row=1, padx=10, pady=5, sticky=tk.S)
Player3Name = tk.Entry(app, textvariable = p3, width=20)
Player3Name.grid(column=1, row=2, padx=10, pady=5, sticky=tk.S)

# Entry widgets, initially empty, whose StringVars click() sets to the
# current value of the selected stat.

P1CurrentStatText = tk.StringVar()
P2CurrentStatText = tk.StringVar()
P3CurrentStatText = tk.StringVar()

P1CurrentStatLabel = tk.Entry(app, width=5, textvariable = P1CurrentStatText)
P1CurrentStatLabel.grid(column=5, row=0, padx=10, pady=5)
P2CurrentStatLabel = tk.Entry(app, width=5, textvariable = P2CurrentStatText)
P2CurrentStatLabel.grid(column=5, row=1, padx=10, pady=5)
P3CurrentStatLabel = tk.Entry(app, width=5, textvariable = P3CurrentStatText)
P3CurrentStatLabel.grid(column=5, row=2, padx=10, pady=5)

def click():
    """Look up each entered player on ESPN and display the selected stat.

    For player slots 1 through 5 the name is read from the module-level
    ``p<i>`` variable and the stat selection from ``PRA<i>opt``; the scraped
    value is written to ``P<i>CurrentStatText``. Slots whose name is empty
    are skipped. The shared module-level ``browser`` is driven through the
    ESPN search box, with fixed one-second pauses between the steps.

    Raises:
        KeyError: if a per-slot module-level variable does not exist.
    """
    # Per-slot variables are resolved by name from the module namespace
    # instead of eval(), so no code strings are built and executed.
    module_vars = globals()

    # The three stat tiles differ only in the article index (1-3).
    stat_xpath = (
        '//*[@id="fittPageContainer"]/div[2]/div[5]/div/div/div[1]'
        '/section/div/div[2]/article[{}]/ul/li/div/div'
    )

    for i in range(1, 6):
        pID = module_vars["p{}".format(i)].get()
        if pID == "":
            continue
        pcID = module_vars["P{}CurrentStatText".format(i)]
        PRAID = module_vars["PRA{}opt".format(i)].get()

        browser.get("https://www.espn.com/")
        time.sleep(1)
        srch_btn = browser.find_element_by_xpath('//*[@id="global-search-trigger"]')
        srch_btn.click()
        srch_bar = browser.find_element_by_xpath('//*[@id="global-search"]/input[1]')
        srch_bar.send_keys(pID)
        time.sleep(1)
        # Follow the first search suggestion to the player's page.
        player_page = browser.find_element_by_xpath('//*[@id="global-search"]/div/div/div[1]/ul/li/a')
        player_page.click()
        time.sleep(1)

        points = browser.find_element_by_xpath(stat_xpath.format(1))
        rebounds = browser.find_element_by_xpath(stat_xpath.format(2))
        assists = browser.find_element_by_xpath(stat_xpath.format(3))
        pointstxt = points.get_property('textContent')
        reboundstxt = rebounds.get_property('textContent')
        assiststxt = assists.get_property('textContent')

        # Any selection other than "Rebounds" or "Points" shows assists.
        if PRAID == "Rebounds":
            pcID.set(reboundstxt)
        elif PRAID == "Points":
            pcID.set(pointstxt)
        else:
            pcID.set(assiststxt)

def quit():
    """Close the GUI window and shut down the Selenium browser session.

    Used as the command of the Exit button. The browser is shut down even
    if destroying the window raises.
    """
    try:
        app.destroy()
    finally:
        # quit() ends the whole WebDriver session (all windows and the driver
        # process); close() would only close the current window.
        browser.quit()

# "Update" runs click(), which refreshes the stat shown for every filled-in
# player slot.
UpdateButton = tk.Button(app, text = 'Update', command = click)
UpdateButton.grid(column=2, row=5, pady=10, padx = 10)

# "Exit" runs quit(), which destroys the window and closes the browser.
ExitButton = tk.Button(app, text = 'Exit', command = quit)
ExitButton.grid(column=4, row=5, pady=10, padx = 10)

# Hand control to tkinter's event loop.
app.mainloop()

标签: python selenium loops tkinter multiprocessing

解决方案


Selenium 很慢。如果使用 multiprocessing,就必须在每个进程中各自重新启动一个 Selenium 浏览器。

通过 Chrome/Firefox 的 DevTools 深入研究后,我找到了页面用来以 JSON 格式获取数据的 url。

搜索:

url = 'https://site.web.api.espn.com/apis/common/v3/search?region=us&lang=en&query={}&limit=5&mode=prefix&type=player'.format(query)

细节:

url = 'https://site.web.api.espn.com/apis/common/v3/sports/basketball/mens-college-basketball/athletes/{}/overview?region=us&lang=en&contentorigin=espn'.format(id_)

使用 requests,我可以在 1.9 秒内获取一名球员的数据,所以它甚至不需要多进程。接下来只需要把它与 tkinter 结合使用。

我不确定是否还需要添加一些 cookie 或请求头(headers)。

import requests
import time

def get_data(query):
    """Fetch a player's next-game statistics from ESPN's JSON API.

    Sends *query* to the player search endpoint, takes the first result and
    then requests that athlete's overview (men's college basketball).

    Args:
        query: Player name, or part of it, to search for.

    Returns:
        A tuple ``(id_, name, labels, stats)``: the athlete id and display
        name of the first search result, the labels from
        ``nextGame.statistics`` and the stats of its first split.

    Raises:
        IndexError: if the search returns no players.
        KeyError: if a response lacks one of the expected keys.
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
    """
    # Passing the query via `params` lets requests URL-encode it, so names
    # with spaces or characters such as `&` cannot corrupt the query string.
    r = requests.get(
        'https://site.web.api.espn.com/apis/common/v3/search',
        params={
            'region': 'us',
            'lang': 'en',
            'query': query,
            'limit': 5,
            'mode': 'prefix',
            'type': 'player',
        },
        timeout=10,  # without a timeout a stalled request blocks forever
    )
    r.raise_for_status()
    items = r.json()['items']
    if not items:
        raise IndexError('no player found for query {!r}'.format(query))

    id_ = items[0]['id']
    name = items[0]['displayName']

    # NOTE: the same athlete URL without the `/overview` suffix is an
    # alternative endpoint; it is not used here.
    url = 'https://site.web.api.espn.com/apis/common/v3/sports/basketball/mens-college-basketball/athletes/{}/overview'.format(id_)
    r = requests.get(
        url,
        params={'region': 'us', 'lang': 'en', 'contentorigin': 'espn'},
        timeout=10,
    )
    r.raise_for_status()
    statistics = r.json()['nextGame']['statistics']

    labels = statistics['labels']
    stats = statistics['splits'][0]['stats']

    return (id_, name, labels, stats)

# --- main ---

# Time a single lookup end to end.
start = time.time()

query = 'markell'

id_, name, labels, stats = get_data(query)

print('id:', id_)
print('name:', name)
# labels and stats are paired positionally; print one "label value" per line.
for l, s in zip(labels, stats):
    print(l, s)

end = time.time()
print('time:', end-start, 's')

结果:

id: 4065699
name: Markell Johnson
GP 1
MIN 38.0
FG% 28.6
3P% 0.0
FT% 62.5
REB 4.0
AST 6.0
BLK 0.0
STL 1.0
PF 3.0
TO 3.0
PTS 9.0
time: 1.8497259616851807 s

在 JSON 数据中还有其他信息。


编辑:下面是使用它的 tkinter GUI。它运行得很快,不过也可以使用 threading 让它运行得更快。

import tkinter as tk
import requests
import time

# --- functions ---

def get_data(query):
    """Fetch a player's next-game statistics from ESPN's JSON API.

    Sends *query* to the player search endpoint, takes the first result and
    then requests that athlete's overview (men's college basketball).

    Args:
        query: Player name, or part of it, to search for.

    Returns:
        A tuple ``(id_, name, labels, stats)``: the athlete id and display
        name of the first search result, the labels from
        ``nextGame.statistics`` and the stats of its first split.

    Raises:
        IndexError: if the search returns no players.
        KeyError: if a response lacks one of the expected keys.
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
    """
    # Passing the query via `params` lets requests URL-encode it, so names
    # with spaces or characters such as `&` cannot corrupt the query string.
    r = requests.get(
        'https://site.web.api.espn.com/apis/common/v3/search',
        params={
            'region': 'us',
            'lang': 'en',
            'query': query,
            'limit': 5,
            'mode': 'prefix',
            'type': 'player',
        },
        timeout=10,  # without a timeout a stalled request would freeze the GUI
    )
    r.raise_for_status()
    items = r.json()['items']
    if not items:
        raise IndexError('no player found for query {!r}'.format(query))

    id_ = items[0]['id']
    name = items[0]['displayName']

    # NOTE: the same athlete URL without the `/overview` suffix is an
    # alternative endpoint; it is not used here.
    url = 'https://site.web.api.espn.com/apis/common/v3/sports/basketball/mens-college-basketball/athletes/{}/overview'.format(id_)
    r = requests.get(
        url,
        params={'region': 'us', 'lang': 'en', 'contentorigin': 'espn'},
        timeout=10,
    )
    r.raise_for_status()
    statistics = r.json()['nextGame']['statistics']

    labels = statistics['labels']
    stats = statistics['splits'][0]['stats']

    return (id_, name, labels, stats)

def click():
    """Fetch stats for every filled-in player entry and display them.

    Walks the ``NUMBER`` player rows; for each non-blank name it calls
    ``get_data`` and replaces the content of the matching stats entry with
    ``label: value`` pairs joined by ``'; '``. Progress is echoed to stdout.
    """
    for row in range(NUMBER):
        query = entry_players[row].get().strip()
        print(row, 'query:', query)
        if not query:
            # Blank row: leave its stats entry untouched.
            continue

        id_, player, names, stats = get_data(query)
        print('id:', id_)
        print('player:', player)

        parts = ['{}: {}'.format(label, value) for label, value in zip(names, stats)]
        for text in parts:
            print(text)

        target = entry_stats[row]
        print(target)
        target.delete('0', 'end')
        target.insert('end', '; '.join(parts))
        # Redraw now so each row appears as soon as it has been fetched.
        root.update()

# --- main ---

# Build the window: a header row, NUMBER rows of (player, stats) entries,
# and a final row with the Search and Exit buttons.
root = tk.Tk()

root.columnconfigure(1, weight=1) # to resize column `Stats`

# Parallel lists indexed by row; click() reads entry_players[i] and writes
# the result into entry_stats[i].
entry_players = []
entry_stats = []

tk.Label(root, text='Player').grid(column=0, row=0)
tk.Label(root, text='Stats').grid(column=1, row=0)

NUMBER = 3
for i in range(NUMBER):
    # Row 0 holds the header labels, so entry rows start at 1.
    e = tk.Entry(root)
    e.grid(column=0, row=i+1)
    entry_players.append(e)

    e = tk.Entry(root)
    e.grid(column=1, row=i+1, sticky='we') # `sticky` to resize column `Stats`
    entry_stats.append(e)

# Debug output: show the created stats widgets.
print(entry_stats)

b = tk.Button(root, text='Search', command=click)
b.grid(column=0, row=NUMBER+1)

b = tk.Button(root, text='Exit', command=root.destroy)
b.grid(column=1, row=NUMBER+1)

# Hand control to tkinter's event loop.
root.mainloop()

在此处输入图像描述


推荐阅读