首页 > 解决方案 > 使用映射列表进行多线程处理时获取列表中的下一项

问题描述

我正在使用代理来访问 URL。由于这是一个代理列表,其中某些代理可能无法使用。如果连接失败,有没有办法改用下一个代理?我不明白在使用 concurrent.futures 的 map 时该如何做到这一点。

 import requests
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 import time
 import concurrent.futures
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC

# Download the proxy list and keep a copy on disk in proxies.txt.
proxy_page = requests.get('https://advanced.name/proxy/fe33f5f3990569edd7fed5a076cfad75').text
with open('proxies.txt', 'w', encoding='utf-8') as f:
    f.write(proxy_page)

# Load the proxies back, one entry per line (assumes the endpoint returns one
# proxy per line -- TODO confirm). Iterating a file yields each line with its
# trailing newline, which would otherwise end up inside Chrome's
# --proxy-server argument, so strip it and skip blank lines. The file is read
# with the same encoding it was written with.
with open('proxies.txt', encoding='utf-8') as f:
    proxy_list = [line.strip() for line in f if line.strip()]

# Snapchat profile pages to capture.
links = [
    'https://www.snapchat.com/add/co.ducks',
    'https://www.snapchat.com/add/rebekkaoulie',
    'https://www.snapchat.com/add/derrengt',
    'https://www.snapchat.com/add/vaumurtch',
    'https://www.snapchat.com/add/akseltrefall',
]

# Screenshot numbers 1..5, passed alongside the links to get_screenshot.
i_list = list(range(1, 6))
def get_screenshot(link, i, proxy):
    """Open *link* in Chrome through *proxy* and save a screenshot of the page.

    Args:
        link: URL to load.
        i: Number used in the output file name ``screenshot-headless{i}.png``.
        proxy: Proxy address handed to Chrome's ``--proxy-server`` switch.
            Surrounding whitespace (e.g. a trailing newline from a proxy
            file) is stripped before use.

    Raises:
        Any exception raised while loading the page, waiting up to 20 seconds
        for the ``css-1motqmv`` element, or saving the screenshot is
        propagated to the caller. The browser is closed in every case.
    """
    _start = time.time()
    options = Options()
    # Headless mode is currently disabled; uncomment the next line to enable it.
    # options.add_argument('--headless')
    options.add_argument('--no-sandbox')  # Bypass OS security model
    options.add_argument('start-maximized')
    options.add_argument('disable-infobars')
    options.add_argument('--proxy-server={}'.format(str(proxy).strip()))
    options.add_argument("--disable-extensions")
    driver = webdriver.Chrome(options=options, executable_path='C:/chromedriver.exe')
    try:
        driver.get(link)
        # Block until the target element is visible so the screenshot is not
        # taken of a half-loaded page.
        WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.CLASS_NAME, 'css-1motqmv'))
        )
        driver.save_screenshot(f'screenshot-headless{i}.png')
    finally:
        # Always shut the browser down; otherwise every failed proxy would
        # leave a Chrome process running.
        driver.quit()
    _end = time.time()

    print('Total time for headless {}'.format(_end - _start))

# Run one screenshot job per (link, number, proxy) triple on a pool of five
# threads. zip() stops at the shortest input, matching what executor.map did.
# Each future's result is retrieved explicitly: exceptions raised in a worker
# are stored on its future and would otherwise never be seen, so a dead proxy
# would fail silently.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    jobs = [
        (link, proxy, executor.submit(get_screenshot, link, i, proxy))
        for link, i, proxy in zip(links, i_list, proxy_list)
    ]
    results = []
    for link, proxy, future in jobs:
        try:
            results.append(future.result())
        except Exception as exc:
            # Report the failure and carry on with the remaining jobs.
            print('Screenshot of {} via proxy {!r} failed: {!r}'.format(link, proxy, exc))

标签: python list

解决方案


推荐阅读