python - raise TimeoutException(message, screen, stacktrace) selenium.common.exceptions.TimeoutException: Message: ''
问题描述
The script started timing out unexpectedly. It worked fine for a while, but all of a sudden I get this error message every time I run it. The script scrapes a website by clicking each dated link in turn, grabbing the raw data, and concatenating it onto an initially empty DataFrame.
Here's the code:
import pandas as pd
from datetime import date, timedelta, datetime
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from io import StringIO
# Inclusive scraping window; both ends are MM-DD-YYYY strings.
start_date, final_date = '04-26-2020', '05-26-2020'
def get_dates(start_date, final_date):
    """Return every calendar day from start_date through final_date.

    Args:
        start_date: First day of the range as an ``'%m-%d-%Y'`` string,
            e.g. ``'04-26-2020'``.
        final_date: Last day of the range (inclusive). Passed straight to
            ``pd.date_range``, so anything pandas can interpret as a
            timestamp is accepted.

    Returns:
        A list of ``'%m-%d-%Y'`` strings, one per day, in ascending order.
        The list is empty when final_date is earlier than start_date.

    Raises:
        ValueError: If start_date does not match ``'%m-%d-%Y'``.
    """
    date_format = '%m-%d-%Y'
    first_day = datetime.strptime(start_date, date_format)
    # 'D' is the canonical daily alias; the lowercase spelling is deprecated
    # in newer pandas releases.
    days = pd.date_range(first_day, final_date, freq='D')
    return days.strftime(date_format).tolist()
# Firefox options used by scrape() when it starts the browser.
options = FirefoxOptions()
# NOTE(review): "--start-maximized" looks like a Chrome switch; confirm that
# Firefox honours it, otherwise set the window size explicitly.
options.add_argument("--start-maximized")
# Pass the headless flag as a plain argument instead of the `headless`
# property setter, which newer Selenium releases deprecate and then remove.
options.add_argument("-headless")
def scrape(start_date, final_date, path_to_click):
    """Fetch the CSV behind each dated link and stack them into one DataFrame.

    For every date produced by ``get_dates(start_date, final_date)`` the
    function clicks the element whose ``title`` is ``"<date>.csv"``, clicks
    the element located by ``path_to_click``, reads the text of the page's
    ``<pre>`` element as CSV, and then navigates back twice to return to the
    listing page.

    Args:
        start_date: First date to fetch, as an ``'%m-%d-%Y'`` string.
        final_date: Last date to fetch (inclusive).
        path_to_click: XPath of the element to click after a dated link has
            been opened.

    Returns:
        A DataFrame that starts from the ``column_names`` columns and holds
        the rows of every fetched CSV, with a fresh 0..n-1 index and
        ``Last_Update`` converted with ``pd.to_datetime``.

    Raises:
        selenium.common.exceptions.TimeoutException: If either link does not
            become clickable within 10 seconds. The message names the date
            that failed.
    """
    ff_driver = "/usr/bin/geckodriver"
    site = "url"
    # NOTE(review): `executable_path` is deprecated in Selenium 4 and removed
    # in later 4.x releases; switch to a Service object when upgrading.
    driver = webdriver.Firefox(executable_path=ff_driver, options=options)
    try:
        driver.get(site)
        waiting = WebDriverWait(driver, 10)
        # Seed with an empty frame so the result always has the expected
        # columns, even when the date range is empty.
        frames = [pd.DataFrame(columns=column_names)]
        for dt in get_dates(start_date, final_date):
            date_link = (By.XPATH, '//*[@title="{}.csv"]'.format(dt))
            waiting.until(
                EC.element_to_be_clickable(date_link),
                message='link for {}.csv was not clickable'.format(dt),
            ).click()
            waiting.until(
                EC.element_to_be_clickable((By.XPATH, path_to_click)),
                message='{!r} was not clickable for {}'.format(path_to_click, dt),
            ).click()
            raw = driver.find_element(By.XPATH, '/html/body/pre').text
            daily = pd.read_csv(StringIO(raw))
            # The return value is not used, so replace_columns presumably
            # renames the columns of `daily` in place -- TODO confirm.
            replace_columns(daily, column_names)
            daily['Last_Update'] = pd.to_datetime(daily['Last_Update'])
            frames.append(daily)
            # Two clicks moved two pages forward; undo both to get back to
            # the listing before handling the next date.
            driver.back()
            driver.back()
        # Concatenate once at the end rather than re-copying the accumulated
        # frame on every iteration.
        return pd.concat(frames, axis=0, ignore_index=True)
    finally:
        # Always shut the browser down, including after a timeout, so that
        # headless Firefox processes do not pile up across runs.
        driver.quit()
解决方案
推荐阅读
- css - 如何用 flexbox 实现这种设计?
- soap - 更改航班行程的 Sabre Steps
- python - imshow 仅适用于外部范围
- reactjs - 尽管 useEffect 具有依赖关系,但 React 仍然需要单击两次才能更新状态
- javascript - 字典错误 - TypeError: undefined is not an object
- c# - 字符串获取/更新特定字符后的数值
- jmeter - 完成并从其他线程组开始
- opengl - 上传索引缓冲区而不更改 VAO 状态
- python - 尝试将动态值传递给 Rundeck 服务器
- javascript - 我应该为函数使用什么 Jest 匹配器?