python - Selenium 下载适用于任何操作系统的 chromedriver
问题描述
我是 selenium 的新手,现在正在尝试构建一个项目,我想在程序开始时下载 chromedriver,而与操作系统无关。我找到了一个由primaryobjects在线编写的工作代码,该代码似乎工作它确实正在下载适当的 chromedriver,但即使在下载后再次执行相同的时间。我认为while循环是无限的,但我无法解决它。我尝试了 chromedriver-autoinstaller,它有效,但真的不想为这个简单的工作安装任何额外的东西。会给予一点帮助。建议表示赞赏。
#!/usr/bin/env python3
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import WebDriverException, SessionNotCreatedException
import sys
import os
import pathlib
import urllib.request
import re
import zipfile
import stat
from sys import platform
def get_driver():
# Attempt to open the Selenium chromedriver. If it fails, download the latest chromedriver.
driver = None
retry = True
while retry:
retry = False
is_download = False
try:
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(chrome_options=options, executable_path='./chromedriver')
except SessionNotCreatedException as e:
if 'This version of ChromeDriver' in e.msg:
is_download = True
except WebDriverException as e:
if "wrong permissions" in e.msg:
st = os.stat('./chromedriver')
os.chmod('./chromedriver', st.st_mode | stat.S_IEXEC)
retry = True
elif "chromedriver' executable needs to be in PATH" in e.msg:
is_download = True
retry = is_download and download_driver()
return driver
def download_driver():
# Find the latest chromedriver, download, unzip, set permissions to executable.
result = False
url = 'https://chromedriver.chromium.org/downloads'
base_driver_url = 'https://chromedriver.storage.googleapis.com/'
file_name = 'chromedriver_' + get_platform_filename()
driver_file_name = 'chromedriver' + '.exe' if platform == "win32" else ''
pattern = 'https://.*?path=(\d+\.\d+\.\d+\.\d+)'
# Download latest chromedriver.
print('Finding latest chromedriver..')
opener = urllib.request.FancyURLopener({})
stream = opener.open(url)
content = stream.read().decode('utf8')
# Parse the latest version.
match = re.search(pattern, content)
if match and match.groups():
# Url of download html page.
url = match.group(0)
# Version of latest driver.
version = match.group(1)
driver_url = base_driver_url + version + '/' + file_name
# Download the file.
print('Version ' + version)
print('Downloading ' + driver_url)
app_path = os.path.dirname(os.path.realpath(__file__))
chromedriver_path = app_path + '/' + driver_file_name
file_path = app_path + '/' + file_name
urllib.request.urlretrieve(driver_url, file_path)
# Unzip the file.
print('Unzipping ' + file_path)
with zipfile.ZipFile(file_path, 'r') as zip_ref:
zip_ref.extractall(app_path)
print('Setting executable permission on ' + chromedriver_path)
st = os.stat(chromedriver_path)
os.chmod(chromedriver_path, st.st_mode | stat.S_IEXEC)
# Cleanup.
os.remove(file_path)
result = True
return result
def get_platform_filename():
filename = ''
is_64bits = sys.maxsize > 2**32
if platform == "linux" or platform == "linux2":
# linux
filename += 'linux'
filename += '64' if is_64bits else '32'
elif platform == "darwin":
# OS X
filename += 'mac64'
elif platform == "win32":
# Windows...
filename += 'win32'
filename += '.zip'
return filename
解决方案
我已经稍微修改了代码,现在几乎可以正常工作了。除了每次运行代码时它都会下载。但它完成了工作。
#!/usr/bin/env python3
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import WebDriverException, SessionNotCreatedException
import sys
import os
import pathlib
import urllib.request
import re
import zipfile
import stat
from sys import platform
def get_driver():
# Attempt to open the Selenium chromedriver. If it fails, download the latest chromedriver.
driver = None
retry = False
while not retry :
retry = False
is_download = False
try:
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(chrome_options=options, executable_path='./chromedriver')
except SessionNotCreatedException as e:
if 'This version of ChromeDriver' in e.msg:
is_download = True
except WebDriverException as e:
if "wrong permissions" in e.msg:
st = os.stat('./chromedriver')
os.chmod('./chromedriver', st.st_mode | stat.S_IEXEC)
retry = True
elif "chromedriver' executable needs to be in PATH" in e.msg:
is_download = True
retry = is_download and download_driver()
return driver
def download_driver():
# Find the latest chromedriver, download, unzip, set permissions to executable.
result = False
url = 'https://chromedriver.chromium.org/downloads'
base_driver_url = 'https://chromedriver.storage.googleapis.com/'
file_name = 'chromedriver_' + get_platform_filename()
driver_file_name = 'chromedriver' + '.exe' if platform == "win32" else ''
pattern = 'https://.*?path=(\d+\.\d+\.\d+\.\d+)'
# Download latest chromedriver.
print('Finding latest chromedriver..')
opener = urllib.request.FancyURLopener({})
stream = opener.open(url)
content = stream.read().decode('utf8')
# Parse the latest version.
match = re.search(pattern, content)
if match and match.groups():
# Url of download html page.
url = match.group(0)
# Version of latest driver.
version = match.group(1)
driver_url = base_driver_url + version + '/' + file_name
# Download the file.
print('Version ' + version)
print('Downloading ' + driver_url)
app_path = os.path.dirname(os.path.realpath(__file__))
chromedriver_path = app_path + '/' + driver_file_name
file_path = app_path + '/' + file_name
urllib.request.urlretrieve(driver_url, file_path)
# Unzip the file.
print('Unzipping ' + file_path)
with zipfile.ZipFile(file_path, 'r') as zip_ref:
zip_ref.extractall(app_path)
print('Setting executable permission on ' + chromedriver_path)
st = os.stat(chromedriver_path)
os.chmod(chromedriver_path, st.st_mode | stat.S_IEXEC)
# Cleanup.
os.remove(file_path)
result = True
return result
def get_platform_filename():
filename = ''
is_64bits = sys.maxsize > 2**32
if platform == "linux" or platform == "linux2":
# linux
filename += 'linux'
filename += '64' if is_64bits else '32'
elif platform == "darwin":
# OS X
filename += 'mac64'
elif platform == "win32":
# Windows...
filename += 'win32'
filename += '.zip'
return filename
推荐阅读
- python - Keras - 嵌入层和 GRU 层形状错误
- javascript - 如何在 Javascript 中将对象数组转换为关联数组对象
- python - 我可以在@app.before_request 中将值附加到我的烧瓶请求对象并将其转发给端点视图函数吗?
- node.js - Azure 存储 - 使用 Node.js 复制包含所有内容的容器
- c++ - C++:我应该返回一个 float 的向量吗?还是它的引用或指针?
- mysql - SQL 触发器回退
- ios - 2018 年 11 月提交应用程序所需的 Xcode 和 MacOS 最低版本
- android - Play 开发者控制台不更新应用
- asp.net - HTTP 错误 403.14 - 禁止 - 关闭弹出窗口时(仅限 IE)
- websphere - 在 WebSphere 项目中使用 main 运行 Java