# Note: Selenium-driven browsers seem to be detected too easily by the site; reusing cookies appears to work better.
import json
import requests
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time
import re
# HTTP headers sent with the image downloads; only a desktop Chrome
# User-Agent is set.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/76.0.3809.132 Safari/537.36"
    ),
}
# Module-level browser session: starts Chrome with performance logging
# enabled and opens the bilibili login page.
chrome_options = webdriver.ChromeOptions()
# Turn off W3C mode; presumably required so the legacy 'loggingPrefs'
# capability below is honoured -- confirm against the chromedriver in use.
chrome_options.add_experimental_option('w3c', False)
# Work on a copy: DesiredCapabilities.CHROME is a shared class-level dict, and
# mutating it in place would leak the logging prefs into every later user of
# that template in the same process.
caps = DesiredCapabilities.CHROME.copy()
# Record all performance (DevTools) events so they can be read back later
# through driver.get_log("performance").
caps['loggingPrefs'] = {'performance': 'ALL'}
driver = webdriver.Chrome(desired_capabilities=caps, options=chrome_options)
driver.get('https://passport.bilibili.com/login')
def input_click_01(username="username", password="passport",
                   typing_pause=3, login_wait=5):
    """Fill in the login form on the current page and click the login button.

    Uses the module-level ``driver``. The defaults reproduce the original
    hard-coded values, so calling the function without arguments behaves
    exactly as before.

    Args:
        username: Text typed into the ``login-username`` input.
        password: Text typed into the ``login-passwd`` input.
        typing_pause: Seconds to sleep after typing, before clicking login.
        login_wait: Seconds to sleep after clicking login.
    """
    name_field = driver.find_element_by_xpath("//input[@id='login-username']")
    pwd_field = driver.find_element_by_xpath("//input[@id='login-passwd']")
    name_field.send_keys(username)
    pwd_field.send_keys(password)
    time.sleep(typing_pause)

    driver.find_element_by_class_name("btn-login").click()
    # Fixed wait after the click; presumably gives the captcha requests time
    # to show up in the performance log -- TODO confirm.
    time.sleep(login_wait)
def browser_log_02():
    """Dump the browser's performance log to ``devtools.json`` and return it.

    Each performance-log entry carries a JSON string in its ``'message'``
    field; the inner ``'message'`` object of every entry is collected and the
    resulting list is serialised as indented JSON.

    Returns:
        str: The JSON text that was written to ``devtools.json``.
    """
    entries = driver.get_log("performance")
    logs = [json.loads(entry['message'])['message'] for entry in entries]

    # Serialise once and reuse the text instead of writing the file and then
    # reading it back.
    browser_log = json.dumps(logs, indent=4, ensure_ascii=False)

    # The file is kept on disk for testing and for inspecting the regex
    # matching done later. ensure_ascii=False emits raw non-ASCII characters,
    # so the encoding is pinned to UTF-8 rather than the platform default.
    with open('devtools.json', 'w', encoding='utf-8') as f:
        f.write(browser_log)

    print("浏览器日志获取完成")
    return browser_log
def get_response_img_url_03(browser_log):
    """Find the geetest captcha response in the log and extract image URLs.

    Args:
        browser_log: Performance-log text (JSON) to search.

    Returns:
        tuple: ``(fullbg_url, bg_url)``, each built by prefixing the path
        found in the response body with ``https://static.geetest.com/``.

    Raises:
        IndexError: If the request id cannot be found in ``browser_log`` or
            the response body lacks the ``fullbg`` / ``bg`` entries.
    """
    # Function-scope import so the block does not depend on a file-level one.
    import base64

    # Locate the requestId of the geetest get.php request.
    # Two kinds of matches can occur; the first one is used. That has not
    # failed so far -- add filtering here if it ever picks the wrong one.
    pat = r"""https://api\.geetest\.com/get\.php\?is_next.*?\".*?\"requestId\": \"(\d+?\.\d+?)\","""
    request_match = re.search(pat, browser_log, re.S)
    if request_match is None:
        # IndexError keeps the exception type an unchecked [0] lookup raised.
        raise IndexError("no geetest get.php requestId found in the browser log")
    requestId = request_match.group(1)

    # Key step: ask DevTools for the response body of that request.
    response_dict = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})
    body = response_dict["body"]
    if response_dict.get("base64Encoded"):
        # DevTools flags base64-encoded bodies; decode before regex matching.
        body = base64.b64decode(body).decode("utf-8", errors="replace")

    # Pull the relative image paths out of the response text.
    fullbg = re.search(r"fullbg\":.\"(.*?)\",", body)
    bg = re.search(r"\"bg\":.\"(.*?)\",", body)
    if fullbg is None or bg is None:
        raise IndexError("captcha response does not contain both 'fullbg' and 'bg' image paths")

    fullbg_url = "https://static.geetest.com/" + fullbg.group(1)
    bg_url = "https://static.geetest.com/" + bg.group(1)
    return fullbg_url, bg_url
def get_img_04(fullbg_url, bg_url, timeout=10):
    """Download the two captcha images and save them in the working directory.

    Args:
        fullbg_url: URL of the full image; saved as ``full_image.jpg``.
        bg_url: URL of the second image; saved as ``fix_image.jpg``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        tuple: ``("full_image.jpg", "fix_image.jpg")``.

    Raises:
        requests.RequestException: On a timeout, a connection failure or an
            HTTP error status.
    """
    targets = (
        (fullbg_url, "full_image.jpg"),
        (bg_url, "fix_image.jpg"),
    )

    # Fetch both images before writing anything, so a failed download does
    # not leave a single stale file behind.
    downloads = []
    for url, filename in targets:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Fail loudly instead of saving an error page as a .jpg file.
        response.raise_for_status()
        downloads.append((filename, response.content))

    for filename, data in downloads:
        with open(filename, "wb") as f:
            f.write(data)

    print("保存图片完成")
    return "full_image.jpg", "fix_image.jpg"
def main():
    """Run the whole flow: log in, dump the log, find and save the images.

    The browser session is always shut down at the end, including when one
    of the steps raises.
    """
    try:
        input_click_01()
        log_data = browser_log_02()
        url_tuple = get_response_img_url_03(log_data)
        get_img_04(*url_tuple)
    finally:
        # quit() ends the whole driver session; close() would only close the
        # current window and was skipped entirely when a step failed.
        driver.quit()
# Run the capture flow only when this file is executed directly as a script.
if __name__ == "__main__":
    main()