python - 我需要安排一个 Python 脚本在同一目录中打开一个 database.txt
问题描述
我尝试过只为脚本本身设置计划任务的方法,但它不起作用。我的脚本必须访问 database.txt,以读取对程序流程至关重要的值,但是当我把它设为计划任务运行时,它没有正常运行。这两个文件在同一个目录中。我搜索过,但几乎所有相关问题都是关于用一个脚本去运行另一个脚本的。我在 Python 3.9 中使用 Selenium。
程序抓取 Pixiv,并获取程序中指定的某个角色所拥有的插图数量。然后,它打开 database.txt,在其中查找该角色,并更新对应的值。当页面中的插图数量高于数据库中记录的数量时,它应该更新数据库,并执行我在代码中指定的操作。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import *
import requests, os
import datetime
import time
from sys import getsizeof
# Wall-clock timestamp taken when the module is loaded; the entry point
# subtracts it from the finish time to report the total runtime.
start = time.time()

# HTTP headers passed to the image download requests in track_character.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/93.0.4577.82 Safari/537.36 OPR/79.0.4143.73'
    ),
    'referer': 'https://www.pixiv.net/en/',
}
def open_browser(url):
    """Start a headless Firefox session and load *url*.

    Args:
        url: Address of the page to open.

    Returns:
        The Selenium Firefox driver, already navigated to *url*. The caller
        owns the driver and is responsible for calling ``quit()`` on it.

    Raises:
        Exception: Whatever Selenium raises while configuring the window or
            loading the page; the browser is shut down before re-raising.
    """
    options = webdriver.FirefoxOptions()
    options.add_argument('--headless')
    # NOTE(review): the raw string combined with doubled backslashes yields a
    # path that really contains "\\" separators; kept byte-for-byte because
    # the original relied on it -- confirm this is intended.
    driver = webdriver.Firefox(
        executable_path=r'C:\\codigos\\SeleniumDrivers\\geckodriver.exe',
        options=options,
    )
    try:
        driver.set_window_position(3000, 0)
        driver.maximize_window()
        driver.get(url)
    except Exception:
        # Do not leave an orphaned headless Firefox process behind when the
        # setup fails after the browser has already been launched.
        driver.quit()
        raise
    return driver
def login(driver, username, password):
    """Fill in and submit the login form on the currently loaded page.

    Uses ``find_element`` with ``By`` locators, the same locator style used
    by ``track_character``, instead of the deprecated ``find_element_by_*``
    helpers.

    Args:
        driver: Selenium driver positioned on the landing page.
        username: E-mail address or pixiv ID typed into the first field.
        password: Password typed into the second field.
    """
    # Open the login form from the landing page.
    driver.find_element(By.CLASS_NAME, 'signup-form__submit--login').click()

    email_form = driver.find_element(
        By.CSS_SELECTOR, 'input[placeholder="E-mail address / pixiv ID"]'
    )
    email_form.send_keys(username)

    password_form = driver.find_element(
        By.CSS_SELECTOR, 'input[placeholder="password"]'
    )
    password_form.send_keys(password)

    # Submit the credentials.
    driver.find_element(By.CLASS_NAME, 'signup-form__submit').click()
def take_aliase(japanese_name, english_name):
    """Register a character in ``database.txt`` unless it is already there.

    Each record is one line of the form ``english/japanese/count``. A new
    record is appended with a lower-cased English name and an empty count
    when no existing record has *japanese_name* as its second field.

    Args:
        japanese_name: Japanese name of the character (second field).
        english_name: English name of the character (first field, stored
            lower-cased).
    """
    # 'a+' creates the file on first use and always appends on write;
    # seek(0) is needed because the stream starts positioned at the end.
    with open('database.txt', 'a+', encoding='utf-8') as file:
        file.seek(0)
        known_japanese_names = set()
        for line in file:
            fields = line.rstrip().split('/')
            # Skip blank or malformed lines instead of raising IndexError.
            if len(fields) > 1:
                known_japanese_names.add(fields[1])
        if japanese_name not in known_japanese_names:
            file.write(f'{english_name.lower()}/{japanese_name}/\n')
def _parse_illustration_count(raw_count):
    """Return the integer in *raw_count*, ignoring any separator characters."""
    digits = ''.join(ch for ch in str(raw_count) if ch.isdecimal())
    if not digits:
        raise ValueError(
            f'Could not read an illustration count from {raw_count!r}.'
        )
    return int(digits)


def track_character(driver, character, another_character_name=""):
    """Download the new illustrations of a character and record the new total.

    The Japanese name of the character is used as the Pixiv search query.
    The function registers the character in ``database.txt`` if needed,
    searches Pixiv, reads the illustration count shown on the results page,
    downloads as many illustrations as the count grew by since the recorded
    value, and finally stores the current count.

    Args:
        driver: Logged-in Selenium driver.
        character: Name of the character; the Japanese name first.
        another_character_name: The English name of the same character.

    Raises:
        ValueError: If no number can be read from the count element.
        requests.HTTPError: If an image download returns an error status.
    """
    # TODO: *args could be accepted for possible aliases and for comparing
    # illustrations.
    take_aliase(character, another_character_name)

    # Resolve the record so that `character` holds the Japanese name used
    # for the search and `english_name` the name used for files on disk.
    with open('database.txt', 'r', encoding='utf-8') as file:
        for line in file:
            fields = line.rstrip().split('/')
            if len(fields) < 2:
                continue  # blank or malformed record
            if character == fields[1] or character.lower() in (fields[0], fields[1]):
                english_name, character = fields[0], fields[1]
                break
        else:
            # No record matched: fall back to the name that was passed in
            # rather than to whatever record happened to be read last.
            english_name = another_character_name.lower()

    search_field = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'sc-5ki62n-4'))
    )
    search_field.send_keys(character)
    search_field.submit()

    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, f'a[href="/en/tags/{character}/illustrations?s_mode=s_tag"]')
        )
    ).click()
    driver.refresh()

    raw_count = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'sc-1pt8s3a-8'))
    ).get_attribute('innerHTML')
    # The count may or may not contain thousands separators ("987",
    # "1.234", "1,234,567"); keep only the digits.
    current_num_of_illustrations = _parse_illustration_count(raw_count)

    recorded = get_recorded_num_of_illustrations(character)
    if recorded is None or recorded == "":
        # Nothing recorded yet: treat the current total as the baseline.
        recorded_num_illustration = current_num_of_illustrations
    else:
        recorded_num_illustration = int(recorded)

    illustration_to_be_downloaded = current_num_of_illustrations - recorded_num_illustration
    # NOTE(review): the offset of 4 presumably skips leading anchors that are
    # not search results -- confirm against the page markup.
    for index in range(4, illustration_to_be_downloaded + 4):
        anchors = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located(  # <a> tags for the illustrations
                (By.CLASS_NAME, 'rp5asc-16')
            )
        )
        if index >= len(anchors):
            print(
                f'Only {len(anchors)} links are available on the page; '
                f'skipping the remaining new illustrations.'
            )
            break
        image_code = anchors[index].get_attribute('href').split('/')[-1]
        pages = requests.get(
            f'https://www.pixiv.net/ajax/illust/{image_code}/pages?lang=en'
        ).json()
        path = get_correct_path(english_name)
        for page_number, page in enumerate(pages['body']):
            image_url = page['urls']['original']
            im = requests.get(image_url, headers=headers)
            # Never store an HTTP error page on disk as if it were an image.
            im.raise_for_status()
            timestamp = datetime.datetime.now().strftime('%y-%m-%d--%H-%M-%S')
            # Keep the real extension of the original file and add the
            # illustration id and page number, so files saved within the
            # same second do not overwrite each other.
            extension = os.path.splitext(image_url)[1] or '.jpg'
            file_name = (
                f'{english_name.title()} {timestamp} '
                f'{image_code}_p{page_number}{extension}'
            )
            with open(os.path.join(path, file_name), 'wb') as file:
                file.write(im.content)

    update_database(current_num_of_illustrations, character)
def get_recorded_num_of_illustrations(character_name):
    """Return the illustration count stored for a character.

    Records in ``database.txt`` have the form ``english/japanese/count``.
    A record matches when its Japanese field equals *character_name* or its
    English field equals the lower-cased *character_name*.

    Args:
        character_name: Japanese or English name of the character.

    Returns:
        The third field of the first matching record as a string (empty
        when no count has been stored yet), or ``None`` after printing a
        message when no record matches.
    """
    wanted_english = character_name.lower()
    with open('database.txt', 'r', encoding='utf-8') as file:
        for line in file:
            fields = line.rstrip().split('/')
            # Blank or malformed records cannot match; skip them instead of
            # raising IndexError.
            if len(fields) < 3:
                continue
            if fields[1] == character_name or fields[0] == wanted_english:
                return fields[2]
    print(f"There isn't {character_name} recorded.")
    return None
def update_database(current_num_of_illustrations, character_name):
    """Store a new illustration count for a character in ``database.txt``.

    Every record whose Japanese field equals *character_name*, or whose
    English field equals the lower-cased *character_name*, gets its count
    field replaced. All other lines are written back unchanged (apart from
    trailing whitespace being stripped).

    Args:
        current_num_of_illustrations: The count to store.
        character_name: Japanese or English name of the character.
    """
    wanted_english = character_name.lower()

    # 'a+' keeps the original behaviour of creating the file when it does
    # not exist; seek(0) moves from the end of the file back to the start.
    with open('database.txt', 'a+', encoding='utf-8') as file:
        file.seek(0)
        lines = [line.rstrip() for line in file]

    updated_lines = []
    for line in lines:
        fields = line.split('/')
        # Blank or malformed lines are preserved verbatim rather than
        # aborting the whole update with an IndexError.
        if len(fields) >= 3 and (
            fields[1] == character_name or fields[0] == wanted_english
        ):
            fields[2] = str(current_num_of_illustrations)
            line = '/'.join(fields)
        updated_lines.append(line)

    # The read handle is closed by now, so the file can be rewritten safely.
    with open('database.txt', 'w', encoding='utf-8') as file:
        file.writelines(f'{line}\n' for line in updated_lines)
def get_correct_path(character_name):
    """Return the download directory configured for a character.

    Args:
        character_name: English name of the character; matched
            case-insensitively by substring.

    Returns:
        The directory path for the character. When the name contains both
        known keys, the ``emilia`` directory wins, as in the original code.

    Raises:
        ValueError: If no directory is configured for *character_name*.
    """
    normalized_name = character_name.lower()
    # Raw strings keep the backslashes literal without relying on invalid
    # escape sequences such as "\E" or "\I".
    if 'emilia' in normalized_name:
        return r'D:\Enrico\Imagens\Imagens_de_anime\Re Zero\Emilia\Teste-Scraping'
    if 'miku' in normalized_name:
        return r'D:\Enrico\Imagens\Imagens_de_anime\Gotoubun\Miku'
    raise ValueError(
        f'No download directory is configured for {character_name!r}.'
    )
if __name__ == '__main__':
    # A scheduled task usually starts the interpreter with a working
    # directory other than the script's folder, so the relative
    # 'database.txt' would be looked up (and created) somewhere else.
    # Anchor the working directory to the folder that contains this script.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    url = 'https://www.pixiv.net/en/'
    driver = open_browser(url)
    try:
        # NOTE(review): credentials are hard-coded in the source; consider
        # loading them from the environment or a file kept out of version
        # control.
        login(driver, 'pixivteste@gmail.com', 'teste1234#')
        track_character(driver, 'エミリア', another_character_name='Emilia')
    finally:
        # Always shut the headless browser down, otherwise every scheduled
        # run leaves a Firefox/geckodriver process behind.
        driver.quit()
    end = time.time()
    print(f'Runtime of the program: {end - start}.')
但是当我通过计划任务运行它时,数据库没有更新,而通过 VSCode 运行时却会正常更新。这是什么原因?
解决方案
推荐阅读
- r - 按组计算描述性统计
- python - 变量在 for 循环中未正确重新定义(Python)
- android - 完美运行一段时间后,应用程序崩溃
- c++ - C++ 性能 std::array 与 std::vector
- algorithm - 最小化在池中解决债务的交易成本
- c# - 在 Hololens 上的 C# Unity 中找不到模块 C++ .dll
- html - 是否有可能将静态页面连接到我用 Django 制作的 web 应用程序?
- arrays - Mongoose - 查找数组没有任何匹配项的文档
- python - Python中的逃生通道
- java - 如何在使用 spark joinWith 创建 Tuple2 时修复 UnsupportedOperationException