首页 > 解决方案 > 我需要安排一个 Python 脚本在同一目录中打开一个 database.txt

问题描述

我尝试将该方法仅用于脚本,但它不起作用。我的脚本必须访问 database.txt,以检索对程序流程至关重要的值,但是当我尝试安排时,它没有正常运行。这两个文件在同一个目录中。我进行了搜索,但几乎每一个问题都是关于运行一个运行另一个脚本的脚本。我在 Python 3.9 中使用 Selenium

程序抓取 Pixiv,并获取程序中指定的某个角色所拥有的插图数量。然后,它打开 database.txt,搜索其中的字符,并更新值。当页面中的插图数量高于我在数据库中的数量时,它应该更新并执行我在代码中指定的任何操作。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import *
import requests, os
import datetime
import time
from sys import getsizeof

start = time.time()

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36 OPR/79.0.4143.73',
    'referer': 'https://www.pixiv.net/en/'
}

def open_browser(url):
    options = webdriver.FirefoxOptions()
    options.add_argument('--headless')
    driver = webdriver.Firefox(executable_path=r'C:\\codigos\\SeleniumDrivers\\geckodriver.exe', options=options)
    driver.set_window_position(3000,0)
    driver.maximize_window()
    driver.get(url)
    return driver

def login(driver, username, password):
    button_to_login = driver.find_element_by_class_name(
    'signup-form__submit--login'
    ).click()

    email_form = driver.find_element_by_css_selector(
        'input[placeholder="E-mail address / pixiv ID"]'
    )
    email_form.send_keys(username)

    password_form = driver.find_element_by_css_selector(
        'input[placeholder="password"]'
    )
    password_form.send_keys(password)

    login_button = driver.find_element_by_class_name(
        'signup-form__submit'
        ).click()

def take_aliase(japanese_name, english_name):
    with open('database.txt', 'a+', encoding='utf-8') as file:
        file.seek(0)
        character_names = [character_name.rstrip() for character_name in file]
        if not character_names:
            file.write(f'{english_name.lower()}/{japanese_name}/\n')
        else:
            formatted_names = []
            for character_name in character_names:
                character_name = character_name.split('/')[1]
                formatted_names.append(character_name)
                
            if japanese_name not in formatted_names:
                file.write(f'{english_name.lower()}/{japanese_name}/\n')
                    
def track_character(driver, character, another_character_name=""): # Pode ser colocado um *args para possíveis aliases e comparações de ilustrações
    """We are using the japanese name of the character for the search query of Pixiv."""
    """First the name in japanese, and in second the name in english."""

    # take_aliase('エミリア', 'Emilia')
    # take_aliase('中野三玖', 'Miku Nakano')
    take_aliase(character, another_character_name)
    #take_aliase('中野五月', 'Itsuki Nakano')

    recorded_num_illustration = 0
    english_name = ""
    japanese_name = ""

    with open('database.txt', 'a+', encoding='utf-8') as file:
        file.seek(0)
        character_names = [character_name.rstrip() for character_name in file]
        for character_name in character_names:
            english_name = character_name.split('/')[0]
            japanese_name = character_name.split('/')[1]
            if (character.lower() == english_name) or (character.lower() == japanese_name):
                character = japanese_name
                break

    search_field = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'sc-5ki62n-4'))
    )
    search_field.send_keys(character)
    search_field.submit()

    illustrations_page = WebDriverWait(driver, 15).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, f'a[href="/en/tags/{character}/illustrations?s_mode=s_tag"]')
        )
    ).click()

    driver.refresh()

    current_num_of_illustrations = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.CLASS_NAME, 'sc-1pt8s3a-8')
        )
    ).get_attribute('innerHTML')

    n = str(current_num_of_illustrations).split('.')
    current_num_of_illustrations = n[0] + n[1]
    current_num_of_illustrations = int(current_num_of_illustrations)

    recorded_num_illustration = get_recorded_num_of_illustrations(character)
    if recorded_num_illustration == "":
        recorded_num_illustration = current_num_of_illustrations
    else:
        recorded_num_illustration = int(recorded_num_illustration)
    
    if (current_num_of_illustrations) > (recorded_num_illustration):
        
        # list_of_images = WebDriverWait(driver, 10).until(
        #     EC.presence_of_all_elements_located(
        #         (By.CLASS_NAME, 'l7cibp-2')
        #     )
        # )

        illustration_to_be_downloaded = ((current_num_of_illustrations) - (recorded_num_illustration))

        for i in range(4, illustration_to_be_downloaded + 4):
            image_link = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located(                # <a> tags for the illustrations
                    (By.CLASS_NAME, 'rp5asc-16')
                )
            )[i].get_attribute('href')

            image_code = image_link.split('/')[-1]

            r = requests.get(f'https://www.pixiv.net/ajax/illust/{image_code}/pages?lang=en').json()
            image_links = [i['urls']['original'] for i in r['body']]
            for j in range(len(image_links)):
                im = requests.get(image_links[j], headers=headers)

                path = get_correct_path(english_name)
                current_datetime = datetime.datetime.now()
                # current_datetime = str(current_datetime).split('.')[0].split(' ')
                # current_datetime = current_datetime[0] + '-' + current_datetime[1]

                year   = current_datetime.strftime('%y')
                month  = current_datetime.strftime('%m')
                day    = current_datetime.strftime('%d')
                hour   = current_datetime.strftime('%H')
                minute = current_datetime.strftime('%M')
                second = current_datetime.strftime('%S')

                with open(os.path.join(path, english_name.title() + f" {year}-{month}-{day}--{hour}-{minute}-{second}.jpg",), 'wb') as file:
                    file.write(im.content)

    update_database(current_num_of_illustrations, character)
    
def get_recorded_num_of_illustrations(character_name):
    with open('database.txt', 'r', encoding='utf-8') as file:
        lines = [line.rstrip() for line in file]
        
    for i in range(len(lines)):
        checker_japanese = lines[i].split('/')[1]
        checker_english = lines[i].split('/')[0]
        if (checker_japanese == character_name) or (checker_english == character_name.lower()):
            return lines[i].split('/')[2]

    print(f"There isn't {character_name} recorded.")
    
def update_database(current_num_of_illustrations, character_name):
    with open('database.txt', 'a+', encoding='utf-8') as file:
        file.seek(0)
        lines = [line.rstrip() for line in file]
        
    for i in range(len(lines)):
        new_line = []
        checker_japanese = lines[i].split('/')[1]
        checker_english = lines[i].split('/')[0]
        num_of_illustrations = lines[i].split('/')[2]
        new_line.append(checker_english)
        new_line.append(checker_japanese)
        new_line.append(num_of_illustrations)
        if (checker_japanese == character_name) or (checker_english == character_name.lower()):
            new_line[2] = current_num_of_illustrations
            new_line = f'{new_line[0]}/{new_line[1]}/{new_line[2]}'
        else:
            new_line = f'{new_line[0]}/{new_line[1]}/{new_line[2]}'
            
        lines[i] = new_line

    with open('database.txt', 'w', encoding='utf-8') as f:
        for line in lines:
            f.write(line + '\n')
  
def get_correct_path(character_name):
    if 'miku' in character_name:
        path = 'D:\Enrico\Imagens\Imagens_de_anime\Gotoubun\Miku'

    if 'emilia' in character_name:
        path = 'D:\Enrico\Imagens\Imagens_de_anime\Re Zero\Emilia\Teste-Scraping'

    return path


if __name__ == '__main__':
    url = 'https://www.pixiv.net/en/'
    driver = open_browser(url)
    login(driver, 'pixivteste@gmail.com', 'teste1234#')
    track_character(driver, 'エミリア', another_character_name='Emilia')

    end = time.time()
    print(f'Runtime of the program: {end - start}.')`

但是当我安排任务时,它没有更新,即使当我通过 VSCode 运行它时它也会更新。有什么原因吗?

标签: pythonseleniumscheduled-tasks

解决方案


推荐阅读