首页 > 解决方案 > 无法使用 UI(带头)运行 scrapy-selenium 库

问题描述

我正在使用 scrapy_selenium,但我不知道如何以 UI(带头)模式运行它。我遵循了这个答案,但收到以下错误:

Traceback (most recent call last):
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\crawler.py", line 192, in crawl
    return self._crawl(crawler, *args, **kwargs)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\crawler.py", line 196, in _crawl
    d = crawler.crawl(*args, **kwargs)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\twisted\internet\defer.py", line 1656, in unwindGenerator
    return _cancellableInlineCallbacks(gen)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\twisted\internet\defer.py", line 1571, in _cancellableInlineCallbacks
    _inlineCallbacks(None, g, status)
--- <exception caught here> ---
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\twisted\internet\defer.py", line 1445, in _inlineCallbacks
    result = current_context.run(g.send, result)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\crawler.py", line 87, in crawl  
    self.engine = self._create_engine()
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\crawler.py", line 101, in _create_engine
    return ExecutionEngine(self, lambda _: self.stop())
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\core\engine.py", line 69, in __init__
    self.downloader = downloader_cls(crawler)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\core\downloader\__init__.py", line 83, in __init__
    self.middleware = DownloaderMiddlewareManager.from_crawler(crawler)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\middleware.py", line 53, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\middleware.py", line 35, in from_settings
    mw = create_instance(mwcls, settings, crawler)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\utils\misc.py", line 167, in create_instance
    instance = objcls.from_crawler(crawler, *args, **kwargs)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy_selenium\middlewares.py", line 67, in from_crawler
    middleware = cls(
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy_selenium\middlewares.py", line 43, in __init__
    for argument in driver_arguments:
builtins.TypeError: 'NoneType' object is not iterable

2021-08-21 11:39:16 [twisted] CRITICAL:
Traceback (most recent call last):
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\twisted\internet\defer.py", line 1445, in _inlineCallbacks
    result = current_context.run(g.send, result)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\crawler.py", line 87, in crawl
    self.engine = self._create_engine() 
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\crawler.py", line 101, in _create_engine
    return ExecutionEngine(self, lambda _: self.stop())
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\core\engine.py", line 69, in __init__
    self.downloader = downloader_cls(crawler)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\core\downloader\__init__.py", line 83, in __init__
    self.middleware = DownloaderMiddlewareManager.from_crawler(crawler)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\middleware.py", line 53, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\middleware.py", line 35, in from_settings   
    mw = create_instance(mwcls, settings, crawler)
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy\utils\misc.py", line 167, in create_instance
    instance = objcls.from_crawler(crawler, *args, **kwargs)    
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy_selenium\middlewares.py", line 67, in from_crawler
    middleware = cls(
  File "D:\Anaconda\envs\web scrapping\lib\site-packages\scrapy_selenium\middlewares.py", line 43, in __init__
    for argument in driver_arguments:
TypeError: 'NoneType' object is not iterable

settings.py

# Selenium driver configuration consumed by scrapy_selenium's middleware.
SELENIUM_DRIVER_NAME = 'chrome'
SELENIUM_DRIVER_EXECUTABLE_PATH = 'Musttrybasic/chromedriver.exe'
# BUG FIX: scrapy_selenium's SeleniumMiddleware.__init__ iterates this
# setting unconditionally (`for argument in driver_arguments:`); leaving it
# unset made it None and raised "TypeError: 'NoneType' object is not
# iterable" (see the traceback above).  An empty list — i.e. no
# '--headless' flag — runs Chrome with its UI visible, which is exactly
# what the question asks for.
SELENIUM_DRIVER_ARGUMENTS = []

代码

import scrapy
from scrapy.selector import Selector
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner
from scrapy.crawler import CrawlerProcess
from selenium.webdriver.common.keys import Keys
from scrapy.selector import Selector
from Musttrybasic import settings


class MusttrySpider(scrapy.Spider):
    """Spider that drives a Selenium-controlled browser to search Swiggy by city.

    Relies on scrapy_selenium: responses produced by SeleniumRequest carry the
    live webdriver instance in ``response.meta['driver']``.
    """

    name = 'mustTry'

    def start_requests(self):
        """Issue the initial SeleniumRequest for the Swiggy landing page."""
        yield SeleniumRequest(url='https://www.swiggy.com', callback=self.parse, wait_time=3, screenshot=True)

    def parse(self, response):
        """Type a city into the location box and submit it via the live driver.

        :param response: scrapy response whose meta holds the Selenium driver.
        """
        # BUG FIX: this line was dedented out of the method body in the
        # original, which is an IndentationError — the module cannot even
        # be imported.  It must sit at the same level as the lines below.
        driver = response.meta['driver']
        # NOTE(review): find_element_by_css_selector was removed in
        # Selenium 4; if you upgrade, switch to
        # driver.find_element(By.CSS_SELECTOR, 'input#location')
        # (By is already imported at the top of this file).
        search_input = driver.find_element_by_css_selector('input#location')
        search_input.send_keys('pune')
        # sleeps give the autocomplete dropdown time to appear; an explicit
        # WebDriverWait would be more robust, but behavior is kept as-is.
        time.sleep(5)
        search_input.send_keys(Keys.DOWN)
        search_input.send_keys(Keys.ENTER)
        time.sleep(10)

标签: pythonweb-scrapingscrapy-selenium

解决方案

错误 `TypeError: 'NoneType' object is not iterable` 发生在 scrapy_selenium 中间件的 `for argument in driver_arguments:` 这一行:因为 settings.py 里没有定义 `SELENIUM_DRIVER_ARGUMENTS`,该设置为 `None`。在 settings.py 中加入 `SELENIUM_DRIVER_ARGUMENTS = []`(不包含 `'--headless'` 参数)即可消除该错误,并以带界面(带头)模式启动浏览器。另外注意:爬虫代码中 `parse` 方法里的 `driver = response.meta['driver']` 一行缩进有误,需与方法体其余语句对齐。
推荐阅读