首页 > 解决方案 > 设置对象没有属性 DATABASE

问题描述

使用 scrapy 抓取网站。当我尝试运行爬虫时,我收到设置对象没有属性 DATABASE 的错误。这是在我将 Database 属性添加到我的 settings.py 文件之后。

这是 settings.py 代码

BOT_NAME = 'teamblind'

SPIDER_MODULES = ['teamblind.spiders']
NEWSPIDER_MODULE = 'teamblind.spiders'

DATABASE = {

    'drivername' : 'xxxxx',
    'host' : 'xxxxx',
    'port' : xxxx,
    'username' : 'xxxx', 
    'password' : 'xxxxx',
    'database' : 'xxxxxx'

}

以下是models.py

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.engine.url import URL


from scrapy.utils.project import get_project_settings


DeclarativeBase = declarative_base()

def db_connect():

    settings = get_project_settings()
    return create_engine(URL(**settings.DATABASE))


#creates a table
def create_posts_table(engine): 

    DeclarativeBase.metadata.create_all(engine)


#creates a post for a table 
def create_posts(DeclarativeBase):

    id = Column(Integer, primary_key = True)
    title = Column('title', String)
    views = Column('views', String)
    comments = Column('comments', String) 
    likes = Column('likes', String)
    link = Column('link', String)

这是管道的代码 -

from .models import create_posts, db_connect, create_posts_table


class TeamblindPipeline(object):

    def __init__(self):

        engine = db_connect()
        create_posts_table(engine)
        self.Session = sessionmaker(bind=engine)


    def process_item(self, item, spider):
        
        session = self.Session()
        post = create_posts(**item)
        

        try:
            session.add(post)
            session.commit()
        except:
            session.rollback()
            raise 
        finally:
            close()

        return item 

这是我的 items.py

from scrapy.item import Item, Field


class TeamblindItem(Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    title = Field()
    views = Field()
    comments = Field()
    likes = Field()
    link = Field()

最后,这是蜘蛛——

import scrapy
#from scrapy.loader import ItemLoader
#from teamblind.items import TeamblindItem


#define the spider class for scraping data from teamblind.com 

class TeamBlindReferrals(scrapy.Spider):

    name = 'teamblind'

    
    #define the start_requests methods to iterate and generate request objects from the start urls
    def start_requests(self):

        urls = ['https://www.teamblind.com/topics/Referrals']

        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)


    #define the parse function to extract data from the website 
    def parse(self, response):

        posts = response.css('li.word-break')

        for item in posts:

            yield{

                'title': item.xpath('//a/@title').get(),
                'views': item.xpath("//a[contains(@class, 'view')]//text()").get(),
                'comments': item.xpath("//a[contains(@class, 'comment')]//text()").get(),
                'likes': item.xpath("//a[contains(@class, 'like')]//text()").get(),
                'link': item.xpath("//li/a//@href").get()
            } 

当我运行蜘蛛时,我收到以下错误,请帮助解决这个问题。

2020-08-12 06:15:11 [twisted] CRITICAL: 
Traceback (most recent call last):
  File "/Users/arif/scrapy_practice/ScrapeProj/lib/python3.8/site-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
    result = g.send(result)
  File "/Users/arif/scrapy_practice/ScrapeProj/lib/python3.8/site-packages/scrapy/crawler.py", line 87, in crawl
    self.engine = self._create_engine()
  File "/Users/arif/scrapy_practice/ScrapeProj/lib/python3.8/site-packages/scrapy/crawler.py", line 101, in _create_engine
    return ExecutionEngine(self, lambda _: self.stop())
  File "/Users/arif/scrapy_practice/ScrapeProj/lib/python3.8/site-packages/scrapy/core/engine.py", line 70, in __init__
    self.scraper = Scraper(crawler)
  File "/Users/arif/scrapy_practice/ScrapeProj/lib/python3.8/site-packages/scrapy/core/scraper.py", line 71, in __init__
    self.itemproc = itemproc_cls.from_crawler(crawler)
  File "/Users/arif/scrapy_practice/ScrapeProj/lib/python3.8/site-packages/scrapy/middleware.py", line 53, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "/Users/arif/scrapy_practice/ScrapeProj/lib/python3.8/site-packages/scrapy/middleware.py", line 35, in from_settings
    mw = create_instance(mwcls, settings, crawler)
  File "/Users/arif/scrapy_practice/ScrapeProj/lib/python3.8/site-packages/scrapy/utils/misc.py", line 156, in create_instance
    instance = objcls(*args, **kwargs)
  File "/Users/arif/scrapy_practice/teamblind/teamblind/pipelines.py", line 16, in __init__
    engine = db_connect()
  File "/Users/arif/scrapy_practice/teamblind/teamblind/models.py", line 14, in db_connect
    return create_engine(URL(**settings.DATABASE))
AttributeError: 'Settings' object has no attribute 'DATABASE'

标签: pythonscrapy

解决方案


推荐阅读