首页 > 解决方案 > 在视图中在 django 项目上运行 scrapy

问题描述

我试图在幕后的一个视图中运行一个scrapy脚本,为我获取一些信息,并尝试了许多不同的方法,如使用 crawl、crawlprocess、scrapyd 等,所有这些都返回错误。scrapy 脚本从它自己的文件中完美运行,但是当我尝试从视图中运行它时,我得到了很多错误。任何建议表示赞赏。(尝试将其添加到视图文件底部的 def 中)。

import scrapy
from scrapy import Spider
from scrapy import Request
from scrapy.crawler import CrawlerProcess


class ProductSpider(scrapy.Spider):
    product = input("What product are you looking for? Keywords help for specific products: ")
    name = "Product_spider"
    allowed_domains=['www.amazon.ca']
    start_urls = ['https://www.amazon.ca/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords='+product]
    #so that websites will not block access to the spider
    download_delay = 30

    def parse(self, response):
        crawler = self.crawler_process.create_crawler()
        # xpath is similar to an address that is used to find certain elements in HTML code,this info is then extracted
        product_title = response.xpath('//*/div/div/div/div[2]/div[1]/div[1]/a/@title').extract()
        product_price = response.xpath('//span[contains(@class,"s-price")]/text()').extract()
        product_url = response.xpath('//*/div/div/div/div[2]/div[1]/div[1]/a/@href').extract()
        # yield goes through everything once, saves its spot, does not save info but sends it to the pipeline to get processed if need be
        yield{'product_title': product_title, 'product_price': product_price, 'url': product_url,}
                               #it is checking the same url, no generality, need to find, maybe just do like 5 pages, also see if you can have it sort from high to low and find match with certain amount of key words
    process = CrawlerProcess({
        'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
    })
    process.crawl(ProductSpider)
    process.start()   





from django.shortcuts import render, redirect
from django.contrib.auth.forms import UserCreationForm, AuthenticationForm
from django import forms
from django.contrib.auth.models import User
from .forms import CustomUCreationForm
from django.contrib.auth import authenticate,login,logout
from django.contrib import messages
from .models import Items
from django.core.signals import request_finished
from django.http import HttpRequest
from scrapy.crawler import CrawlerProcess
from scrapyd_api import ScrapydAPI


# Create your views here.
def signup(request):
    if request.method == 'POST':
        form = CustomUCreationForm(request.POST)
        if form.is_valid():
            form.save()
    else:
        form = CustomUCreationForm()
    return render(request, 'signuptemplate.html', {'form': form})


def user_login(request):
    if request.method == 'POST':
        user = authenticate(username=request.POST['username'],password=request.POST['password'])
        if user is not None:
            #not_active means maybe banned or something
            if user.is_active:
                login(request,user)
                return redirect('home')
            else:
                messages.error(request,'username or password not correct')
                return redirect('login')
    return render(request,'design.html')


def home(request):
    return render(request,'home.html')


def logout_method(request):
    return logout(request,'design.html')


def newitem(request):
    if request.method == 'POST':
        current_u = request.user
        temp_Item = Items(Item = request.POST['new-item-input'],Itemuser = current_u)
        temp_Item.save()
        return redirect('home')
    return render(request,'newitem.html')


def currentprices(request):
    #python manage.py runserver --nothreading --noreload
    items = Items.objects.all()
    scrapyd = ScrapydAPI('http://127.0.0.1:8000/')
    scrapyd.schedule('Yourhub2','ProductSpider')
    return render(request,'currentprices.html',{'items':items})

标签: pythondjangoscrapy

解决方案


推荐阅读