python - 在视图中在 django 项目上运行 scrapy
问题描述
我试图在幕后的一个视图中运行一个scrapy脚本,为我获取一些信息,并尝试了许多不同的方法,如使用 crawl、crawlprocess、scrapyd 等,所有这些都返回错误。scrapy 脚本从它自己的文件中完美运行,但是当我尝试从视图中运行它时,我得到了很多错误。任何建议表示赞赏。(尝试将其添加到视图文件底部的 def 中)。
import scrapy
from scrapy import Spider
from scrapy import Request
from scrapy.crawler import CrawlerProcess
class ProductSpider(scrapy.Spider):
    """Crawl an Amazon.ca search-results page and yield title/price/url records.

    The original version called input() in the class body, which runs at
    *import* time — inside Django that blocks the worker process, which is
    why the spider "works from its own file" but not from a view.  The
    search keyword is now a spider argument so it can be scheduled
    programmatically, e.g.::

        process.crawl(ProductSpider, product="laptop")

    When no ``product`` argument is given (interactive/standalone use) the
    spider still falls back to prompting on stdin, preserving the old
    behaviour.
    """

    name = "Product_spider"
    allowed_domains = ['www.amazon.ca']
    # Large delay so that the target site is less likely to block the spider.
    download_delay = 30

    def __init__(self, product=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if product is None:
            # Interactive fallback only — never reached when a keyword is
            # passed via process.crawl()/scrapyd schedule args.
            product = input("What product are you looking for? Keywords help for specific products: ")
        self.product = product
        self.start_urls = [
            'https://www.amazon.ca/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=' + product
        ]

    def parse(self, response):
        # NOTE: the original body also did
        #     crawler = self.crawler_process.create_crawler()
        # but Spider instances have no `crawler_process` attribute, so that
        # line raised AttributeError on every response; it has been removed.
        # XPath locates elements in the HTML; the matched text is extracted.
        product_title = response.xpath('//*/div/div/div/div[2]/div[1]/div[1]/a/@title').extract()
        product_price = response.xpath('//span[contains(@class,"s-price")]/text()').extract()
        product_url = response.xpath('//*/div/div/div/div[2]/div[1]/div[1]/a/@href').extract()
        # yield sends the scraped record on to the item pipeline.
        yield {'product_title': product_title, 'product_price': product_price, 'url': product_url}
#it is checking the same url, no generality, need to find, maybe just do like 5 pages, also see if you can have it sort from high to low and find match with certain amount of key words
# Standalone entry point: spin up a crawler process for the spider and block
# until the crawl completes.  The custom User-Agent makes the requests look
# like an ordinary browser.
crawler_settings = {
    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
}
process = CrawlerProcess(crawler_settings)
process.crawl(ProductSpider)
process.start()
from django.shortcuts import render, redirect
from django.contrib.auth.forms import UserCreationForm, AuthenticationForm
from django import forms
from django.contrib.auth.models import User
from .forms import CustomUCreationForm
from django.contrib.auth import authenticate,login,logout
from django.contrib import messages
from .models import Items
from django.core.signals import request_finished
from django.http import HttpRequest
from scrapy.crawler import CrawlerProcess
from scrapyd_api import ScrapydAPI
# Create your views here.
def signup(request):
    """Handle user registration with the custom user-creation form.

    GET renders an empty form; POST validates and saves the new user, then
    redirects (Post/Redirect/Get) so a browser refresh cannot resubmit the
    registration.  Invalid POST data falls through and re-renders the bound
    form with its errors.
    """
    if request.method == 'POST':
        form = CustomUCreationForm(request.POST)
        if form.is_valid():
            form.save()
            # The original re-rendered the signup page after a successful
            # save; redirect instead so the account is created only once.
            return redirect('login')
    else:
        form = CustomUCreationForm()
    return render(request, 'signuptemplate.html', {'form': form})
def user_login(request):
    """Authenticate a user and start their session.

    POST checks the submitted credentials: active users are logged in and
    redirected to 'home'; disabled accounts and bad credentials both get an
    error message and a redirect back to the login page.  GET simply renders
    the login template.
    """
    if request.method == 'POST':
        # .get() avoids a KeyError (HTTP 500) when the form fields are
        # missing from a malformed POST; authenticate() just returns None.
        user = authenticate(
            username=request.POST.get('username'),
            password=request.POST.get('password'),
        )
        if user is not None:
            if user.is_active:
                login(request, user)
                return redirect('home')
            # Account exists but is disabled (e.g. banned).  The original
            # silently re-rendered the page with no feedback here.
            messages.error(request, 'this account has been disabled')
            return redirect('login')
        messages.error(request, 'username or password not correct')
        return redirect('login')
    return render(request, 'design.html')
def home(request):
    """Render the landing page."""
    template_name = 'home.html'
    return render(request, template_name)
def logout_method(request):
    """Log the current user out, then show the login/design page.

    The original did ``return logout(request, 'design.html')`` —
    ``django.contrib.auth.logout`` takes only the request, so the extra
    template argument raised TypeError and no HttpResponse was ever
    returned.  Log out first, then render the page as a proper response.
    """
    logout(request)
    return render(request, 'design.html')
def newitem(request):
    """Show the new-item form, or save a posted item for the current user."""
    # Guard clause: anything other than a POST just gets the form page.
    if request.method != 'POST':
        return render(request, 'newitem.html')
    item_record = Items(Item=request.POST['new-item-input'], Itemuser=request.user)
    item_record.save()
    return redirect('home')
def currentprices(request):
    """List all saved items and schedule the Amazon price spider via scrapyd.

    Requires a scrapyd daemon running separately from Django.  The original
    pointed ScrapydAPI at ``http://127.0.0.1:8000/`` — Django's own dev
    server — so every schedule call hit the web app instead of scrapyd,
    whose default port is 6800.  It also passed the Python class name
    ('ProductSpider'); ``schedule()`` expects the spider's ``name``
    attribute, which is 'Product_spider'.
    """
    # Run the dev server with: python manage.py runserver --nothreading --noreload
    items = Items.objects.all()
    # NOTE(review): 'Yourhub2' is assumed to be the deployed scrapyd project
    # name — confirm against the scrapyd-deploy configuration.
    scrapyd = ScrapydAPI('http://127.0.0.1:6800/')
    scrapyd.schedule('Yourhub2', 'Product_spider')
    return render(request, 'currentprices.html', {'items': items})
解决方案
推荐阅读
- single-sign-on - 为 Grafana 设置 SSO
- r - 如何在R中为变量设置递增顺序的optimx条件
- excel - Excel查找此值和此值以及它们相遇的位置输入新值
- ldap - ASP.NET 样板 - Ldap 身份验证问题
- r - R:如何使用 XLConnect 的 setCellFormula 函数编写非英语的 excel 公式?
- javascript - SVG javascript 动画未按预期工作
- assembly - 程序集标签前缀
- c# - 用脚本语言实现简洁的 IF 条件
- android - 如何使用颤振框架获取手机号码?
- reactjs - 将服务器端渲染添加到现有的 React 应用程序