首页 > 解决方案 > Scrapy-cluster 回调请求不起作用,卡在 meta passthrough(元数据透传)中间件的处理阶段


这是 Kibana 的调试面板。我正在尝试使用 scrapy-cluster,但其中的回调请求(callback)不起作用:同样的代码在普通 Scrapy 中工作正常,在 scrapy-cluster 中却不行。爬虫卡在 meta passthrough(元数据透传)中间件的处理阶段,无法抓取数据。

# Spider registered under the name 'ebay_data', built on RedisSpider.
# Visible flow: parse() builds a search request, parse_link() follows product
# links, and the product-details method fills and yields a RawResponseItem.
# NOTE(review): this listing appears truncated -- several statements end at an
# open parenthesis with no continuation, so the class does not parse as shown.
class EbayDataSpider(RedisSpider):
    name = 'ebay_data'
    # Allow a custom parameter (-a flag in the scrapy command).
    # The search phrase is stored on the instance as `search_string`.
    def __init__(self, search="iphone 64GB", *args, **kwargs):
        self.search_string = search
        super(EbayDataSpider, self).__init__(*args, **kwargs)

    # Entry callback: reads the hidden `_trksid` input from the page and uses
    # it to build the eBay search URL that is followed next.
    def parse(self, response):
        # Extract the trksid to build a search request
        # NOTE(review): the argument and closing parenthesis of .xpath( are
        # missing from this listing.
        trksid = response.css("input[type='hidden'][name='_trksid']").xpath(

        # Build the url and start the requests
        # NOTE(review): the expression whose tail is "' ', '+')" is cut off --
        # presumably a space-to-plus replacement on self.search_string; the
        # rest of the follow() call (e.g. its callback) is also not shown.
        yield response.follow(url="http://www.ebay.com/sch/i.html?_from=R40&_trksid=" + trksid +
                             "&_nkw=" +
                                 ' ', '+') + "&_sacat=0",

    # Parse the search results
    def parse_link(self, response):
        # Extract the list of products
        results = response.xpath(
            '//div/div/ul/li[contains(@class, "s-item" )]')

        # Extract info for each product
        for product in results:
            # NOTE(review): the selector passed to product.xpath( is missing
            # from this listing and the call is never closed.
            product_url = product.xpath(
        # NOTE(review): as indented, this yield sits at the level of the `for`
        # statement rather than inside its body -- confirm the intended nesting.
        # The callback name used here is `parse_product_details`.
        yield response.follow(url=product_url, callback=self.parse_product_details)

    # Builds a RawResponseItem from a product details response and yields it.
    # NOTE(review): this method is spelled `parse_product_deails`, while
    # parse_link passes `self.parse_product_details` as the callback -- one of
    # the two spellings is likely a typo.
    def parse_product_deails(self, response):
        # capture raw response
        item = RawResponseItem()
        # populated from response.meta
        item['appid'] = response.meta['appid']
        item['crawlid'] = response.meta['crawlid']
        item['attrs'] = response.meta['attrs']
        # populated from raw HTTP response
        item["url"] = response.request.url
        item["response_url"] = response.url
        item["status_code"] = response.status
        item["status_msg"] = "OK"
        # reconstruct_headers is not defined in this listing; presumably it is
        # inherited from the base spider -- TODO confirm.
        item["response_headers"] = self.reconstruct_headers(response)
        item["request_headers"] = response.request.headers
        # The real response body is deliberately not stored; a fixed
        # placeholder string is used instead.
        # NOTE(review): the placeholder says "amazon spider" although this
        # spider is named 'ebay_data'.
        #item["body"] = response.body
        item["body"] = "This is empty body from amazon spider"
        item["links"] = []

        # Add more data from details page
        # NOTE(review): the selector for 'p_brand' is missing from this
        # listing; the response.xpath( call is never closed.
        item['p_brand'] = response.xpath(
        item['p_title'] = response.xpath("//h1[@id='itemTitle']/text()").extract()
        item['p_price'] = response.xpath("//span[@id='prcIsum']/text()").extract()
        yield item

标签: apache-kafka, redis, callback, scrapy

