首页 > 解决方案 > Node.js 网络爬虫

问题描述

我一直试图获取 url(代码中的一个变量)所指向页面上的产品标题。运行程序后出现了错误,但我不明白这个错误是什么,也不知道它为什么会出现。

const puppeteer = require('puppeteer');

// Listing page to scrape; the brand filter is carried in the query string.
const url = 'https://www.realcanadiansuperstore.ca/Food/Pantry/Canned-%26-Jarred/Broth/plp/RCSS001008003013?productBrand=Campbell%27s';

// CSS selector for the product-name <span> inside every product tile.
const PRODUCT_NAME_SELECTOR =
    "#site-content > div > div > div:nth-child(2) > div > div.product-grid > div.product-grid__results > div.product-grid__results__products > ul > li > div > div > div.product-tile__details > div.product-tile__details__info > h3 > a > span > span.product-name__item.product-name__item--name";

/**
 * Opens the listing page in a headless browser, collects the trimmed text of
 * every element matching PRODUCT_NAME_SELECTOR and prints the resulting array.
 *
 * The browser is always closed, even when navigation or scraping fails, and
 * any failure is reported on stderr with a non-zero exit code instead of
 * surfacing as an unhandled promise rejection.
 */
(async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.goto(url);

        // NOTE(review): the product grid is presumably rendered client-side
        // after the initial load, so wait for the first tile before reading
        // the DOM. This rejects with a timeout error if no tile ever appears
        // - confirm against the live site.
        await page.waitForSelector(PRODUCT_NAME_SELECTOR);

        // The callback runs inside the page, not in Node.
        const products = await page.$$eval(
            PRODUCT_NAME_SELECTOR,
            (nodes) => nodes.map((node) => node.innerText.trim())
        );

        console.log(products);
    } finally {
        // Release the Chromium process regardless of the outcome.
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

如果换成另一个网页,同样的代码就可以正常工作,例如您可以运行以下代码:

const puppeteer = require('puppeteer');

// Listing page to scrape; category and brand filter are carried in the URL fragment.
const url = 'https://shop.saveonfoods.com/store/A8931118/?_ga=2.134607602.2014665139.1585878612-229336869.1585197564/#/category/576,654,882/broth/1?queries=fq%3Dbrand%253ACampbell%2527s%26sort%3DBrand';

// CSS selector for the <h3> title inside every product item.
const PRODUCT_TITLE_SELECTOR =
    '#content > div > div.main > ul > li > div.product__itemContent > div.productInfo.productDetails > hgroup > h3';

/**
 * Opens the listing page in a headless browser, collects the trimmed text of
 * every element matching PRODUCT_TITLE_SELECTOR and prints the resulting array.
 *
 * The browser is always closed, even when navigation or scraping fails, and
 * any failure is reported on stderr with a non-zero exit code instead of
 * surfacing as an unhandled promise rejection.
 */
(async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.goto(url);

        // The callback runs inside the page, not in Node.
        const products = await page.$$eval(
            PRODUCT_TITLE_SELECTOR,
            (nodes) => nodes.map((node) => node.innerText.trim())
        );

        console.log(products);
    } finally {
        // Release the Chromium process regardless of the outcome.
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

标签: javascript, node.js

解决方案


推荐阅读