首页 > 解决方案 > Puppeteer 抓取在 ubuntu 服务器 18.04 上不起作用

问题描述

如果我在本地机器上运行此代码,它可以工作,但是在我部署到服务器后它不再工作

var express = require('express');
var app = express();

const puppeteer = require('puppeteer');

app.get('/scrape', function (req, res) {
  const url = req.query.url;

  const preparePageForTests = async (page) => {
    // Pass the User-Agent Test.
    const userAgent =
      'Mozilla/5.0 (X11; Linux x86_64)' +
      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36';
    await page.setUserAgent(userAgent);
  };

  (async () => {
    const browser = await puppeteer.launch({
      args: ['--no-sandbox'],
    });
    const page = await browser.newPage();
    // await page.setViewport({ width: 1280, height: 720 });
    await preparePageForTests(page);

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (req.resourceType() == 'stylesheet' || req.resourceType() == 'font') {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto(url, { waitUntil: 'load' });

    if (url.includes('tokopedia')) {
      data = await page.evaluate(() => {
        const product = document.querySelector('h1.css-1wtrxts').textContent;
        const price = document.querySelector('.price');
        const img = document.querySelector('.css-1b60o1a img');
        const dataObj = {
          product,
          price,
          img,
        };
        return dataObj;
      });
    } else if (url.includes('shopee')) {
      data = await page.evaluate(() => {
        const product = document.querySelector('div.attM6y > span').textContent;
        const price = document.querySelector('div.Ybrg9j').textContent;
        const img = null;
        const dataObj = {
          product,
          price,
          img,
        };
        return dataObj;
      });
    } else if (url.includes('blibli')) {
      await page.waitForSelector('div.product-info__main > div.product-name');
      data = await page.evaluate(() => {
        const product = document.querySelector(
          'div.product-info__main > div.product-name'
        ).textContent;
        const price = document.querySelector(
          'div.final-price > span'
        ).textContent;
        const img = null;
        const dataObj = {
          product,
          price,
          img,
        };
        return dataObj;
      });
    } else {
      // await page.waitForSelector('div.pdp-mod-product-badge-title');
      data = await page.evaluate(() => {
        const product = 'null';
        const price = 'asd';
        const img = null;
        const dataObj = {
          product,
          price,
          img,
        };
        return dataObj;
      });
    }
    console.log(data);
    res.json(data);
    await browser.close();
  })();
});

const port = 3000;
app.listen(port, () => {
  console.log(`Server running at http://localhost:${port}`);
});

exports = module.exports = app;

如果我在本地运行:

http://127.0.0.1:3000/scrape?url=https://www.tokopedia.com/collinsofficial/acer-aspire-3-slim-a314-22-amd-ryzen-3-3250u-4gb-256gb-14-fhd-w10-ohs-hd-1366x768?utm_source=Android&utm_source=Android&utm_medium=Share&utm_medium=Share&utm_campaign=Product%20Share&utm_campaign=Product%20Share&_branch_match_id=978964305966617172

它会给出回应:

{
  "product": "Acer Aspire 3 Slim A314-22 AMD RYZEN 3-3250U 4GB 256GB 14\" FHD W10 OHS 
  - HD (1366x768)",
  "price": "Rp6.999.000",
  "img": "https://images.tokopedia.net/img/cache/500- 
  square/VqbcmM/2021/11/10/8feb4ee0-1fb1-49fc-876e-71a0bbda732f.jpg.webp?ect=4g"
}

但我在 ubuntu 服务器 18.04 中部署它不起作用,给出错误:

/var/www/html/scrapejs/node_modules/puppeteer/lib/cjs/puppeteer/common/ExecutionContext.js:221 throw new Error('Evaluation failed:' + helper_js_1.helper.getExceptionMessage(exceptionDetails)); ^

错误:评估失败:TypeError:无法在 ExecutionContext._evaluateInternal (/var/www/html/scrapejs/node_modules/puppeteer/lib/cjs/puppeteer/common/ 处读取 puppeteer_evaluation_script :4:57 处null 属性 (读取 'textContent') ExecutionContext.js:221:19) 在异步 ExecutionContext.evaluate (/var/www/html/scrapejs/node_modules/puppeteer/lib/cjs/puppeteer/common/ExecutionContext.js:110:16) 在异步 /var/www/ html/scrapejs/app.js:43:14

希望我能找到解决方案,我该怎么办?

标签: javascriptnode.jsexpressweb-scrapingpuppeteer

解决方案


推荐阅读