javascript - Puppeteer 抓取在 ubuntu 服务器 18.04 上不起作用
问题描述
如果我在本地机器上运行此代码,它可以工作,但是在我部署到服务器后它不再工作
var express = require('express');
var app = express();
const puppeteer = require('puppeteer');
app.get('/scrape', function (req, res) {
const url = req.query.url;
const preparePageForTests = async (page) => {
// Pass the User-Agent Test.
const userAgent =
'Mozilla/5.0 (X11; Linux x86_64)' +
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36';
await page.setUserAgent(userAgent);
};
(async () => {
const browser = await puppeteer.launch({
args: ['--no-sandbox'],
});
const page = await browser.newPage();
// await page.setViewport({ width: 1280, height: 720 });
await preparePageForTests(page);
await page.setRequestInterception(true);
page.on('request', (req) => {
if (req.resourceType() == 'stylesheet' || req.resourceType() == 'font') {
req.abort();
} else {
req.continue();
}
});
await page.goto(url, { waitUntil: 'load' });
if (url.includes('tokopedia')) {
data = await page.evaluate(() => {
const product = document.querySelector('h1.css-1wtrxts').textContent;
const price = document.querySelector('.price');
const img = document.querySelector('.css-1b60o1a img');
const dataObj = {
product,
price,
img,
};
return dataObj;
});
} else if (url.includes('shopee')) {
data = await page.evaluate(() => {
const product = document.querySelector('div.attM6y > span').textContent;
const price = document.querySelector('div.Ybrg9j').textContent;
const img = null;
const dataObj = {
product,
price,
img,
};
return dataObj;
});
} else if (url.includes('blibli')) {
await page.waitForSelector('div.product-info__main > div.product-name');
data = await page.evaluate(() => {
const product = document.querySelector(
'div.product-info__main > div.product-name'
).textContent;
const price = document.querySelector(
'div.final-price > span'
).textContent;
const img = null;
const dataObj = {
product,
price,
img,
};
return dataObj;
});
} else {
// await page.waitForSelector('div.pdp-mod-product-badge-title');
data = await page.evaluate(() => {
const product = 'null';
const price = 'asd';
const img = null;
const dataObj = {
product,
price,
img,
};
return dataObj;
});
}
console.log(data);
res.json(data);
await browser.close();
})();
});
const port = 3000;
app.listen(port, () => {
console.log(`Server running at http://localhost:${port}`);
});
exports = module.exports = app;
如果我在本地运行:
http://127.0.0.1:3000/scrape?url=https://www.tokopedia.com/collinsofficial/acer-aspire-3-slim-a314-22-amd-ryzen-3-3250u-4gb-256gb-14-fhd-w10-ohs-hd-1366x768?utm_source=Android&utm_source=Android&utm_medium=Share&utm_medium=Share&utm_campaign=Product%20Share&utm_campaign=Product%20Share&_branch_match_id=978964305966617172
它会给出回应:
{
"product": "Acer Aspire 3 Slim A314-22 AMD RYZEN 3-3250U 4GB 256GB 14\" FHD W10 OHS
- HD (1366x768)",
"price": "Rp6.999.000",
"img": "https://images.tokopedia.net/img/cache/500-
square/VqbcmM/2021/11/10/8feb4ee0-1fb1-49fc-876e-71a0bbda732f.jpg.webp?ect=4g"
}
但我在 ubuntu 服务器 18.04 中部署它不起作用,给出错误:
/var/www/html/scrapejs/node_modules/puppeteer/lib/cjs/puppeteer/common/ExecutionContext.js:221 throw new Error('Evaluation failed:' + helper_js_1.helper.getExceptionMessage(exceptionDetails)); ^
错误:评估失败:TypeError:无法在 ExecutionContext._evaluateInternal (/var/www/html/scrapejs/node_modules/puppeteer/lib/cjs/puppeteer/common/ 处读取 puppeteer_evaluation_script :4:57 处的null 属性 (读取 'textContent') ExecutionContext.js:221:19) 在异步 ExecutionContext.evaluate (/var/www/html/scrapejs/node_modules/puppeteer/lib/cjs/puppeteer/common/ExecutionContext.js:110:16) 在异步 /var/www/ html/scrapejs/app.js:43:14
希望我能找到解决方案,我该怎么办?
解决方案
推荐阅读
- c++ - pthread 与 Qt5 兼容吗?
- node.js - 如何使用 Sequelize 查询包含子类别产品的类别下的所有产品
- c++ - 如何传递私有地图和输入数据
- svn - 在 SVN 中使用旧的重命名分支的名称创建分支时是否存在问题?
- php - Ajax 调用以 Datalist 形式返回错误的日期格式
- swift - 在 UICollectionView 上与内容从右到左对齐
- ruby-on-rails - 无法在一页上显示 2 个部分
- pandas - 如何按列对数据框进行分组并获取重复行数?
- java - 有没有办法链接mysql中已有的条目而不是创建一个新条目?
- go - 这段带有无缓冲通道的代码会导致 Go 中的 goroutine 泄漏吗?