node.js - 为什么 puppeteer 报告“UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!”?
问题描述
我有一个简单的 node.js 脚本来捕获一些网页的屏幕截图。看来我在使用 async/await 时被绊倒了,但我不知道在哪里。我目前正在使用 puppeteer v1.11.0。
const puppeteer = require('puppeteer');
//a list of sites to screenshot; each key is also used as the base name of the output PDF
const papers =
{
nytimes: "https://www.nytimes.com/",
wapo: "https://www.washingtonpost.com/"
};
//launch puppeteer, do everything in .then() handler
//NOTE(review): this handler is a plain (non-async) function, so it cannot await stepThru() below
puppeteer.launch({devtools:false}).then(function(browser){
//create a load_page function that returns a promise which resolves when screenshot is taken
//paper: a key of `papers`; the PDF is written to "<paper>.pdf"
async function load_page(paper){
const url = papers[paper];
//NOTE(review): resolve() is only called from the console handler and reject() is never called,
//so an error thrown inside this async executor leaves the returned promise pending
return new Promise(async function(resolve, reject){
const page = await browser.newPage();
await page.setViewport({width:1024, height: 768});
//screenshot on first console message (console_msg itself is not used)
page.once("console", async console_msg => {
await page.pdf({path: paper + '.pdf',
printBackground:true,
width:'1024px',
height:'768px',
margin: {top:"0px", right:"0px", bottom:"0px", left:"0px"}
});
//close page
//NOTE(review): this runs from the console handler, possibly while the page.goto() below is still pending
await page.close();
//resolve promise
resolve();
});
//go to page; waits for both the "load" event and "networkidle0"
await page.goto(url, {"waitUntil":["load", "networkidle0"]});
})
}
//step through the list of papers, calling the above load_page()
async function stepThru(){
for(var p in papers){
if(papers.hasOwnProperty(p)){
//wait to load page and screenshot before loading next page
await load_page(p);
}
}
//close browser after loop has finished (and all promises resolved)
await browser.close();
}
//kick it off
//NOTE(review): the promise returned by stepThru() is neither awaited nor returned and has no rejection handler
stepThru();
//getting this error message:
//UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!
});
解决方案
`Navigation failed because browser has disconnected` 错误通常意味着启动 Puppeteer 的 Node 脚本没有等待 Puppeteer 的操作完成就结束了。因此,正如您所说,这是一个与等待(await)有关的问题。
关于您的脚本,我进行了一些更改以使其正常工作:
- 首先,你没有等待 `stepThru` 函数(异步地)执行完毕,所以要把
stepThru();
改为
await stepThru();
并把
puppeteer.launch({devtools:false}).then(function(browser){
改为
puppeteer.launch({devtools:false}).then(async function(browser){
(我加上了 `async`)
- 我改变了你处理 `page.goto` 和 `page.once` 这两个 Promise 的方式。
负责生成 PDF 的 Promise 现在是:
// Settles once the PDF triggered by the page's first console message has been written.
// The executor is a plain function because nothing is awaited directly inside it.
new Promise(function(resolve, reject){
  //screenshot on first console message
  page.once("console", async () => {
    try {
      await page.pdf({
        path: paper + '.pdf',
        printBackground:true,
        width:'1024px',
        height:'768px',
        margin: {
          top:"0px",
          right:"0px",
          bottom:"0px",
          left:"0px"
        }
      });
      resolve();
    } catch (err) {
      //without this, a failed page.pdf() would leave the promise pending forever
      reject(err);
    }
  });
})
它只有一个职责,就是创建 PDF。
- 然后我用 `Promise.all` 同时管理 `page.goto` 和生成 PDF 的 Promise:
//start the navigation and the PDF promise together; continue only once both have fulfilled
await Promise.all([
page.goto(url, {"waitUntil":["load", "networkidle2"]}),
//the executor runs synchronously, so whatever it registers is in place right after goto() is called
new Promise(async function(resolve, reject){
// ... pdf creation as above
})
]);
- 我把 `page.close` 移到了 `Promise.all` 之后:
//placeholder for the navigation + PDF promises shown in the previous snippet
await Promise.all([
// page.goto
// PDF creation
]);
//close the page only after the Promise.all above has fulfilled
await page.close();
//resolve the enclosing promise (presumably the one load_page returns)
resolve();
现在它可以工作了,这里是完整的工作脚本:
const puppeteer = require('puppeteer');

//a list of sites to screenshot; each key is also used as the base name of the output PDF
const papers =
{
  nytimes: "https://www.nytimes.com/",
  wapo: "https://www.washingtonpost.com/"
};

//launch puppeteer, do everything in .then() handler
puppeteer.launch({devtools:false}).then(async function(browser){

  /**
   * Opens papers[paper] in a new page and writes "<paper>.pdf" when the page
   * emits its first console message.
   *
   * Being an async function, it already returns a promise, so no extra
   * `new Promise` wrapper is needed; any failure (navigation, PDF) rejects
   * that promise instead of leaving it pending.
   *
   * NOTE: if the page never logs to the console, the PDF promise never
   * settles and this function waits indefinitely.
   *
   * @param {string} paper - a key of `papers`
   * @returns {Promise<void>} resolves after the PDF is written and the page is closed
   */
  async function load_page(paper){
    const url = papers[paper];
    const page = await browser.newPage();
    try {
      await page.setViewport({width:1024, height: 768});
      await Promise.all([
        page.goto(url, {"waitUntil":["load", "networkidle2"]}),
        //plain executor: it only registers the listener, nothing is awaited here
        new Promise(function(resolve, reject){
          //screenshot on first console message
          page.once("console", async () => {
            try {
              await page.pdf({path: paper + '.pdf', printBackground:true, width:'1024px', height:'768px', margin: {top:"0px", right:"0px", bottom:"0px", left:"0px"} });
              resolve();
            } catch (err) {
              //surface PDF failures instead of leaving the promise pending
              reject(err);
            }
          });
        })
      ]);
    } finally {
      //close the page whether or not navigation / PDF creation succeeded
      await page.close();
    }
  }

  /**
   * Steps through the list of papers one at a time, then closes the browser.
   * @returns {Promise<void>} rejects with the first load_page() failure
   */
  async function stepThru(){
    try {
      for(const p of Object.keys(papers)){
        //wait to load page and screenshot before loading next page
        await load_page(p);
      }
    } finally {
      //always release the browser, even when a page failed
      await browser.close();
    }
  }

  await stepThru();
}).catch(function(err){
  //report launch / navigation / PDF failures instead of an unhandled rejection
  console.error(err);
  process.exitCode = 1;
});
请注意:
- 我把 networkidle0 改成了 networkidle2,因为 nytimes.com 网站(由于广告等原因)需要很长时间才能达到 0 个网络请求的状态。您当然也可以继续等待 networkidle0,这取决于您,不过这已超出了您的问题范围(那种情况下需要增大 page.goto 的超时时间)。
- www.washingtonpost.com 站点会出现 TOO_MANY_REDIRECTS 错误,因此我改用了 washingtonpost.com,但我认为您应该对此做进一步调查。为了测试脚本,我用 nytimes 网站和其他网站运行了多次。再说一遍:这超出了您的问题范围。
如果您需要更多帮助,请告诉我
推荐阅读
- php - 在 Kubernetes 中为 Laravel 运行“php artisan schedule:run”的好习惯
- javascript - 如何在 React 中更新来自 api 的 json 数据?
- modelandview - 如何在 java 中访问 Map 对象类型(Map<...>)的 modelAndView 属性?
- python - 在 django 的更新中使用子查询
- python - 如何绘制带有渐变阴影的多图表?
- jasmine - 为什么如果 afterAll() 或 beforeAll() 中的任何行失败并没有失败测试或记录任何内容?
- javascript - 如何解决 CSS 中的左侧菜单扩展跳跃问题?
- python - 尝试在 Python 中使用 calcHist 计算 3 通道颜色直方图时出现“断言失败”错误
- docker - 如何使用 docker-compose 启动带有卷的主机 docker 容器
- c# - 对于运行时数据库上下文,我是否需要 OnModelCreating 中的实体数据模型?