首页 > 解决方案 > UnhandledPromiseRejectionWarning:错误:请求已被处理

问题描述

我有一个 Node.js 项目,最初是作为 API 使用的,它按计划使用 puppeteer 从网站上抓取数据。为了检查此时此刻是否有待执行的抓取任务,我使用了一个调用模型查询的函数。它看起来可以工作,我也拿到了数据,但是在抓取第二篇及之后的文章时,总是出现错误 `UnhandledPromiseRejectionWarning: Error: Request is already handled!`,随后是 `UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch().`。它似乎还占用了大量的 CPU 和内存。所以我的问题是:我的代码中是否有阻塞的地方,或者有什么可以做得更好的地方?

这是我的 server.js


/**
 * Checks the database for an article that still needs crawling and, when one
 * exists, starts a crawl for it.
 *
 * `callback` is invoked exactly once, and only after the query (and the crawl,
 * if `Crawl.article` returns a Promise) has finished. Calling it earlier lets
 * the next poll start while the previous crawl is still running, which piles
 * up browser instances working on the same article.
 *
 * @param {Function} callback - Called with no arguments when this poll is done.
 */
function mydbqueryarticle(callback) {
    News.findNullArticle(function (err, article) {
        if (err) {
            console.log('error: ', err);
            callback();
            return;
        }
        if (article == null) {
            console.log('No article to crawl');
            callback();
            return;
        }

        console.log('Crawling');
        // Works whether Crawl.article returns a Promise or undefined; a failed
        // crawl is logged so the polling loop keeps running.
        Promise.resolve()
            .then(() => Crawl.article(err))
            .catch((crawlErr) => console.error(crawlErr))
            .then(() => callback());
    });
}

/**
 * Schedules one database poll ten seconds from now. The poll receives this
 * function as its callback, so each poll arranges the next one.
 */
function wait10sec() {
    const delayMs = 10000;
    const pollOnce = () => {
        mydbqueryarticle(wait10sec);
    };
    setTimeout(pollOnce, delayMs);
}

// Start the polling loop; the first database check runs when the initial 10-second timer fires.
wait10sec();

这是我的新闻模型

/**
 * Fetches the oldest article whose `article_status` is 0.
 *
 * @param {Function} result - Error-first callback `(err, article)`. On a query
 *   failure it receives `(err, null)`; otherwise `(null, row)`, where `row` is
 *   the first returned row or `undefined` when no row matched.
 */
News.findNullArticle = function (result) {
    dbConn.query("SELECT id, source, keyword, title, link from article where article_status = 0 ORDER BY created_at ASC LIMIT 1", function (err, res) {
        if (err) {
            console.log("error: ", err);
            // Pass the error in the error slot; handing it over as the second
            // argument makes callers treat the error object as an article row.
            result(err, null);
            return;
        }
        console.log('article : ', res);
        result(null, res[0]);
    });
};

这是我的爬虫控制器

function crawl_article(news_id, news_link, all_page_tag, body_article, article_date, article_el, article_tag_el) {
    try {
        (async () => {

            // Add stealth plugin and use defaults (all tricks to hide puppeteer usage)
            const StealthPlugin = require('puppeteer-extra-plugin-stealth')
            puppeteer.use(StealthPlugin())
            // Add adblocker plugin to block all ads and trackers (saves bandwidth)
            const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker')
            puppeteer.use(AdblockerPlugin({ blockTrackers: true }))

            puppeteer.launch({
                headless: true,
                executablePath: '/opt/google/chrome/google-chrome',
                args: ['--autoplay-policy=user-gesture-required',
                '--disable-background-networking',
                '--disable-background-timer-throttling',
                '--disable-backgrounding-occluded-windows',
                '--disable-breakpad',
                '--disable-client-side-phishing-detection',
                '--disable-component-update',
                '--disable-default-apps',
                '--disable-dev-shm-usage',
                '--disable-domain-reliability',
                '--disable-extensions',
                '--disable-features=AudioServiceOutOfProcess',
                '--disable-hang-monitor',
                '--disable-ipc-flooding-protection',
                '--disable-notifications',
                '--disable-offer-store-unmasked-wallet-cards',
                '--disable-popup-blocking',
                '--disable-print-preview',
                '--disable-prompt-on-repost',
                '--disable-renderer-backgrounding',
                '--disable-setuid-sandbox',
                '--disable-speech-api',
                '--disable-sync',
                '--hide-scrollbars',
                '--ignore-gpu-blacklist',
                '--metrics-recording-only',
                '--mute-audio',
                '--no-default-browser-check',
                '--no-first-run',
                '--no-pings',
                '--no-sandbox',
                '--no-zygote',
                '--password-store=basic',
                '--use-gl=swiftshader',
                '--use-mock-keychain'] 
            }).then(async browser => {
                console.log(news_link);
                const page = await browser.newPage();
                await page.setDefaultNavigationTimeout(0);
                await page.goto(news_link+all_page_tag);
                await page.waitForSelector('body');
                await autoScroll(page);
                //await page.waitForTimeout(2500);

                let current_time = moment().format('YYYY-MM-DD HH:mm:ss');
                var rposts = await page.evaluate((body_article, article_date, article_el, article_tag_el) => {
                    let get_article_date, get_article, get_tag;
                    let posts = document.body.querySelectorAll(body_article);       
                    postItems = [];
                    posts.forEach((item) => {
                        try{
                            if(item.querySelector(article_el)) {
                                // get_article = item.querySelector(article_el).innerText;
                                get_article = item.querySelector(article_el).innerHTML;
                                // get_article = item.querySelector('p').innerText;
                            } else {
                                get_article = item.querySelector('p').innerText;
                            }
                            if(item.querySelector(article_tag_el))
                                get_tag = item.querySelector(article_tag_el).innerText.replace(/\n/g, ",");
                            if(item.querySelector(article_date)) 
                                get_article_date = item.querySelector(article_date).innerText;

                            postItems.push({get_article_date, get_article, get_tag});
                        } catch(e) {}
                    });
                    return postItems;
                }, body_article, article_date, article_el, article_tag_el);

                //console.log(rposts[0].get_article);
                console.log(rposts);
                if(rposts.length > 0) {
                    News.updateArticle(news_id, rposts, function(err, news) {
                        if (err) console.log(err);
                        else console.log({error:false,message:"Article id " +news_id+"  updated successfully!",data:rposts});
                    });
                } else {
                    News.updateErrorArticle(news_id, rposts, function(err, news) {
                        if (err) console.log(err);
                        else console.log({error:false,message:"Article id " +news_id+"  updated successfully!",data:rposts});
                    });
                }
                await browser.close();
            }).catch(function(error) {
                console.error(error);
            });

        })();
    } catch (err) {
        console.error(err);
    }
}

exports.article = (err) => {
    News.findNullArticle(function(err, result) {
        // if (err) res.send(err);

        if(result) {
            let article_el, article_tag_el
            Sources.findBySource(result.source, function(err, dt_source) {
                if(dt_source) {
                    let crawl = crawl_article(result.id, result.link, dt_source.all_page_tag, dt_source.body_article, dt_source.article_date,dt_source.article_el, dt_source.article_tag_el);
                }
                // res.json({error:false, article:result, source:dt_source});
            })
        } else {
            // res.send({ error:true, response: "No News return from query" });
        }
    });
};

标签: node.js express puppeteer

解决方案


我想通了,我只是改用了 puppeteer-cluster(puppeteer 集群库)。


推荐阅读