首页 > 解决方案 > HTML没有进入节点js puppeteer

问题描述

错误

无法读取 null 的属性“querySelectorAll”

我正在抓取这个站点,当我在控制台中编写以下行时,它给了我 HTML。但是当我从 puppeteer 抓取 HTML 时,它给了我错误

document.querySelectorAll('#stroke-play-container > .stroke-play-leaderboard > .the-leaderboard.with-rolex > table.leaderboard.leaderboard-table.large')[0].nextSibling;

代码

'use strict';

 const puppeteer = require('puppeteer');
 function run() {
 return new Promise(async (resolve, reject) => {
    try {


        const browser = await puppeteer.launch({
        headless : false
        });

        const page = await browser.newPage();

        await page.goto("https://www.pgatour.com/leaderboard.html");

        await page.evaluate(`window.scrollTo(0, document.body.scrollHeight)`);
        await page.waitFor(5000);
    
        let urls = await page.evaluateHandle(() => {
            let results = [];
            var parser = new DOMParser();
            
            var node = document.querySelectorAll('#stroke-play-container > .stroke-play-leaderboard > .the-leaderboard.with-rolex > table.leaderboard.leaderboard-table.large')[0].nextSibling;
           
            if(node){

            var $ = parser.parseFromString(node, 'text/html');
            
          
            return {
                name: $.querySelectorAll('table > tbody:nth-child(1) > tr > td.player-name > div > div.player-name-col').innerText
            };
            }
            else{
                return 'error';
            }

        })
        browser.close();
        return resolve(urls);
    } catch (e) {
        return reject(e);
    }
})
}
 run().then(console.log).catch(console.error);

标签: javascriptnode.jsweb-scrapingpuppeteer

解决方案


试试这样:

let names = await page.evaluate(() => {
  let css = '.the-leaderboard.with-rolex > table.leaderboard.leaderboard-table.large + div div.player-name-col'
  let divs = [...document.querySelectorAll(css)]
  return divs.map(div => div.innerText)
})

我不确定你想用 DOMParser 完成什么,你不应该使用它。


推荐阅读