首页 > 解决方案 > TypeError:无法读取未定义的属性“getProperty”?Node.js,Puppeteer

问题描述

我在为我的爬虫(scraper)获取“name”元素的文本时遇到了问题。我尝试使用完整的 XPath 来获取它,却得到错误“TypeError: Cannot read property 'getProperty' of undefined”(无法读取未定义的属性“getProperty”)。该错误只在获取频道标题时发生,获取个人资料图片时一切正常。

scraper.js


const puppeteer = require('puppeteer');


/**
 * Evaluates an XPath expression on the page and reads one property of the
 * first matching element.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {string} xpath - XPath expression to evaluate.
 * @param {string} property - Name of the element property to read.
 * @param {string} label - Human-readable element name used in the error message.
 * @returns {Promise<*>} The property's value as produced by `jsonValue()`.
 * @throws {Error} If the XPath matches no element.
 */
async function readXPathProperty(page, xpath, property, label) {
    const [element] = await page.$x(xpath);
    // When nothing matches, the destructured first element is undefined.
    // Fail with a descriptive error instead of the opaque
    // "Cannot read property 'getProperty' of undefined" TypeError.
    if (element === undefined) {
        throw new Error(`${label} element not found for XPath: ${xpath}`);
    }
    const handle = await element.getProperty(property);
    return handle.jsonValue();
}

/**
 * Opens the given channel page in a headless browser and scrapes the channel
 * name and avatar image URL.
 *
 * @param {string} url - Address of the channel page to scrape.
 * @returns {Promise<{name: *, avatarURL: *}>} The scraped channel details.
 * @throws {Error} If either element cannot be located on the page.
 */
async function scrapeChannel(url) {
    // NOTE: an absolute XPath is tied to the exact page structure; if the
    // structure differs, no element matches and an error is thrown below.
    const channelNameXPath = '/html/body/ytd-app/div/ytd-page-manager/ytd-browse[2]/div[3]/ytd-c4-tabbed-header-renderer/tp-yt-app-header-layout/div/tp-yt-app-header/div[2]/div[2]/div/div[1]/div/div[1]/ytd-channel-name/div/div/yt-formatted-string';
    const avatarXPath = '//*[@id="img"]';

    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.goto(url);

        const name = await readXPathProperty(page, channelNameXPath, 'textContent', 'Channel name');
        const avatarURL = await readXPathProperty(page, avatarXPath, 'src', 'Avatar image');

        console.log({ name, avatarURL });

        return { name, avatarURL };
    } finally {
        // Always release the browser process, even when scraping fails.
        await browser.close();
    }
}

// Run the scraper for a sample channel. Handle the rejection explicitly so a
// scraping failure is reported and reflected in the exit code instead of
// surfacing as an unhandled promise rejection.
scrapeChannel('https://www.youtube.com/channel/UC8butISFwT-Wl7EV0hUK0BQ').catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

index.html

  /**
   * Creates a DOM element and applies the given attributes to it.
   *
   * The special key "innerText" is assigned to the element's `innerText`
   * property; every other key is applied with `setAttribute`.
   *
   * @param {string} type - Tag name passed to `document.createElement`.
   * @param {Object<string, *>} [attrs={}] - Attribute names mapped to values.
   * @returns {*} The element returned by `document.createElement`.
   */
  function newEl(type, attrs = {}) {
    const el = document.createElement(type);
    // Object.entries visits own enumerable keys only, so properties inherited
    // through the prototype chain are never written out as attributes.
    for (const [attr, value] of Object.entries(attrs)) {
      if (attr === "innerText") {
        el.innerText = value;
      } else {
        el.setAttribute(attr, value);
      }
    }
    return el;
  }

标签: javascript, node.js, web-scraping, puppeteer, getproperty

解决方案


看来您可能在 XPath 中有错字。当我在浏览器控制台中尝试您的 XPath 时,它不会返回任何元素。但是,通过这一更改,它返回一个元素:

$x('/html/body/ytd-app/div/ytd-page-manager/ytd-browse[1]/div[3]/ytd-c4-tabbed-header-renderer/tp-yt-app-header-layout/div/tp-yt-app-header/div[2]/div[2]/div/div[1]/div/div[1]/ytd-channel-name/div/div/yt-formatted-string')
.......................................................^: 此处应为 1,而不是 2

推荐阅读