javascript - 从 HTML 元素中提取 href 无法使用 node.js
问题描述
我正在尝试从该网站上抓取数据:https ://www.gelbeseiten.de/Suche/Fotografen/Berlin ,由于某种原因,我无法获得特定元素,而其他元素则可以正常工作。我使用 node.js 和 puppeteer。我需要带有类的元素的 URL,.contains-icon-homepage
并且收到针对它的错误消息。如果我以.contains-icon-aktualisieren
上面的元素为目标,那么它可以工作。
错误信息:
UnhandledPromiseRejectionWarning:错误:评估失败:TypeError:无法读取 null 的属性“getAttribute”
的HTML:
<article class="mod mod-Treffer" data-teilnehmerid="1057046049">
<a href="https://www.gelbeseiten.de/gsbiz/64195b36-063f-4401-8536-0d1c71a76326" data-realid="64195b36-063f-4401-8536-0d1c71a76326" data-tnid="1057046049" target="_self"
>
<div class="mod-hervorhebung">
</div>
<picture class="trefferlisten_logo">
<source media="(min-width: 768px)" srcset="https://ies.v4all.de/0122/GS/0001/7/5017/30015017_310x190.png" />
<img alt="" data-lazy-src="https://ies.v4all.de/0122/GS/0001/7/5017/30015017_310x190.png"/>
</picture>
<h2 data-wipe-name="Titel">Rudolph Silke</h2>
<p class="d-inline-block mod-Treffer--besteBranche">
Fotografen und Fotostudios
</p>
<address class="mod mod-AdresseKompakt">
<p data-wipe-name="Adresse">
Samariterstr. 33,
<span class="nobr">
10247
Berlin
</span>
(Friedrichshain)
<span class="mod-AdresseKompakt__entfernung" title="Entfernung ab Suchmittelpunkt">6 km</span>
</p>
<p class="mod-AdresseKompakt__phoneNumber" data-hochgestellt-position="end" data-wipe-name="Kontaktdaten">(030) 4 26 66 86</p>
</address>
<div class="oeffnungszeit_kompakt__zustandsinfo--geschlossen">
<span>Geschlossen</span>,
<span class="nobr">öffnet Samstag um 10:00</span>
</div>
</a>
<div class="aktionsleiste_kompakt">
<div class="mod-gsSlider mod-gsSlider--noneOnWhite">
<span
class="mod-gsSlider__arrow mod-gsSlider__arrow--arrow" data-direction="left" data-show="false" data-wipe="{"listener":"click","name":"Trefferliste: Aktionleiste-button-links"}"></span>
<span
class="mod-gsSlider__arrow mod-gsSlider__arrow--arrow" data-direction="right" data-show="false" data-wipe="{"listener":"click","name":"Trefferliste: Aktionleiste-button-rechts"}"></span>
<div class="mod-gsSlider__slider">
<a
class="contains-icon-aktualisieren gs-btn"
rel="noopener"
href="https://www.gelbeseiten.de/gsbiz/64195b36-063f-4401-8536-0d1c71a76326#aktuelleinformationen"
data-wipe="{"listener": "mouseup", "name": "Trefferliste Actionbutton Aktualisieren", "id": "1057046049", "synchron": false}" data-isNeededPromise="false" data-cookieinfo="64195b36-063f-4401-8536-0d1c71a76326=1057046049"
>Aktualisieren</a>
<a
class="contains-icon-homepage gs-btn"
target="_blank"
rel=" noopener"
href="http://www.fotoherz.de"
data-wipe="{"listener":"click", "name":"Trefferliste Webseite-Button", "id":"1057046049"}" data-isNeededPromise="false"
>Webseite</a>
<a
class="contains-icon-email gs-btn"
href="mailto:kontakt@silke-rudolph.de?subject=Anfrage%20%C3%BCber%20Gelbe%20Seiten"
data-wipe="{"listener":"click", "name":"Trefferliste Email-Button", "id":"1057046049"}" data-isNeededPromise="false"
>E-Mail</a>
<span
class="contains-icon-route_finden gs-btn"
data-wipe="{"listener":"click", "name":"Trefferliste Navigation-Button", "id":"1057046049"}" data-parameters="{"partner": "googlemaps", "searchquery": "Samariterstr%2033%2010247%20Berlin"}" data-target="_blank"
>Route</span>
<a
class="contains-icon-details gs-btn"
rel="noopener"
href="https://www.gelbeseiten.de/gsbiz/64195b36-063f-4401-8536-0d1c71a76326"
data-wipe="{"listener": "mouseup", "name": "Trefferliste Actionbutton Mehr Details", "id": "1057046049", "synchron": false}" data-isNeededPromise="false" data-cookieinfo="64195b36-063f-4401-8536-0d1c71a76326=1057046049"
>Mehr Details</a>
<div class="mod-gsSlider__spacer"></div>
</div>
</div>
</div>
</article>
我的 JS 代码:
const puppeteer = require("puppeteer");
async function getContacts(){
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null
});
const page = await browser.newPage();
const url = "https://www.gelbeseiten.de/Suche/Fotografen/Berlin";
await page.goto(url);
await page.waitFor(".mod-Treffer");
const results = await page.$$eval(".mod-Treffer", rows => {
return rows.map(row => {
const properties = {};
const firma = row.querySelector(".mod-Treffer h2");
const tel = row.querySelector(".mod-AdresseKompakt__phoneNumber");
const webSite = row.querySelector(" .contains-icon-homepage");
properties.firma = firma.innerText;
properties.tel = tel.innerText;
properties.webSite = webSite.getAttribute("href");
return properties;
})
})
console.log(results)
}
getContacts();
解决方案
推荐阅读
- google-apps-script - 如何以编程方式(通过 AppsScript)通过 URL 在 Google 电子表格中打开特定工作表
- twig - for 循环内的切换标记正在生成错误
- firebase - “同时”使用 Firebase Storage 和 Cloud Firestore
- regex - 检查 PostgreSQL 中 where 子句 PostgreSQL 中的子字符串,替换 DB2 中的 XMLQuery 和 fnmatches
- c - 使用 realloc 和 calloc 增加函数内二维数组的大小
- github - 在第二台计算机上从 GitHub 拉取更改
- angular - 使用 selectionMode 'multiple' 在 p 表中进行编程行选择
- javascript - 使用 javascript matchMedia 的移动设备的 Onclick 事件不起作用
- python - Python中的Matlab options=optimset('display','off') 和 f = fzero(....) 的类比是什么?
- algorithm - 将矩阵 PL/pgSQL 相乘时出现错误“错误:数组下标超出范围”?