javascript - 如何让 puppeteer 探索页面上的链接,直到找到登录表单?
问题描述
我正在编写一个脚本,它将动态查找并填写给定 URL 的登录表单。只要登录表单可以在给定 URL 的第一页上找到,它就可以工作(在一些站点上)。
在找不到登录表单的情况下,我希望脚本探索页面上的每个链接,直到找到一个。检查每个链接,然后返回初始 URL。我只需要它深入一层,我认为不太可能找到比这更深的登录表单,而且我不需要它来处理 100% 的 URL。
如果您现在想查看代码:
const puppeteer = require('puppeteer');
const C = require('./constants');
// Selectors that playTest settles on while probing the page. They start out
// undefined and are assigned once a matching element has been located.
let USERNAME_SELECTOR, PASSWORD_SELECTOR, CTA_SELECTOR;

// Flipped to true by playTest as soon as any username field selector matches.
let usernameFieldWasFound = false;

// Target URL: the first user-supplied command-line argument.
const [, , URLL] = process.argv;
/**
 * Launches a visible (non-headless) browser and opens one new page in it.
 *
 * @returns {Promise<{browser: object, page: object}>} The launched browser
 *   together with the freshly opened page.
 */
async function startBrowser() {
  const launchOptions = { headless: false };
  const browser = await puppeteer.launch(launchOptions);
  return { browser, page: await browser.newPage() };
}
/**
 * Closes the given browser.
 *
 * @param {object} browser - Browser object exposing a `close()` method.
 * @returns {Promise<*>} Resolves with whatever `browser.close()` resolves to.
 */
async function closeBrowser(browser) {
  const outcome = await browser.close();
  return outcome;
}
// Script entry point: run the login attempt against the URL given on the
// command line, then terminate the process explicitly.
// Exit status is 0 when playTest completes and 1 when it rejects, so callers
// (shells, CI) can tell success from failure. The error is printed instead of
// surfacing as an unhandled promise rejection.
(async () => {
  let exitCode = 0;
  try {
    await playTest(URLL);
  } catch (err) {
    console.error(err);
    exitCode = 1;
  }
  process.exit(exitCode);
})();
/**
 * Probes the page with every candidate selector, in order, and returns the
 * last one that matches an element. Later candidates therefore take
 * precedence over earlier ones.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {Array<{selector: string, message: string}>} candidates - Selectors
 *   to try, each with the message to log when it matches.
 * @param {boolean} logMisses - When true, logs 'not found' for every
 *   candidate that does not match.
 * @returns {Promise<string|undefined>} The winning selector, or undefined
 *   when nothing matched.
 */
async function pickLastMatchingSelector(page, candidates, logMisses) {
  let chosen;
  for (const { selector, message } of candidates) {
    if ((await page.$(selector)) !== null) {
      chosen = selector;
      console.log(message);
    } else if (logMisses) {
      console.log('not found');
    }
  }
  return chosen;
}

/**
 * Looks for a username/e-mail input on the page currently loaded.
 *
 * @param {object} page - Puppeteer page to query.
 * @returns {Promise<string|undefined>} Matching selector, or undefined.
 */
async function findUsernameSelector(page) {
  return pickLastMatchingSelector(
    page,
    [
      { selector: '#si_username', message: 'Changed username field selector to #si_username' },
      { selector: '[name="username"]', message: 'Changed username field selector to name=username with quotes' },
      { selector: '#username', message: 'Changed username field selector to #username' },
      { selector: '#email', message: 'Changed username field selector to #email' },
    ],
    false,
  );
}

/**
 * Reduces a link to a comparable http(s) URL without its fragment.
 *
 * @param {string} href - Absolute link target.
 * @returns {string|null} Normalized URL, or null for unparsable or
 *   non-http(s) links (mailto:, javascript:, ...).
 */
function normalizeLink(href) {
  let parsed;
  try {
    parsed = new URL(href);
  } catch (err) {
    return null;
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return null;
  }
  // Fragments only scroll within a page; dropping them avoids revisits.
  parsed.hash = '';
  return parsed.href;
}

/**
 * Visits every distinct link of the current page (one level deep) until a
 * page containing a username field is found. When none is found, the page is
 * navigated back to where the exploration started.
 *
 * @param {object} page - Puppeteer page positioned on the starting URL.
 * @returns {Promise<string|undefined>} Username selector found on the page
 *   that is left loaded, or undefined when no linked page has one.
 */
async function findUsernameSelectorViaLinks(page) {
  const originUrl = page.url();
  const originKey = normalizeLink(originUrl);

  // Collect plain href strings up front: element handles would be
  // invalidated by the first navigation away from this page.
  const hrefs = await page.$$eval('a[href]', (anchors) => anchors.map((anchor) => anchor.href));
  const targets = new Set();
  for (const href of hrefs) {
    const key = normalizeLink(href);
    if (key !== null && key !== originKey) {
      targets.add(key);
    }
  }

  for (const target of targets) {
    try {
      await page.goto(target, { waitUntil: 'networkidle2' });
    } catch (err) {
      // A dead or slow link must not abort the whole exploration.
      console.log(`Skipping ${target}: ${err.message}`);
      continue;
    }
    const selector = await findUsernameSelector(page);
    if (selector !== undefined) {
      console.log(`Login form found at ${target}`);
      return selector;
    }
  }

  await page.goto(originUrl);
  return undefined;
}

/**
 * Opens `url`, locates a login form (following the page's links one level
 * deep when the first page has none), fills in the credentials from
 * `C.username` / `C.password`, submits the form and saves `screenshot.png`.
 *
 * The selectors that were picked are mirrored into the module-level
 * USERNAME_SELECTOR, PASSWORD_SELECTOR and CTA_SELECTOR variables, and
 * usernameFieldWasFound records whether a username field was located.
 * The browser is always closed, including when a step fails.
 *
 * @param {string} url - Page to start from.
 * @returns {Promise<void>}
 * @throws {Error} When no username field, password field or submit button
 *   can be located, or when a Puppeteer call fails.
 */
async function playTest(url) {
  const { browser, page } = await startBrowser();
  try {
    await page.setViewport({ width: 1366, height: 768 });
    await page.goto(url);
    await delay(5000);

    let usernameSelector = await findUsernameSelector(page);
    if (usernameSelector === undefined) {
      console.log('No username field was found, exploring links...');
      usernameSelector = await findUsernameSelectorViaLinks(page);
    }
    usernameFieldWasFound = usernameSelector !== undefined;
    if (!usernameFieldWasFound) {
      throw new Error(`No login form found at ${url} or on any page linked from it`);
    }
    USERNAME_SELECTOR = usernameSelector;

    await page.waitForSelector(USERNAME_SELECTOR);
    await page.click(USERNAME_SELECTOR);
    await page.keyboard.type(C.username);

    // Two-step forms ask for the username first and reveal the password
    // field only after a "next" button is pressed.
    let ctaSelector;
    if ((await page.$('#next')) !== null) {
      ctaSelector = '#next';
      console.log('Changed button selector to #next');
      await page.click(ctaSelector);
    } else {
      console.log('not found');
    }

    const passwordSelector = await pickLastMatchingSelector(
      page,
      [
        { selector: '#si_password', message: 'Changed password field selector to #si_password' },
        { selector: '#password', message: 'Changed password field selector to #password' },
        { selector: '#pass', message: 'Changed password field selector to #pass' },
        { selector: '[type="password"]', message: 'Changed password field selector to type=password with quotes' },
      ],
      true,
    );
    if (passwordSelector === undefined) {
      throw new Error('No password field was found on the login page');
    }
    PASSWORD_SELECTOR = passwordSelector;
    await page.click(PASSWORD_SELECTOR);
    await page.keyboard.type(C.password);
    await delay(2000);

    const submitSelector = await pickLastMatchingSelector(
      page,
      [
        { selector: '[type="submit"]', message: 'Changed button selector to type=submit with quotes' },
        { selector: '#loginbutton', message: 'Changed button selector to #loginbutton' },
        { selector: '#submit-btn', message: 'Changed button selector to #submit-btn' },
      ],
      true,
    );
    // Keep '#next' as the button of last resort when no submit control matched.
    if (submitSelector !== undefined) {
      ctaSelector = submitSelector;
    }
    if (ctaSelector === undefined) {
      throw new Error('No submit button was found on the login page');
    }
    CTA_SELECTOR = ctaSelector;

    // Start waiting for the navigation before clicking; otherwise a fast
    // navigation can finish before waitForNavigation() is even called.
    await Promise.all([page.waitForNavigation(), page.click(CTA_SELECTOR)]);
    await page.screenshot({ path: 'screenshot.png' });
  } finally {
    await closeBrowser(browser);
  }
}
/**
 * Creates a promise that resolves after roughly `time` milliseconds.
 *
 * @param {number} time - Delay handed to setTimeout, in milliseconds.
 * @returns {Promise<undefined>} Settles once the timer fires.
 */
function delay(time) {
  const armTimer = (wake) => {
    setTimeout(wake, time);
  };
  return new Promise(armTimer);
}
解决方案
您可以使用 page.$$('a') 获取页面上的所有链接(page.$('a') 只会返回第一个匹配的元素),或者在 page.evaluate 中直接使用原生 JavaScript(例如 document.querySelectorAll('a'))来收集链接。