node.js - 如果请求被重定向,如何登录
问题描述
我有这个基本的 POST 请求:
'use strict';
const puppeteer = require('puppeteer');
const request_client = require('request-promise-native');
// Opens the pricing page in a visible browser, submitting the form data as a
// POST on the initial navigation, then prints the body of the final response.
(async () => {
  const targetUrl = 'https://www.ikinciyeni.com/fiyatlandirici';
  const formBody = 'HasSell=true&ModelYears=2005&MaxModelYearHidden=2020&Brands=BMW&ModelNames=SERIES+3&TransmissionTypes=Otomatik&FuelTypes=Dizel&CarCases=Sedan&Versions=320D+AUTO&HorsePowers=150&MaxHorsePowerHidden=163&IsDontKnowHorsePowerHidden=0&Km=5.000&IsDontKnowKmHidden=0&OuterDemage-1=1-1&OuterDemage-2=2-1&OuterDemage-3=3-1&OuterDemage-4=4-1&OuterDemage-5=5-1&OuterDemage-6=6-1&OuterDemage-7=7-1&OuterDemage-8=8-1&OuterDemage-9=9-1&OuterDemage-10=10-1&OuterDemage-11=11-1&EquipmentCheckBoxSIS+FARI=SIS+FARI&EquipmentCheckBoxSUNROOF=SUNROOF&EquipmentCheckBoxYOL+BILGISAYARI=YOL+BILGISAYARI&progressValue=10';

  // Create browser instance, and give it a first tab
  const browser = await puppeteer.launch({headless: false});
  const page = await browser.newPage();

  // Interception must be enabled before the first page.goto()
  await page.setRequestInterception(true);

  // The handler fires for every request the page makes (redirect targets and
  // sub-resources included), so only the first navigation to targetUrl is
  // rewritten; everything else is passed through untouched.
  let formSubmitted = false;
  page.on('request', interceptedRequest => {
    const isInitialNavigation = !formSubmitted
      && interceptedRequest.isNavigationRequest()
      && interceptedRequest.url() === targetUrl;
    if (!isInitialNavigation) {
      interceptedRequest.continue();
      return;
    }
    formSubmitted = true;
    interceptedRequest.continue({
      method: 'POST',
      postData: formBody,
      // Merge with the headers the browser generated instead of replacing
      // them. headers() reports lower-case names, so the key is lower-case
      // too to avoid sending a duplicate Content-Type.
      headers: Object.assign({}, interceptedRequest.headers(), {
        'content-type': 'application/x-www-form-urlencoded',
      }),
    });
  });

  // Navigate, trigger the intercept, and resolve the response
  const response = await page.goto(targetUrl);
  const responseBody = await response.text();
  console.log(responseBody);

  // The browser is not closed here, so the visible (non-headless) window
  // stays open; call browser.close() to end the session.
})().catch(error => {
  // Report failures explicitly rather than leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});
在上面的代码中,我给出了发送请求的 url。该请求被重定向,最终跳转到了登录页面。在这种情况下如何登录?如何提供登录凭据?
编辑:我最后添加了这段代码:
// Navigate, trigger the intercept, and resolve the response
// `response` is the response object returned by this page.goto() call.
const response = await page.goto('https://www.ikinciyeni.com/fiyatlandirici');
// NOTE(review): page.goto() has already been awaited at this point, so this
// call waits for a further navigation; presumably none happens, which would
// explain the reported "Navigation timeout of 30000 ms exceeded" - confirm.
await page.waitForNavigation({
waitUntil: 'networkidle0',
});
// Fill in the login form fields and submit it.
await page.type('#EmailRetail', 'scott');
await page.type('#Password', 'tiger');
await page.click('#LoginSubmitBtn');
// NOTE(review): this wait is only registered after the click has resolved;
// it may miss a navigation the click already started - confirm.
await page.waitForNavigation();
// NOTE(review): this reads the body of the response captured by page.goto()
// above, not of the page reached after the login submit - TODO confirm that
// page.content() is what is wanted here.
const responseBody = await response.text();
console.log(responseBody);
但它抛出了这个异常:
UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
at async E:\code\generic_scrapper\post.js:39:2
-- ASYNC --
at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
at E:\code\generic_scrapper\post.js:39:13
at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:11400) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:11400) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
^C
E:\code\generic_scrapper>node post.js
(node:18352) UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
at async E:\code\generic_scrapper\post.js:39:2
-- ASYNC --
at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
at E:\code\generic_scrapper\post.js:39:13
at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:18352) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:18352) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
^C
E:\code\generic_scrapper>node post.js
(node:11528) UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
at async E:\code\generic_scrapper\post.js:39:2
-- ASYNC --
at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
at E:\code\generic_scrapper\post.js:39:13
at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:11528) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:11528) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
我该如何解决这个问题?我的直觉是等到页面加载完毕,然后单击并输入信息。
编辑1:
我删除了 waitForNavigation(),这是我得到的异常:
UnhandledPromiseRejectionWarning: Error: Protocol error (Network.getResponseBody): No resource with given identifier found
at E:\code\generic_scrapper\node_modules\puppeteer\lib\Connection.js:152:63
at new Promise (<anonymous>)
at CDPSession.send (E:\code\generic_scrapper\node_modules\puppeteer\lib\Connection.js:151:16)
at E:\code\generic_scrapper\node_modules\puppeteer\lib\HTTPResponse.js:58:53
at processTicksAndRejections (internal/process/task_queues.js:97:5)
at async HTTPResponse.text (E:\code\generic_scrapper\node_modules\puppeteer\lib\HTTPResponse.js:67:25)
at async E:\code\generic_scrapper\post.js:45:26
-- ASYNC --
at HTTPResponse.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
at E:\code\generic_scrapper\post.js:45:41
at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:15292) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:15292) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
Edit2:无论有没有凭据,它都会跳转到此页面:https://www.ikinciyeni.com/giris 。但是,它应该跳转到这样的页面:https://www.ikinciyeni.com/konsinye-hesap-sonuc?tempId=...
这背后的原因可能是什么?可能是因为它没有发送给它的凭据?
Edit3:这是来自网站(Chrome)的请求的样子
POST https://www.ikinciyeni.com/giris HTTP/1.1
Host: www.ikinciyeni.com
Connection: keep-alive
Content-Length: 296
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
Origin: https://www.ikinciyeni.com
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Referer: https://www.ikinciyeni.com/giris?ReturnUrl=/konsinye-hesap-sonuc&tempId=2a26816b-62ed-4603-bfec-473342f58de7
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9
Cookie: __gads=ID=b7b47e7d0a959312:T=1
__RequestVerificationToken=Amij.....
而这是 Puppeteer 发出的请求:
POST https://www.ikinciyeni.com/giris HTTP/1.1
Host: www.ikinciyeni.com
Connection: keep-alive
Content-Length: 595
Pragma: no-cache
Cache-Control: no-cache
Content-Type: application/x-www-form-urlencoded
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Referer: https://www.ikinciyeni.com/giris?ReturnUrl=/konsinye-hesap-sonuc&tempId=a92c918b-4745-4c79-9cd4-cb31084468b3
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.0 Safari/537.36
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9
Cookie: ASP.NET_SessionId=mfy3sm5da1b1xdiro52dtqqn; Cookie_DimensionId=477094; NSC_xxx.jljodjzfoj.dpn_iuuqt=ffffffffaf1fc83045525d5f4f58455e445a4a423660; __RequestVerificationToken=-5v7ZraCIzfPQ4ViK6x1oEBjXzzxn9v0HV_q8lkdwCGiOOT9ChF7-N1Ya2tn3D1rbvIRdw1mw_BCPe50aluylMqS5qo1
HasSell=true&ModelYears=2005&MaxModelYearHidden=2020&Brands=BMW&ModelNames=SERIES+3&TransmissionTypes=Otomatik&FuelTypes=Dizel&CarCases=Sedan&Versions=320D+AUTO&HorsePowers=150&MaxHorsePowerHidden=163&IsDontKnowHorsePowerHidden=0&Km=5.000&IsDontKnowKmHidden=0&OuterDemage-1=1-1&OuterDemage-2=2-1&OuterDemage-3=3-1&OuterDemage-4=4-1&OuterDemage-5=5-1&OuterDemage-6=6-1&OuterDemage
在后一种情况下,我需要附加我的电子邮件和密码。
编辑:浏览器本身生成的请求头会被我在 POST 中指定的请求头覆盖。有什么办法可以把我的请求头添加到现有的请求头中,而不是覆盖它们?
解决方案
您首先需要拥有该网站的有效凭据。然后可以在脚本中设置一个条件:检查当前是否位于登录页面,如果是,就像真实用户一样通过 puppeteer 输入凭据。先单击选中输入字段,然后使用 API 的 keyboard.type 输入内容。(您也可以使用 keyboard.press() 在输入字段之间切换。)
如果您这样做,请不要忘记将您的凭据存储在环境变量中!
[...]
await page.goto('https://www.ikinciyeni.com/fiyatlandirici');
// The login form is recognised by its email field: page.$ resolves to null
// when no element matches, in which case no login step is performed.
if ((await page.$('#EmailRetail')) !== null) {
  // Credentials are read from the environment rather than hard-coded.
  const email = process.env.LOGIN_EMAIL;
  const password = process.env.LOGIN_PASSWORD;
  if (!email || !password) {
    throw new Error('LOGIN_EMAIL and LOGIN_PASSWORD must be set');
  }
  // select and type the email
  await page.click('#EmailRetail');
  await page.keyboard.type(email);
  // select and type the password
  await page.click('#Password');
  await page.keyboard.type(password);
  // submit the form; the navigation wait is registered before the click so
  // the page load triggered by the submit cannot be missed, and the content
  // below is read only after that load has finished
  await Promise.all([
    page.waitForNavigation(),
    page.click('#LoginSubmitBtn'),
  ]);
}
const responseBody = await page.content();
console.log(responseBody);
[...]
推荐阅读
- django - 未找到没有参数的“user_logout”反向
- django - Django methodSerializer 不返回绝对图像 url
- flutter - 如何在颤动中选择默认单选按钮
- azure - 缺少必填字段时,是否可以强制 Azure API 管理返回 400 而不是 404?
- python - 在 numpy 数组中包含不同形状的信息的最佳方法?
- ruby-on-rails - Rails 18n French:使用带重音的单词时出现特殊字符
- java - 如何迭代思想 JLabel 对象以将图标添加到特定对象
- exceed - 问:保存打开文本是否超出窗口大小和位置?
- abap - 具有不同域的字段的内部连接
- javascript - 同时部署 Node.js Restfull API 和 Vue.js 应用