首页 > 解决方案 > 我无法使用 requests + BeautifulSoup 自动登录到 pastebin

问题描述

我正在尝试使用 Python 自动登录 pastebin 帐户,但失败了,而且不知道原因。我原样复制了请求标头并仔细检查过……但仍然收到 HTTP 400 状态码。有人可以帮助我吗?

这是我的代码:

import requests
from bs4 import BeautifulSoup
import subprocess
import os
import sys
from requests import Session


# Question script: fetch a paste, look for a trigger word in it and, when it
# is present, try to log in to pastebin.com with a form POST.
#
# Step 1: download the paste and keep its first <textarea> element.
page = requests.get("https://pastebin.com/99qQTecB")
parse = BeautifulSoup(page.content, 'html.parser')
# NOTE(review): find() yields a bs4 element (or None when nothing matches),
# not a str, so the `command in string` test further down is presumably meant
# to run against the element's text -- confirm.
string = parse.find("textarea")
command = 'hello'
###########################################################
URL = 'https://pastebin.com'
LOGIN_ROUTE ='/login'

# Step 2: read three cookies from the home page. Each cookie is taken from its
# own brand-new session, so the three values come from three unrelated visits,
# and none of these sessions (d, e, f) is used again below.
d = requests.session()
cfduid = d.get(URL).cookies['__cfduid']

e = requests.session()
csrf = e.get(URL).cookies['_csrf-frontend']

f = requests.session()
pastebin = f.get(URL).cookies['pastebin-frontend']


# Debug output of the collected cookie values.
print (csrf)
print(cfduid)
print(pastebin)

# Hand-copied browser request headers.
# NOTE(review): in the code shown here HEADERS is never passed to any request.
# Only '__cfduid' is placed inside the 'Cookie' value; '_csrf-frontend' and
# 'pastebin-frontend' are separate dictionary keys, so they would go out as
# headers of their own rather than as cookies. 'Content-Length' is hard-coded
# to '174' regardless of the actual body size.
HEADERS = {'Host':'pastebin.com', 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language':'en-US,en;q=0.5',
'Accept-Encoding':'gzip, deflate, br' , 'Referer': 'https://pastebin.com/login', 'Content-Type': 'application/x-www-form-urlencoded', 'Content-Length': '174', 'Connection': 'keep-alive', 'Cookie': "__cfduid=" + cfduid + ";", '_csrf-frontend':csrf + ";"
,'pastebin-frontend':pastebin + ";" ,'Upgrade-Insecure-Requests': '1'}

# Step 3: act only when the trigger word is found in the paste.
if command in string:
    # NOTE(review): the next line is truncated -- it looks like the tail of a
    # subprocess call whose beginning is missing, and it is not valid Python
    # as written.
    /super_mario_bros.exe', shell=True)
    # A fourth, separate session; this is the only one used for the login POST.
    s = requests.session()
    # NOTE(review): the value is read from the '_csrf-frontend' cookie. Judging
    # by the sample Cookie header kept at the end of this script, that cookie
    # holds a URL-encoded serialized value rather than a bare token, so the
    # login form may expect a different value -- confirm against the form.
    csrf_token = s.get(URL).cookies['_csrf-frontend']
    
    # Form fields sent with the login request.
    login_payload = {
        'LoginForm[username]': 'SECRET',
        'LoginForm[password]': 'Secret', 
        '_csrf-frontend': csrf_token
        }
    # Actual login: POST the form through session `s` (no custom headers are
    # passed), then print the token and the HTTP status code that came back.
    login_req = s.post(URL + LOGIN_ROUTE, data=login_payload)
    print(csrf_token)
    print(login_req.status_code)    
    
    
else:
    # Trigger word not found in the paste.
    print("smth")


#print(string)



#Cookie: __cfduid=d955068eb3316226875aa037c059fd8f11595841495; __gads=ID=49ab9fcccba85989:T=1595841504:S=ALNI_MYT-PJZzkGrbYunHFHQE-EEw3vfhQ; _ga=GA1.2.1839432134.1595810341; pastebin-frontend=a9bf2e8c462237148c2d5f6b0832387c; _csrf-frontend=8213fdd4c42f9cfad45ed4993a25a1753c3539b1700c9e92be8ffac00780e34ea%3A2%3A%7Bi%3A0%3Bs%3A14%3A%22_csrf-frontend%22%3Bi%3A1%3Bs%3A32%3A%22HHm8aNkQL8XAG8joV8KfCZqOTls50yyW%22%3B%7D; _gid=GA1.2.1476917794.1596212111

过去几个小时我一直在努力解决这个问题,已经筋疲力尽了,但仍然得到 400 状态码。先谢谢大家!

标签: python, web-scraping, beautifulsoup, python-requests

解决方案


我可以推荐 browser_cookie3:

import browser_cookie3
import requests
from bs4 import BeautifulSoup
# Reuse the browser's existing pastebin.com login instead of posting the login
# form: load the cookies stored by the local browser(s) and copy the ones that
# belong to pastebin.com into a fresh requests session.
cj = browser_cookie3.load()
s = requests.Session()
for c in cj:
    # Match on the cookie's domain attribute rather than on str(c): the string
    # form also contains the cookie's name and value, so a substring test on it
    # can select cookies that have nothing to do with pastebin.com.
    domain = c.domain.lstrip('.')
    if domain == 'pastebin.com' or domain.endswith('.pastebin.com'):
        s.cookies.set_cookie(c)

r = s.get('https://pastebin.com/')
# Stop on an HTTP error instead of parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.content, 'lxml')

# select_one() returns None when nothing matches (presumably the case when the
# copied cookies do not carry a logged-in session), so check before reading
# .text to avoid an AttributeError.
user_name = soup.select_one('div.header__user-name')
if user_name is None:
    print('Could not find the user name on the page; '
          'is the browser logged in to pastebin.com?')
else:
    print(user_name.text)

输出:

UWTD

此代码从浏览器获取会话 cookie 并使用它们,然后打印当前登录用户的用户名。


推荐阅读