首页 > 解决方案 > 简单的 Python 网页抓取与登录

问题描述

我似乎无法连接到网站来抓取它。我想知道我是否做错了什么?我在另一个网站上尝试了相同的代码,并且它可以正常工作,但我没有收到任何身份验证错误……如能提供任何帮助,我将不胜感激,谢谢!

import requests
from bs4 import BeautifulSoup

# Script from the question: fetch the login page, read the "__aft" value from
# it, POST the credentials back to the same URL, then re-fetch and inspect the
# page.

# The same address is used for the initial GET, the login POST and the re-fetch.
login_url = 'https://secure.selfwealth.com.au/Account/Login?ReturnUrl=%2F'
# A single Session object is reused for every request below.
session = requests.Session()
page = session.get(login_url)

# Read the "value" attribute of the element named "__aft" on the login page.
# NOTE(review): presumably an anti-forgery token -- the answer further down
# sends the same value as an "X-XSRF-TOKEN" header; confirm against the site.
# If no such element exists, find() returns None and .get() raises
# AttributeError.
soup = BeautifulSoup(page.text, 'html.parser') 
token = soup.find(attrs={"name": '__aft'}).get('value')

# Fields submitted with the login POST (placeholder credentials).
# NOTE(review): the answer below uses "Email"/"Password" keys in a JSON body
# posted to /api/login instead -- these lowercase form fields may not be what
# the site expects; verify.
payload = {
    "email": "bob@gmail.com", 
    "password": "mypwd",
    "__aft": token 
}


# Send the payload form-encoded to the login page URL, then request that same
# URL again; `result` is only referenced by the commented-out check below.
result = session.post(login_url, data=payload)
page = session.get(login_url)
#print(result.ok)

# Parse the re-fetched page and look up the first <div class="daily-perf">
# and the <title> element; find() yields None when nothing matches.
soup = BeautifulSoup(page.content, 'html.parser')
results = soup.find('div', class_='daily-perf')
results2 = soup.find("title")
#marketvalue = results.find('div', class_='price-change')
# Debug output. Note that printing `payload` echoes the password in clear text.
print(results)
print(results2)
print(payload)

标签: python, web-scraping, beautifulsoup, python-requests

解决方案


import requests
from bs4 import BeautifulSoup

# Credentials sent as the JSON body of the login request; replace the
# placeholder values with real account details before running.
data = {
    "Email": "your@email.com",
    "Password": "yrpass"
}


# Extra request headers. main() stores the token read from the login page
# here under "X-XSRF-TOKEN" before posting.
headers = {}


def main(url):
    """Log in through the JSON endpoint *url* and print the decoded reply.

    The login page is fetched first, within the same session, to read the
    value of its ``__aft`` input. That value is sent back as the
    ``X-XSRF-TOKEN`` header together with the module-level ``data``
    credentials.

    Args:
        url: Address of the login API that receives the JSON credentials.

    Raises:
        RuntimeError: If the login page has no ``__aft`` input or the input
            carries no value, so no token can be sent.
    """
    with requests.Session() as req:
        r = req.get("https://secure.selfwealth.com.au/Account/Login")
        soup = BeautifulSoup(r.content, 'html.parser')
        token_input = soup.find("input", {"name": "__aft"})
        token = token_input.get("value") if token_input is not None else None
        if not token:
            # Fail with a clear message instead of an opaque AttributeError
            # (missing input) or a request sent without the token header.
            raise RuntimeError(
                "Could not find the '__aft' token on the login page "
                f"(HTTP status {r.status_code})"
            )
        headers["X-XSRF-TOKEN"] = token
        r = req.post(url, json=data, headers=headers).json()
        print(r)


main("https://secure.selfwealth.com.au/api/login")

推荐阅读