首页 > 解决方案 > 从网站上的按钮下载 csv 文件的 Python 脚本

问题描述

我想通过单击以下网址中的“将表输出到 csv”(Output Table to CSV)按钮来下载 csv 文件:https://tradingstockalerts.com/AlertHQ/SwingTradingSignals

我在以下位置看到了另一个与此非常相似的问题: Python script to download file from button on website

我试图复制那段代码并对其做了一些修改,但我并不真正理解如何使用 BeautifulSoup 解析 HTML。我也没有在任何 ajax 请求中看到这个 csv 文件。

这是我目前拥有的:

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

def downloadExcel(output_path=r"C:\xx\swing_trades.csv", timeout=30):
    """Download the "Output Table to CSV" export of the swing-trading page.

    The page is fetched first to read the hidden form fields it embeds
    (``__VIEWSTATE``, ``__EVENTVALIDATION`` and, when present,
    ``__VIEWSTATEGENERATOR``). Those values are then posted back together
    with the ``ctl00$MainContent$btndata`` button field, and the response
    body is written to ``output_path``.

    Args:
        output_path: Destination file for the response body.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Progress and errors are reported with ``print``; nothing is
        written if either request fails or the hidden fields are missing.
    """
    page_url = "https://tradingstockalerts.com/AlertHQ/SwingTradingSignals"
    headers = {
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=.08',
        'Origin': 'https://tradingstockalerts.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko)  Chrome/24.0.1312.57 Safari/537.17',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://tradingstockalerts.com/AlertHQ/SwingTradingSignals',
        'Accept-Encoding': 'gzip,deflate,br',
        'Accept-Language': 'en-US,en;q=0.5',
    }

    # Use one Session for both requests so that any cookies set by the GET
    # are sent back with the POST.
    with requests.Session() as session:
        r = session.get(page_url, headers=headers, timeout=timeout)
        if r.status_code != 200:
            print('Error')
            return

        # parse and retrieve the vital hidden form values
        soup = BeautifulSoup(r.content, "html.parser")
        viewstate_tag = soup.find(id="__VIEWSTATE")
        eventvalidation_tag = soup.find(id="__EVENTVALIDATION")
        if viewstate_tag is None or eventvalidation_tag is None:
            # Without these fields the postback cannot be built; report it
            # instead of failing with a bare IndexError.
            print('Error: __VIEWSTATE / __EVENTVALIDATION not found in page')
            return
        viewstate = viewstate_tag['value']
        eventvalidation = eventvalidation_tag['value']
        # Prefer the generator value the page actually serves; fall back to
        # the previously hard-coded one when the field is absent.
        generator_tag = soup.find(id="__VIEWSTATEGENERATOR")
        viewstategenerator = '49DF2C80'
        if generator_tag is not None:
            viewstategenerator = generator_tag.get('value', viewstategenerator)
        print ('__VIEWSTATE:', viewstate)
        print ('__EVENTVALIDATION:', eventvalidation)
        formData = {
            '__EVENTVALIDATION': eventvalidation,
            '__VIEWSTATE': viewstate,
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            '__VIEWSTATEGENERATOR': viewstategenerator,
            'MainContent_RadScriptManager1_TSM':""";;System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35:en-US:59e0a739-153b-40bd-883f-4e212fc43305:ea597d4b:b25378d2;Telerik.Web.UI, Version=2015.2.826.40, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en-US:c2ba43dc-851e-4009-beab-3032480b6a4b:16e4e7cd:f7645509:24ee1bba:c128760b:874f8ea2:19620875:4877f69a:f46195d3:92fe8ea0:fa31b949:490a9d4e:bd8f85e4:58366029:ed16cbdc:2003d0b8:88144a7a:1e771326:aa288e2d:b092aa46:7c926187:8674cba1:ef347303:2e42e72a:b7778d6c:c08e9f8a:e330518b:c8618e41:e4f8f289:1a73651d:16d8629e:59462f1:a51ee93e""",
            'search_block_form':'',
            # The submit button's own name/value pair; this is what marks
            # the postback as a click on "Output Table to CSV".
            'ctl00$MainContent$btndata':'Output Table to CSV',
            'ctl00_MainContent_RadWindow1_C_RadGridVehicles_ClientState':'',
            'ctl00_MainContent_RadWindow1_ClientState':'',
            'ctl00_MainContent_RadWindowManager1_ClientState':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl00$PageSizeComboBox':'20',
            'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl00_PageSizeComboBox_ClientState':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RDIPFdispatch_time':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RDIPFdispatch_time$dateInput':'',
            'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RDIPFdispatch_time_dateInput_ClientState':'{"enabled":true,"emptyMessage":"","validationText":"","valueAsString":"","minDateStr":"1900-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00","lastSetTextBoxValue":""}',
            'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RDIPFdispatch_time_ClientState':'{"minDateStr":"1900-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00"}',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RadComboBox1address':'',
            'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RadComboBox1address_ClientState':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RadComboBox1case_description':'',
            'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RadComboBox1case_description_ClientState':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$FilterTextBox_grid':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$RadComboBox1report_number':'',
            'ctl00_MainContent_RadGrid1_ctl00_ctl02_ctl02_RadComboBox1report_number_ClientState':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$FilterTextBox_out_max_date':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl02$ctl02$FilterTextBox_out_rowcount':'',
            'ctl00$MainContent$RadGrid1$ctl00$ctl03$ctl01$PageSizeComboBox':'20',
            'ctl00_MainContent_RadGrid1_ctl00_ctl03_ctl01_PageSizeComboBox_ClientState':'',
            'ctl00_MainContent_RadGrid1_rfltMenu_ClientState':'',
            'ctl00_MainContent_RadGrid1_gdtcSharedTimeView_ClientState':'',
            'ctl00_MainContent_RadGrid1_gdtcSharedCalendar_SD':'[]',
            'ctl00_MainContent_RadGrid1_gdtcSharedCalendar_AD':'[[1900,1,1],[2099,12,31],[2018,3,29]]',
            'ctl00_MainContent_RadGrid1_ClientState':'',
            }

        # second HTTP request with form data
        r = session.post(page_url, data=formData, headers=headers, timeout=timeout)

    print('received:', r.status_code, len(r.content))
    if r.status_code != 200:
        # Do not save an error page under the .csv name.
        print('Error')
        return
    with open(output_path, "wb") as handle:
        # iter_content() defaults to 1-byte chunks; ask for sensible blocks
        # so the file is not written one byte at a time.
        for data in tqdm(r.iter_content(chunk_size=8192)):
            handle.write(data)

# Run the download as soon as this script is executed.
downloadExcel()

我确定我做错了什么,但我将非常感谢任何有关获得有效解决方案的帮助!

标签: python html file beautifulsoup download

解决方案


您可以通过向 https://tradingstockalerts.com/AlertHQ/swing_buy_signal 发送 POST 请求来获取 CSV 文件的数据:

import csv
import requests
from bs4 import BeautifulSoup

URL = "https://tradingstockalerts.com/AlertHQ/swing_buy_signal"

# Fetch the table with a bare POST and fail loudly on an HTTP error rather
# than parsing an error page into the CSV.
response = requests.post(URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# newline="" hands line-ending control to the csv module; without it every
# row is followed by an extra blank line on Windows.
with open("output.csv", "w", newline="", encoding="utf-8") as f:
    # Each header row's text is joined with "," into ONE field. Writing it
    # with a tab delimiter keeps the csv module from quoting that field, so
    # the commas land in the file as plain column separators.
    writer1 = csv.writer(f, delimiter="\t", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer1.writerow(
        header.get_text(strip=True, separator=",") for header in soup.select("thead tr")
    )

    # Body rows: one CSV field per direct child of each <tr>.
    # NOTE(review): iterating a row yields all of its direct children, so
    # any text nodes between cells would also become fields -- confirm
    # against the actual markup.
    writer2 = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for tag in soup.select("tbody tr"):
        writer2.writerow(t.get_text(strip=True, separator=" ") for t in tag)

output.csv 的截图

截屏


推荐阅读