tkinter - 如何为我的网络抓取程序设置 tkinter GUI?
问题描述
我有一个函数:
def atime_scrape(asia_times):
    """Scrape article metadata and body text for each URL and write a CSV.

    For every URL the page is fetched once with ``urlopen`` to read its
    ``<meta>`` tags (publication date, title, author, type) and once through
    ``Article`` to extract the body text. The collected values are mapped
    onto a DataFrame keyed by URL and written to ``testCSV.csv``.

    Relies on names defined outside this function: ``column_names``,
    ``headers``, ``url_category``, ``pd``, ``Request``, ``urlopen``, ``bs``,
    ``Article`` and ``Config``.
    NOTE(review): ``bs`` is presumably ``bs4.BeautifulSoup`` and
    ``Article``/``Config`` presumably come from ``newspaper`` -- confirm
    against the file's imports.

    Args:
        asia_times: Collection of article URLs; assigned to the ``URL`` column.

    Returns:
        Whatever ``DataFrame.to_csv`` returns for a path argument.
    """
    # create dataframe and pass the url list to the URL column
    atime = pd.DataFrame(columns=column_names)
    atime['URL'] = asia_times

    # per-URL lookup tables, filled while scraping
    atime_date = {}
    atime_title = {}
    atime_auth = {}
    atime_type = {}
    atime_corpus = {}
    atime_category = {}

    # The article-download configuration never changes, so build it once
    # instead of once per URL.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent

    # Categories are matched on the string form of the URL; index them once
    # rather than scanning url_category for every article.
    category_by_url = {str(k): v for k, v in url_category.items()}

    # iterate through URL column to scrape data
    for url in atime['URL']:
        # general: make the request and parse the response into a bs object;
        # the context manager closes the HTTP response
        req = Request(url, headers=headers)
        with urlopen(req) as response:
            soup = bs(response.read(), 'html.parser')

        # date: keep only the part before the ISO 'T' separator
        for tag in soup.findAll('meta', {'property': 'article:published_time'}):
            atime_date[url] = tag['content'].split('T', 1)[0]
        # title
        for tag in soup.findAll('meta', {'property': 'og:title'}):
            atime_title[url] = tag['content']
        # author
        for tag in soup.findAll('meta', {'name': 'twitter:data1'}):
            atime_auth[url] = tag['content']
        # type
        for tag in soup.findAll('meta', {'property': 'og:type'}):
            atime_type[url] = tag['content']

        # text: flatten non-breaking spaces and newlines into plain spaces
        article = Article(url, config=config)
        article.download()
        article.parse()
        atime_corpus[url] = article.text.replace('\xa0', ' ').replace('\n', ' ')

        # category
        if str(url) in category_by_url:
            atime_category[url] = category_by_url[str(url)]

    # map data by URL to dataframe
    atime['Article Date'] = atime['URL'].map(atime_date)
    atime['Article Title'] = atime['URL'].map(atime_title)
    atime['Author'] = atime['URL'].map(atime_auth)
    atime['Source'] = 'Asia Times'
    atime['Type'] = atime['URL'].map(atime_type)
    atime['Text'] = atime['URL'].map(atime_corpus)
    atime['Category'] = atime['URL'].map(atime_category)

    # The path must be a string literal; the bare name `testCSV.csv` raised
    # NameError before anything was written.
    return atime.to_csv('testCSV.csv')
我的程序目前读取包含新闻文章 URL 的 CSV 或 XLSX 文件,按来源对它们进行排序,将它们传递给各自的抓取函数,并输出包含作者、标题、日期、文本等的 CSV/XLSX 文件。
我想用 tkinter 创建一个 GUI,让用户选择包含 URL 的 CSV 或 Excel 文件作为输入,并提供一个按钮,单击该按钮时执行此函数,将结果导出为保存在我的机器上的 CSV/XLSX 文件。
对此的任何想法/帮助将不胜感激。我也很高兴与任何人分享我的程序。它按原样工作得很好,但目前需要用户手动更改存放 URL 的输入文件。
解决方案
你可以这样写:
from tkinter import *
import threading
from tkinter import filedialog
# Main window of the scraper front end.
gui = Tk()
gui.title("Asian News Scraper")
gui.geometry("600x300")

label = Label(gui, text="Select File: ")
label.grid(row=0, column=0)


def scraper(asian_time):
    """Placeholder for the scraping routine.

    Args:
        asian_time: Path of the file chosen in the dialog, as passed by
            ``run_scrape``.
    """
    # Your Scraper Here
    pass


def run_scrape():
    """Ask the user for a CSV/XLSX file and run ``scraper`` on it in a thread.

    The scraper runs in a separate thread so the work does not block the
    tkinter main loop. Nothing is started if the dialog is cancelled.
    """
    file = filedialog.askopenfilename(
        initialdir="/",
        title="Choose A File",
        filetypes=(("CSV Files", "*.csv"), ("Excel Files", "*.xlsx")),
    )
    # A cancelled dialog yields an empty value; there is nothing to scrape.
    if not file:
        return
    # args must be a tuple: `(file)` is just the string itself, which Thread
    # would unpack into one positional argument per character.
    theader = threading.Thread(target=scraper, args=(file,))
    theader.start()


# Only Button accepts `command`; Label raises TclError for that option.
button = Button(gui, text="Scrape / Format", command=run_scrape)
# Without a geometry manager call the button is never displayed.
button.grid(row=0, column=1)

gui.mainloop()
希望这回答了你的问题。
菜鸟科学
推荐阅读
- redis - 什么是 Redis 时间序列模块限制
- vim - 从同一个字符(= 符号)开始的多行编辑[没有宏?]
- python - 打开目录中的所有文件以测试条件
- r - 如何使用基于成对欧几里得距离向量的核函数分配权重?
- java - 如何在 Java 中连接 1 个数组和字符串?
- java - 为什么我在此程序中收到 ArrayIndexOutOfBoundsException 错误?
- linux - Docker:从 Win10 挂载到运行 ubuntu 的容器的 dir 中不允许 git 操作
- javascript - 希望让我的机器人接收消息,并使用 Discord.js 转换字母
- python-3.x - selenium alert 使用 IE 驱动程序自动单击接受
- c - C语言程序以不同的颜色开始