python - 如何在多线程中使用 tqdm?
问题描述
我正在尝试用 tqdm 报告从三个链接下载每个文件的进度,并希望用多线程同时下载这些文件、同时更新各自的进度条。但是执行脚本时,屏幕上出现了多行进度条,看起来是多个线程在同时更新 tqdm 进度条。我想问的是:应该如何用多线程下载文件,同时为每个下载保留一个进度条,而不让重复的进度条填满整个屏幕?这是我的代码。
import os
import sys
import requests
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor as PE
def get_filename(url):
    """Derive a local file name for the resource at ``url``.

    The name is the last segment of the URL *path*; query strings and
    fragments are ignored so they never end up in the name on disk.
    When that segment has no extension, a HEAD request is sent and, if the
    response carries a ``Location`` header, the name is taken from the
    redirect target instead.

    Args:
        url: Address of the resource to download.

    Returns:
        The file name as a string (may lack an extension if neither the URL
        nor the redirect target provides one).
    """
    from urllib.parse import urlsplit

    # Only the path component names the file; "?token=..." or "#frag" do not.
    filename = os.path.basename(urlsplit(url).path)
    if os.path.splitext(filename)[1]:
        return filename

    # No extension in the URL itself: look at where the server redirects to.
    header = requests.head(url).headers
    if "Location" in header:
        return os.path.basename(urlsplit(header["Location"]).path)
    return filename
def _content_length(headers):
    """Return the positive ``Content-Length`` found in ``headers``, else None."""
    try:
        size = int(headers.get("Content-Length", 0))
    except (TypeError, ValueError):
        return None
    return size if size > 0 else None


def get_file_size(url):
    """Return the size in bytes reported for ``url``, or None if unknown.

    A HEAD request is sent to ``url``.  If the response has a positive
    ``Content-Length`` it is returned.  Otherwise, when the response has a
    ``Location`` header, one more HEAD request is sent to that address and
    its ``Content-Length`` is used.

    Args:
        url: Address of the resource whose size is wanted.

    Returns:
        The size as an int, or None when no usable ``Content-Length`` is
        reported (tqdm accepts ``total=None`` for an unknown length).
    """
    header = requests.head(url).headers
    # Header values are strings, so the value must be converted before it is
    # compared with zero.
    size = _content_length(header)
    if size is not None:
        return size
    # NOTE(review): the "status" header check is kept from the original;
    # its purpose is not evident from this code - confirm.
    if "Location" in header and "status" not in header:
        redirect_link = header["Location"]
        return _content_length(requests.head(redirect_link).headers)
    return None
def download_file(url, filename=None, position=None):
    """Download ``url`` into ``~/Downloads`` while showing a tqdm progress bar.

    Args:
        url: Address of the file to download.
        filename: Name to save the file under; when falsy it is derived
            with ``get_filename(url)``.
        position: Optional line offset passed to tqdm so that several bars
            running in different threads can each keep their own line.
            ``None`` (the default) lets tqdm choose automatically.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Download to the Downloads folder in user's home folder.
    download_dir = os.path.join(Path.home(), "Downloads")
    os.makedirs(download_dir, exist_ok=True)
    if not filename:
        filename = get_filename(url)
    file_size = get_file_size(url)
    abs_path = os.path.join(download_dir, filename)
    chunk_size = 1024
    with requests.get(url, stream=True) as r:
        # Fail before creating the file so an error page is never saved
        # under the download's name.
        r.raise_for_status()
        with open(abs_path, "wb") as f, tqdm(
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=filename,
            total=file_size,
            position=position,
            file=sys.stdout,
        ) as progress:
            for chunk in r.iter_content(chunk_size=chunk_size):
                # f.write returns the number of bytes written.
                progress.update(f.write(chunk))
if __name__ == "__main__":
    # Start one download per URL, each in its own worker thread.
    urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
            "https://www.vmware.com/go/getworkstation-win",
            "https://download.geany.org/geany-1.36_setup.exe"]
    with PE(max_workers=len(urls)) as ex:
        # Executor.map only re-raises a worker's exception when its result
        # is retrieved, so drain the iterator instead of discarding it.
        for _ in ex.map(download_file, urls):
            pass
我参考《Use tqdm with concurrent.futures?》这个问题修改了我的代码:
def download_file(url, filename=None):
    """Download ``url`` into ``~/Downloads`` without any progress reporting.

    Args:
        url: Address of the file to download.
        filename: Name to save the file under; when falsy it is derived
            with ``get_filename(url)``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Download to the Downloads folder in user's home folder.
    download_dir = os.path.join(Path.home(), "Downloads")
    os.makedirs(download_dir, exist_ok=True)
    if not filename:
        filename = get_filename(url)
    abs_path = os.path.join(download_dir, filename)
    chunk_size = 1024
    with requests.get(url, stream=True) as r:
        # Check the status before opening the file so a failed request does
        # not leave an error page (or an empty file) behind.
        r.raise_for_status()
        with open(abs_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
if __name__ == "__main__":
    urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
            "https://www.vmware.com/go/getworkstation-win",
            "https://download.geany.org/geany-1.36_setup.exe"]
    with PE() as ex:
        for url in urls:
            # Queue the download first, then build a byte-scaled bar for it.
            future = ex.submit(download_file, url)
            # NOTE: the bar is handed the Future as its iterable but nothing
            # ever iterates the bar or calls update() on it, so it is drawn
            # once and never advances.
            tqdm(
                future,
                total=get_file_size(url),
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                desc=get_filename(url),
                file=sys.stdout,
            )
但是修改代码之后,进度条没有更新……
我的问题:
我对并发下载没有问题,但是在实现 tqdm 来更新每个链接的单独进度时遇到问题,下面是我想要实现的目标:
我使用了一种解决方案:
if __name__ == "__main__":
    # The file's import list only provides ``ThreadPoolExecutor as PE``;
    # bring in the names this snippet actually uses.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
            "https://www.vmware.com/go/getworkstation-win",
            "https://download.geany.org/geany-1.36_setup.exe"]
    # One overall bar that counts finished downloads (not bytes).
    with tqdm(total=len(urls)) as pbar:
        with ThreadPoolExecutor() as ex:
            futures = [ex.submit(download_file, url) for url in urls]
            for future in as_completed(futures):
                # result() re-raises any exception from the worker thread.
                future.result()
                pbar.update(1)
但这是结果:
解决方案
这将是一般的想法(根据需要格式化):
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import requests
def download_file(url):
    """Stream the body of ``url`` and discard it, then return ``url``.

    The content is read in 50000-byte chunks and thrown away; the function
    only exists to demonstrate progress reporting around a download.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The ``url`` that was passed in.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            a failed request is not counted as a finished download.
    """
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        for _chunk in r.iter_content(chunk_size=50000):
            pass
    return url
if __name__ == "__main__":
    urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
            "https://www.vmware.com/go/getworkstation-win",
            "https://download.geany.org/geany-1.36_setup.exe"]
    # A single bar that advances by one each time a download finishes.
    with tqdm(total=len(urls)) as pbar:
        with ThreadPoolExecutor(max_workers=len(urls)) as ex:
            pending = []
            for link in urls:
                pending.append(ex.submit(download_file, link))
            for done in as_completed(pending):
                # result() re-raises any exception from the worker thread.
                done.result()
                pbar.update(1)
如果您知道每次下载的长度,可以这样模拟:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import requests
import time
import random
def download_file(url, pbar, steps=30, max_delay=.50):
    """Simulate a download by advancing ``pbar`` one unit at a time.

    No network access happens; each step sleeps for a random duration and
    then calls ``pbar.update(1)``.

    Args:
        url: Identifier of the simulated download; returned unchanged.
        pbar: Object with an ``update(n)`` method (e.g. a tqdm bar) that is
            shared by all simulated downloads.
        steps: Number of progress units this download contributes.
        max_delay: Upper bound, in seconds, of the random sleep per step.

    Returns:
        The ``url`` that was passed in.
    """
    for _ in range(steps):
        time.sleep(max_delay * random.random())
        pbar.update(1)
    return url
if __name__ == "__main__":
    urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
            "https://www.vmware.com/go/getworkstation-win",
            "https://download.geany.org/geany-1.36_setup.exe"]
    # One shared bar sized for all simulated downloads together.
    with tqdm(total=90) as pbar:
        with ThreadPoolExecutor(max_workers=3) as ex:
            pending = []
            for link in urls:
                pending.append(ex.submit(download_file, link, pbar))
            for done in as_completed(pending):
                # result() re-raises any exception from the worker thread.
                done.result()
推荐阅读
- c# - 如何在c#中合并具有不同列和行的两个csv文件
- javascript - Ajv 验证始终返回 true
- javascript - 混合 Angular 应用程序中的变化检测非常慢
- python - werkzeug.routing.BuildError with Flask -- 尝试构建一个非常简单的 webapp
- php - 如何在电子商务网站的数据库中创建产品规格表
- python - TypeError:'str'对象不能解释为整数python for循环
- ruby-on-rails - RSpec - 除非从撬点调用局部变量,否则测试失败
- c# - API 响应始终为“空”
- ios - SwiftUI - 根据条件添加导航栏按钮
- typescript - 使用“ts_library”中的非“.ts”/“.tsx”文件作为依赖项