首页 > 技术文章 > python多线程下载网页图片并保存至特定目录

noxy 2017-12-21 00:01 原文

#!python3
#multidownloadXkcd.py  - Download XKCD comics using multiple threads.

import requests
import bs4
import os
import threading

# os.mkdir('xkcd', exist_ok=True)     # store comics in ./xkcd
if os.path.exists('xkcd'):
    print("xkcd is existed!")
else:
    os.mkdir('xkcd')

def downloadXkcd(startComic, endComic):
    for urlNumber in range(startComic, endComic):
        #Download the page
        print("Downloading page http://xkcd.com/%s..." % urlNumber)
        res = requests.get('http://xkcd.com/%s' % urlNumber)
        res.raise_for_status()

        print(res.text)
        soup = bs4.BeautifulSoup(res.text)

        #Find the URL of the comic image.
        comicElem = soup.select('#comic img')
        if comicElem == []:
            print('Could not find comic images.')
        else:
            comicUrl = comicElem[0].get('src')
        #     #Download the image.
        #     print('Downloading image %s...' % (comicUrl))
        #     res = requests.get(comicUrl)
        #     res.raise_for_status()
        #
        #     # Save the image to ./xkcd
        #     imageFile = open(os.path.join('xkcd', os.path.basename(comicUrl)), 'wb')
        #     for chunk in res.iter_content(100000):
        #         imageFile.write(chunk)
        #     imageFile.close()

downloadThread = threading.Thread(target=downloadXkcd(555, 557))
downloadThread.start()

# # TODO: Create and start the thread objects
# downloadThreads = []        # a list of all the Thread objects
# for i in range(500, 600, 10):
#     downloadThread = threading.Thread(target=downloadXkcd, args=(i, i+9))
#     downloadThreads.append(downloadThread)
#     downloadThread.start()
#
# # TODO: Wait for all threads to end
# for downloadThread in downloadThreads:
#     downloadThread.join()
# print("Done.")

  

推荐阅读