首页 > 技术文章 > 【爬虫】必应图片按关键词进行图片下载

waterr 2021-02-27 17:44 原文

 

 1 """
 2 关键是找到正确的url,然后判断组成url的参数的规律,多观察几个url即可得出规律
 3 """
 4 
 5 import requests
 6 from fake_useragent import UserAgent
 7 import os
 8 from lxml import etree
 9 import json
10 import time
11 
# Send a User-Agent string picked by fake_useragent with every request.
headers = {'User-Agent': UserAgent().random}

keyword = "周星驰"
# Placeholders, in order: q (search keyword), first, relp. The script advances
# first and relp together by 35 per request, the same value as count in the URL.
url = 'https://cn.bing.com/images/async?q={}&first={}&count=35&relp={}&tsc=ImageHoverTitle&mmasync=1'

# Images are saved into a directory named after the keyword. exist_ok avoids
# the check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(keyword, exist_ok=True)
os.chdir(keyword)

page_size = 35        # step applied to first/relp after every page
max_pages = 5         # pages requested: first = 35, 70, 105, 140, 175
request_timeout = 10  # seconds; without it a stalled connection blocks forever

first = 35
relp = 35
count = 1  # number used for the next saved file: 1.jpg, 2.jpg, ...

for _ in range(max_pages):
    # Fetch and parse one result page. A failure here is reported and the page
    # is skipped, so a persistent error can no longer retry the same page in
    # an endless loop.
    try:
        response = requests.get(
            url.format(keyword, first, relp),
            headers=headers,
            timeout=request_timeout,
        )
        response.raise_for_status()
        html = etree.HTML(response.content.decode())
        # Each result anchor stores a JSON document in its "m" attribute.
        pics_url = html.xpath("//a[@class='iusc']/@m")
    except Exception as e:
        print(e)
        pics_url = []

    for pic_meta in pics_url:
        # Handle each image on its own: one bad entry or failed download must
        # not abort the rest of the page or cause earlier images to be fetched
        # again under new numbers.
        try:
            pic_url = json.loads(pic_meta)['turl']
            pic_response = requests.get(
                pic_url, headers=headers, timeout=request_timeout
            )
            # Do not save an HTTP error body as if it were an image.
            pic_response.raise_for_status()

            with open('{}.jpg'.format(count), "wb") as f:
                f.write(pic_response.content)
        except Exception as e:
            print(e)
        else:
            print('第{}张已下载完成'.format(count))
            count += 1

        # Pause between image requests, whether or not the last one succeeded.
        time.sleep(0.5)

    # Always advance to the next page so the loop is guaranteed to terminate.
    first += page_size
    relp += page_size

 

推荐阅读