首页 > 技术文章 > 素材库图片下载

duoba 2020-07-09 14:49 原文

import re
import requests
import random
import time
import os.path
from bs4 import BeautifulSoup

import pymysql

# Pool of desktop-browser User-Agent strings; one is picked at random per run
# so that every request made by this script carries a browser-like identity.
user_agent_list = [
    # Chrome 22 / 20
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    # Chrome 19
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# Pick one string from the pool; it stays fixed for the whole run.
UA = random.choice(user_agent_list)

# Request headers sent with every image download.
headers = {
    'User-Agent': UA,
}

# Helper: extract the file extension from a path
def file_extension(path):
  """Return the extension of *path* including the leading dot.

  Returns an empty string when the last path component has no extension.
  """
  _stem, extension = os.path.splitext(path)
  return extension

# Directory the downloaded images are written to. open() does not create
# missing directories, so it has to exist before the script runs.
SAVE_DIR = 'C:\\Users\\mydell\\Desktop\\sucai365\\'
# Site-relative prefix that replaces the remote image URL inside the markup.
URL_PREFIX = '/20200708/'

# Connect to the database
conn = pymysql.connect(user='root', password='1234', host='127.0.0.1', database='sucai')

try:
    # Create a cursor
    cursor = conn.cursor()

    # execute() returns the number of rows produced by the statement
    effect_rows = cursor.execute('select * from sucaix_copy2 where id>10851')
    print("受影响的行数", effect_rows)

    # Fetch every selected row up front; the same cursor is reused for the
    # UPDATE statements below.
    results = cursor.fetchall()
    xx = 1012642  # running number used as the next local image file name

    # ----------------------------------------
    for row in results:
        # Column 0 is the primary key, column 2 the markup that is scanned
        # for <img> tags and rewritten.
        row_id = row[0]
        rowcode = row[2]

        soup = BeautifulSoup(rowcode, 'lxml')

        # URLs already handled for this row. str.replace() below rewrites
        # every occurrence at once, so a repeated URL needs no second download.
        handled = set()

        for img_tag in soup.find_all('img'):
            url0 = img_tag.get('src')
            # Skip <img> tags without a usable src and repeated URLs.
            if not url0 or url0 in handled:
                continue
            handled.add(url0)

            # Download the image
            print(url0)
            try:
                response = requests.get(url0, headers=headers, timeout=5)
                # Do not store an HTTP error page as if it were the image.
                response.raise_for_status()
            except requests.RequestException as exc:
                # One unreachable image must not abort the whole batch; the
                # original URL simply stays in the markup.
                print('download failed, keeping original URL:', url0, exc)
                continue

            # Take the extension from the URL without its query string or
            # fragment, so "a.jpg?x=1" gives ".jpg" rather than ".jpg?x=1"
            # (which is not a valid file name on Windows).
            url_without_query = url0.partition('?')[0].partition('#')[0]
            img_name = str(xx) + file_extension(url_without_query)

            with open(SAVE_DIR + img_name, 'wb') as f:
                f.write(response.content)

            # Point the markup at the local copy.
            rowcode = rowcode.replace(url0, URL_PREFIX + img_name)

            xx += 1

        # Store the rewritten markup; execute() returns the affected row count
        effect_rows = cursor.execute('update sucaix_copy2 set rowcode2=%s  where id=%s', [rowcode, row_id])

        # Commit per row so finished rows survive a later failure.
        conn.commit()
        print(row_id)
finally:
    # Always release the connection, even when a row fails half-way.
    conn.close()

推荐阅读