首页 > 解决方案 > 无法加载图片,图片的 URL 被自动更改

问题描述

我在学校有一个电子商务网站项目,需要大量图像才能完成。所以我参考了 YouTube 上 John Watson Rooney 视频中下载图像的代码。但下载到一半时遇到了问题:图片的 URL 变成了 'data:image/gif;base64,R0lGODdhFQAXAPAAANba3wAAACwAAAAAFQAXAAACFISPqcvtD6OctNqLs968+w+GolUAADs=',导致我无法继续下载。

import requests
from bs4 import BeautifulSoup
import os
import base64

def imagedown(url, folder):
    """Save the product images listed on the page at *url* into *folder*.

    *folder* is created inside the current working directory (any error
    raised while creating it is ignored) and then becomes the process's
    working directory.  Every ``<img>`` tag carrying the class
    ``styles__productImage--3ZNPD`` is written to ``<alt text>.jpg`` after
    the characters ``/``, ``?``, ``=`` and ``|`` are stripped from the name.

    NOTE: each tag's ``src`` value is handed to ``requests.get`` unchanged,
    and the output file is opened before that request is made, so a request
    that raises leaves an empty file behind.
    """
    target_dir = os.path.join(os.getcwd(), folder)
    try:
        os.mkdir(target_dir)
    except:
        pass
    os.chdir(target_dir)

    page = requests.get(url)
    document = BeautifulSoup(page.text, 'html.parser')

    for tag in document.find_all('img', class_='styles__productImage--3ZNPD'):
        title = tag['alt']
        source = tag['src']

        # Strip the same four characters from the file name, one at a time.
        filename = title
        for unwanted in ('/', '?', '=', '|'):
            filename = filename.replace(unwanted, '')

        with open(filename + '.jpg', 'wb') as out:
            reply = requests.get(source)
            out.write(reply.content)
            print('Writing: ', title)

# Run the downloader against the search-results page below (page=2,
# query=dog) and store the files in a folder named "Images".
imagedown(
    'https://www.redbubble.com/shop/?gender=gender-men&iaCode=u-tees&page=2&query=dog&sortOrder=relevant&style=u-tee-regular-crew',
    'Images',
)

在此处输入图像描述

不知道哪里出错了,求大神帮忙,谢谢

标签: python, web-crawler

解决方案


这些图像以 base64 编码的 data URI 形式直接嵌入在 `src` 属性中,因此不需要通过网络下载,只需将其解码后保存即可,如下所示:

import mimetypes
import os
import re
from urllib.parse import urljoin, urlsplit
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup

def _safe_filename(name):
    """Return *name* with the characters ``/``, ``?``, ``=`` and ``|`` removed."""
    return name.translate(str.maketrans('', '', '/?=|'))


def _read_image(link, page_url, timeout):
    """Return ``(data, ext)`` for a single ``<img src>`` value.

    ``data:`` URIs are decoded locally; anything else is resolved against
    *page_url* and fetched over HTTP.

    Raises:
        requests.RequestException: the HTTP download failed or returned an
            error status.
    """
    if link.startswith('data:'):
        # urlopen handles the "data" scheme itself and reports the declared
        # media type through the response headers.
        with urlopen(link) as response:
            mime = response.headers.get_content_type()
            data = response.read()
        # Fall back to the MIME subtype when the type is unknown to
        # mimetypes, so the extension is never None.
        ext = mimetypes.guess_extension(mime) or '.' + mime.rpartition('/')[2]
        return data, ext

    # Resolve relative and protocol-relative ("//host/...") sources.
    absolute = urljoin(page_url, link)
    response = requests.get(absolute, timeout=timeout)
    # Do not save an HTML error page under an image file name.
    response.raise_for_status()
    # Take the extension from the URL path only, so a query string such as
    # "?v=2" does not end up in the file name.
    ext = os.path.splitext(urlsplit(absolute).path)[1]
    return response.content, ext


def imagedown(url, folder, timeout=30):
    """Save the product images listed on the page at *url* into *folder*.

    Every ``<img>`` tag with the class ``styles__productImage--3ZNPD`` is
    written to ``<folder>/<alt text><ext>``.  Images embedded as ``data:``
    URIs are decoded directly; all others are downloaded.

    Unlike earlier versions, the process's working directory is left
    untouched, so calling the function repeatedly does not nest folders.

    NOTE(review): on lazily loaded pages a ``data:`` ``src`` may be only a
    placeholder for the real picture - confirm against the target site.

    Args:
        url: Address of the listing page to scrape.
        folder: Directory, relative to the current working directory, that
            receives the files.  It is created when missing.
        timeout: Seconds to wait for each HTTP request.

    Raises:
        requests.RequestException: the listing page could not be fetched.
        OSError: the folder or an output file could not be created.
    """
    target_dir = os.path.join(os.getcwd(), folder)
    os.makedirs(target_dir, exist_ok=True)

    page = requests.get(url, timeout=timeout)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'html.parser')

    for image in soup.find_all('img', class_='styles__productImage--3ZNPD'):
        name = image.get('alt')
        link = image.get('src')
        if name is None or link is None:
            # Without both attributes there is nothing to name or fetch.
            continue

        try:
            data, ext = _read_image(link, url, timeout)
        except requests.RequestException as exc:
            # One broken image should not abort the remaining downloads.
            print('Skipping: ', name, exc)
            continue

        # The file is opened only after the data is in hand, so a failed
        # download never leaves an empty file behind.
        path = os.path.join(target_dir, _safe_filename(name) + ext)
        with open(path, 'wb') as f:
            f.write(data)
        print('Writing: ', name)

# Run the downloader against the search-results page below (page=2,
# query=dog) and store the files in a folder named "Images".
imagedown(
    'https://www.redbubble.com/shop/?gender=gender-men&iaCode=u-tees&page=2&query=dog&sortOrder=relevant&style=u-tee-regular-crew',
    'Images',
)

推荐阅读