首页 > 解决方案 > encodUnicodeEncodeError: 'charmap' 同时抓取

问题描述

import requests
from bs4 import BeautifulSoup
import csv
import os

URL = 'https://www'
HEADERS = {
    'user-agent': 'Mozilla/5.0',
    'accept': '*/*'}
HOST = 'https://www.wildberries.ru'
FILE = 'bar.csv'


def get_html(url, params=None):
    r = requests.get(url, headers=HEADERS, params=params)
    return r


def get_pages_count(html):enter code here
    soup = BeautifulSoup(html, 'html.parser')
    pagination = soup.find_all('a', class_='pagination-item')
    if pagination:
        return int(pagination[-1].get_text())
    else:
        return 1


def get_content(html):
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', class_='dtList i-dtList j-card-item')

    shoes = []
    for item in items:
        shoes.append({
            'title': item.find('div', class_='dtlist-inner-brand-name').get_text(strip=True),
            'link': HOST + item.find('a', class_="ref_goods_n_p j-open-full-product-card").get('href'),
            'price': item.find('ins', class_='lower-price'),
            'sale': item.find('span', class_='price-sale active').get_text(),
        })
    return shoes


def save_file(items, path):
    with open(path, 'w', newline='') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow(['Название', 'Ссылка', 'Цена', 'Скидка', ])
        for item in items:
            writer.writerow([item['title'], item['link'], item['price'], item['sale']])


def parse():
    URL = input('Tipe URL: ')
    URL = URL.strip()
    html = get_html(URL)
    if html.status_code == 200:
        shoes = []
        pages_count = get_pages_count(html.text)
        for page in range(1, pages_count + 1):
            print(f'Wait {page} from {pages_count}... ')
            html = get_html(URL, params={'page': page})
            shoes.extend(get_content(html.text))
        save_file(shoes, FILE)
        print(f'We found {len(shoes)} shoes')
        os.startfile(FILE)
    else:
        print('Error')


parse()

第 19 行,在编码返回 codecs.charmap_encode(input,self.errors,encoding_table)[0] UnicodeEncodeError: 'charmap' codec can't encode character '\u20bd' in position 129: character maps to I don't know what to do :(我明天必须通过它。所以你能找到我的错误吗?如果我删除 save_file(shoes, FILE) 它可以工作,但方式错误。所以 heeelp

标签: pythoncsvparsingbeautifulsoup

解决方案


您正在导入但未知的符号是俄罗斯卢布符号的符号。您可以在这里找到答案 ,无论您需要 ascii 还是 utf,都应该可以。问题是,您使用的编码中不存在此符号。


推荐阅读