首页 > 解决方案 > 如何在 Windows 中修复此 UnicodeDecodeError?

问题描述

我正在运行 nuitka 并收到 UnicodeDecodeError。

python -m nuitka --standalone --plugin-enable=tk-inter MyProgram.py

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc6 in position 74: invalid continuation byte
scons: [MyProgram.build\module.babel.core.obj] UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc6 in position 79: invalid continuation byte

MyProgram.py

import datetime
import glob
import mysql.connector
import openpyxl
import os
import PyPDF4
import sys
import tkcalendar
import tkinter.filedialog, tkinter.messagebox, tkinter.ttk

# Client version of this program. At startup it is compared with
# max(VERSAO) from terceirizados.VERSIONAMENTO; the UI only opens on a match.
version = 0.03

def _set_progress(value):
    """Set the progress bar to *value* (clamped to 0..100) and repaint it."""
    pbar["value"] = max(0, min(100, value))
    # verify() runs inside a button callback, so the event loop is blocked;
    # without this the bar would only be redrawn after the whole job finishes.
    pbar.update_idletasks()


def _read_outsourced_rows(sheet, comp):
    """Return one (ctr, comp, dep, sb, cpf, name) tuple per data row of *sheet*.

    The contract id is read from cell A1. Data rows start at row 3 and end at
    the first row whose column 1 (name) is empty. The CPF (column 2) is
    stripped of "-" and "." separators. Columns 5 and 6 are copied as-is.
    """
    ctr = sheet.cell(row=1, column=1).value
    if ctr is None:
        # The original loop never started when A1 was empty; keep that.
        return []
    rows = []
    row = 3
    while True:
        name = sheet.cell(row=row, column=1).value
        if name is None:
            break
        cpf = str(sheet.cell(row=row, column=2).value).replace("-", "").replace(".", "")
        dep = sheet.cell(row=row, column=5).value
        sb = sheet.cell(row=row, column=6).value
        rows.append((ctr, comp, dep, sb, cpf, name))
        row += 1
    return rows


def _extract_pdf_text(path):
    """Return the concatenated extracted text of every page of the PDF at *path*."""
    reader = PyPDF4.PdfFileReader(path)
    return "".join(
        reader.getPage(page).extractText() for page in range(reader.getNumPages())
    )


def verify():
    """Check which people from a spreadsheet are mentioned in a folder of PDFs.

    Asks the user for an .xlsx file and a folder, reads the people listed in
    the spreadsheet, stores them in terceirizados.TERCEIRIZADOS, and writes a
    report workbook into the chosen folder with one row per person and one
    "SIM"/"NÃO" column per PDF. The report is then opened with the associated
    application.

    Relies on the module-level ``base`` (date entry), ``pbar`` (progress bar),
    ``cur`` and ``dbconn`` (database cursor/connection). Returns None.

    NOTE(review): the shared connection is closed at the end, as in the
    original, so a second run in the same process will fail on the database
    write unless the connection is re-opened elsewhere.
    """
    # get_date() yields a datetime.date whatever the widget's display locale
    # is; parsing the displayed text with a fixed "%d/%m/%Y" broke elsewhere.
    comp = base.get_date().strftime("%Y-%m-01")

    tkinter.messagebox.showinfo("Planilha Excel", "Selecione a planilha")
    xlsx = tkinter.filedialog.askopenfilename(
        initialdir="M:\\",
        title="Selecione a planilha",
        filetypes=[("Excel files", "*.xlsx")],
    )
    if not xlsx:  # dialog cancelled
        _set_progress(0)
        return
    _set_progress(5)

    tkinter.messagebox.showinfo("Pasta", "Selecione a pasta contendo os arquivos pdf")
    directory = tkinter.filedialog.askdirectory(
        initialdir="M:\\", title="Selecione a pasta contendo os arquivos pdf"
    )
    if not directory:  # dialog cancelled
        _set_progress(0)
        return
    _set_progress(10)

    wbterc = openpyxl.load_workbook(xlsx, read_only=True)
    try:
        _set_progress(20)
        people = _read_outsourced_rows(wbterc.active, comp)
    finally:
        # Read-only workbooks keep the source file open until closed.
        wbterc.close()
    _set_progress(40)

    headers = ("CTR", "COMPETENCIA", "AG_LOTACAO", "SB_LOTACAO", "CPF_TERC", "NOME_TERC")
    first_person_row = 3
    first_file_column = len(headers) + 1

    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    worksheet.title = "TERCEIRIZADOS"
    worksheet.cell(row=1, column=1).value = directory
    for column, header in enumerate(headers, start=1):
        worksheet.cell(row=2, column=column).value = header

    # Search the chosen folder directly instead of chdir()-ing into it, so the
    # process working directory is left untouched. sorted() fixes column order.
    pdf_paths = sorted(glob.glob(os.path.join(glob.escape(directory), "*.pdf")))

    if pdf_paths:
        # As before, nothing is written or stored when the folder has no PDFs.
        # The person columns and database rows do not depend on the PDF being
        # scanned, so they are written once rather than once per file.
        stmt = (
            "replace into terceirizados.TERCEIRIZADOS ("
            + ", ".join(headers)
            + ") values (%s, %s, %s, %s, %s, %s)"
        )
        for offset, person in enumerate(people):
            row = first_person_row + offset
            ctr, competence, dep, sb, cpf, name = person
            report_values = (ctr, competence, dep, sb, int(cpf), name)
            for column, value in enumerate(report_values, start=1):
                worksheet.cell(row=row, column=column).value = value
            cur.execute(stmt, person)
        if people:
            dbconn.commit()

    # One column per PDF, starting right after the person columns; rows restart
    # at the first person for every file so the sheet is a people x files grid.
    for file_index, path in enumerate(pdf_paths):
        column = first_file_column + file_index
        worksheet.cell(row=2, column=column).value = os.path.basename(path)
        data = _extract_pdf_text(path)
        for offset, person in enumerate(people):
            found = person[5] in data
            worksheet.cell(row=first_person_row + offset, column=column).value = (
                "SIM" if found else "NÃO"
            )
        _set_progress(40 + 55 * (file_index + 1) // len(pdf_paths))

    dbconn.close()
    destfilename = (
        directory
        + "/testeverificacaoterceirizados_"
        + datetime.datetime.now().strftime("%d-%m-%y")
        + ".xlsx"
    )
    workbook.save(filename=destfilename)
    _set_progress(100)
    tkinter.messagebox.showinfo("Fim", "Relatório resultante salvo em " + directory)
    os.startfile(destfilename)

# Open the database connection shared by the version gate below and by
# verify(), which uses the module-level `dbconn` and `cur`.
# SECURITY NOTE(review): host, user and password are embedded in the source.
# They can now be overridden through environment variables; the literals are
# kept only as defaults for backward compatibility and should be removed (and
# the password rotated) once deployments provide the variables.
dbconn = mysql.connector.connect(
    host=os.environ.get("TERCEIRIZADOS_DB_HOST", "pxl0hosp0164.dispositivos.bb.com.br"),
    user=os.environ.get("TERCEIRIZADOS_DB_USER", "terceirizados"),
    password=os.environ.get("TERCEIRIZADOS_DB_PASSWORD", "7417pd2"),
    port=3306,
)
cur = dbconn.cursor()
cur.execute("Select max(V.VERSAO) from terceirizados.VERSIONAMENTO V")
v = cur.fetchone()

# max() yields NULL on an empty table; treat that as "not up to date" instead
# of crashing in float(None). Compare with a tolerance rather than exact
# float equality.
latest = v[0] if v else None
up_to_date = latest is not None and abs(float(latest) - version) < 1e-9

if up_to_date:
    window = tkinter.Tk()
    window.title("MyProgram - v" + str(version))
    window.geometry("350x200")
    label = tkinter.Label(window, text="Defina a competência: ")
    label.place(relx=0.185, rely=0.3, anchor="center")
    base = tkcalendar.DateEntry(window)
    base.place(relx=0.5, rely=0.3, anchor="center")
    button = tkinter.Button(window, text="VERIFICAR", command=verify)
    button.place(relx=0.5, rely=0.5, anchor="center")
    pbar = tkinter.ttk.Progressbar(window, length=100)
    pbar.place(relx=0.5, rely=0.7, anchor="center")
    pbar["value"] = 0
    window.mainloop()
    # verify() closes the connection itself; close it here only when the
    # window was dismissed without a completed run.
    if dbconn.is_connected():
        dbconn.close()
else:
    dbconn.close()
    # Without an explicit root, the message box would create an empty Tk
    # window of its own and leave it on screen; use a hidden root instead.
    root = tkinter.Tk()
    root.withdraw()
    tkinter.messagebox.showinfo("Atualização", "Programa desatualizado")
    root.destroy()

如何绕过它?

标签: python-3.x, scons, nuitka

解决方案


您的源文件中有重音字符:

  • Relatório resultante salvo
  • Defina a competência
  • Atualização

请确认您的整个工具链(nuitka/scons/...)期望输入文件采用 UTF-8 编码,并确保源文件 MyProgram.py 也以 UTF-8 编码保存。

后者似乎并非如此:源文件中的重音字符是以某种本地(扩展 ASCII)编码(巴西葡萄牙语?代码页 860?)保存的,因此 UTF-8 解码器在遇到这些字符时会失败。


推荐阅读