首页 > 解决方案 > 脚本中出现 UnicodeDecodeError:“utf-8”编解码器无法解码字节

问题描述

import sys  
sys.path.append('E:\MLDS\resparser')
​
from resume_parser import ResumeParser
​
data = ResumeParser(filename).get_extracted_data()

运行这段代码后得到了以下输出,我无法理解是什么原因造成的,因为相同的脚本在另一台安装了相近版本的计算机上可以顺利运行。

---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-52-d48f50eae8f2> in <module>
      4 from resume_parser import ResumeParser
      5 
----> 6 data = ResumeParser(filename).get_extracted_data()

E:\MLDS\resparser\resume_parser.py in __init__(self, resume, skills_file, custom_regex)
     13     def __init__(self, resume, skills_file=None, custom_regex=None):
     14         nlp = spacy.load("en_core_web_sm")
---> 15         custom_nlp = spacy.load(os.path.dirname(os.path.abspath(__file__)))
     16         self.__skills_file = skills_file
     17         self.__custom_regex = custom_regex

C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\__init__.py in load(name, **overrides)
     28     if depr_path not in (True, False, None):
     29         warnings.warn(Warnings.W001.format(path=depr_path), DeprecationWarning)
---> 30     return util.load_model(name, **overrides)
     31 
     32 

C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\util.py in load_model(name, **overrides)
    170             return load_model_from_package(name, **overrides)
    171         if Path(name).exists():  # path to model data directory
--> 172             return load_model_from_path(Path(name), **overrides)
    173     elif hasattr(name, "exists"):  # Path or Path-like to model data
    174         return load_model_from_path(name, **overrides)

C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\util.py in load_model_from_path(model_path, meta, **overrides)
    220             component = nlp.create_pipe(factory, config=config)
    221             nlp.add_pipe(component, name=name)
--> 222     return nlp.from_disk(model_path, exclude=disable)
    223 
    224 

C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\language.py in from_disk(self, path, exclude, disable)
    972             # Convert to list here in case exclude is (default) tuple
    973             exclude = list(exclude) + ["vocab"]
--> 974         util.from_disk(path, deserializers, exclude)
    975         self._path = path
    976         return self

C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\util.py in from_disk(path, readers, exclude)
    688         # Split to support file names like meta.json
    689         if key.split(".")[0] not in exclude:
--> 690             reader(path / key)
    691     return path
    692 

C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\language.py in <lambda>(p)
    958         deserializers["meta.json"] = deserialize_meta
    959         deserializers["vocab"] = deserialize_vocab
--> 960         deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(
    961             p, exclude=["vocab"]
    962         )

tokenizer.pyx in spacy.tokenizer.Tokenizer.from_disk()

tokenizer.pyx in spacy.tokenizer.Tokenizer.from_bytes()

C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\util.py in from_bytes(bytes_data, setters, exclude)
    664 
    665 def from_bytes(bytes_data, setters, exclude):
--> 666     msg = srsly.msgpack_loads(bytes_data)
    667     for key, setter in setters.items():
    668         # Split to support file names like meta.json

C:\Miniconda-38\envs\env3.8\lib\site-packages\srsly\_msgpack_api.py in msgpack_loads(data, use_list)
     27     # msgpack-python docs suggest disabling gc before unpacking large messages
     28     gc.disable()
---> 29     msg = msgpack.loads(data, raw=False, use_list=use_list)
     30     gc.enable()
     31     return msg

C:\Miniconda-38\envs\env3.8\lib\site-packages\srsly\msgpack\__init__.py in unpackb(packed, **kwargs)
     58         object_hook = kwargs.get('object_hook')
     59         kwargs['object_hook'] = functools.partial(_decode_numpy, chain=object_hook)
---> 60     return _unpackb(packed, **kwargs)
     61 
     62 

C:\Miniconda-38\envs\env3.8\lib\site-packages\srsly\msgpack\_unpacker.pyx in srsly.msgpack._unpacker.unpackb()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xda in position 0: invalid continuation byte

我尝试将 spacy 模型更改为 _lg 但没有用,我确实尝试了“python -m spacy validate”来验证模型,输出如下:

类型      名称             模型             版本
package   en-core-web-sm   en_core_web_sm   2.3.1   ✔
package   en-core-web-lg   en_core_web_lg   2.3.1   ✔

如果有人能为我指明正确的方向,我将不胜感激。

标签: python python-3.x spacy

解决方案


推荐阅读