python - 脚本中出现 UnicodeDecodeError:“utf-8”编解码器无法解码字节
问题描述
import sys
sys.path.append('E:\MLDS\resparser')
from resume_parser import ResumeParser
data = ResumeParser(filename).get_extracted_data()
这产生了以下输出,我无法理解是什么原因造成的,因为相同的脚本在另一台安装了相似版本的计算机上可以顺利运行。
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-52-d48f50eae8f2> in <module>
4 from resume_parser import ResumeParser
5
----> 6 data = ResumeParser(filename).get_extracted_data()
E:\MLDS\resparser\resume_parser.py in __init__(self, resume, skills_file, custom_regex)
13 def __init__(self, resume, skills_file=None, custom_regex=None):
14 nlp = spacy.load("en_core_web_sm")
---> 15 custom_nlp = spacy.load(os.path.dirname(os.path.abspath(__file__)))
16 self.__skills_file = skills_file
17 self.__custom_regex = custom_regex
C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\__init__.py in load(name, **overrides)
28 if depr_path not in (True, False, None):
29 warnings.warn(Warnings.W001.format(path=depr_path), DeprecationWarning)
---> 30 return util.load_model(name, **overrides)
31
32
C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\util.py in load_model(name, **overrides)
170 return load_model_from_package(name, **overrides)
171 if Path(name).exists(): # path to model data directory
--> 172 return load_model_from_path(Path(name), **overrides)
173 elif hasattr(name, "exists"): # Path or Path-like to model data
174 return load_model_from_path(name, **overrides)
C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\util.py in load_model_from_path(model_path, meta, **overrides)
220 component = nlp.create_pipe(factory, config=config)
221 nlp.add_pipe(component, name=name)
--> 222 return nlp.from_disk(model_path, exclude=disable)
223
224
C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\language.py in from_disk(self, path, exclude, disable)
972 # Convert to list here in case exclude is (default) tuple
973 exclude = list(exclude) + ["vocab"]
--> 974 util.from_disk(path, deserializers, exclude)
975 self._path = path
976 return self
C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\util.py in from_disk(path, readers, exclude)
688 # Split to support file names like meta.json
689 if key.split(".")[0] not in exclude:
--> 690 reader(path / key)
691 return path
692
C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\language.py in <lambda>(p)
958 deserializers["meta.json"] = deserialize_meta
959 deserializers["vocab"] = deserialize_vocab
--> 960 deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(
961 p, exclude=["vocab"]
962 )
tokenizer.pyx in spacy.tokenizer.Tokenizer.from_disk()
tokenizer.pyx in spacy.tokenizer.Tokenizer.from_bytes()
C:\Miniconda-38\envs\env3.8\lib\site-packages\spacy\util.py in from_bytes(bytes_data, setters, exclude)
664
665 def from_bytes(bytes_data, setters, exclude):
--> 666 msg = srsly.msgpack_loads(bytes_data)
667 for key, setter in setters.items():
668 # Split to support file names like meta.json
C:\Miniconda-38\envs\env3.8\lib\site-packages\srsly\_msgpack_api.py in msgpack_loads(data, use_list)
27 # msgpack-python docs suggest disabling gc before unpacking large messages
28 gc.disable()
---> 29 msg = msgpack.loads(data, raw=False, use_list=use_list)
30 gc.enable()
31 return msg
C:\Miniconda-38\envs\env3.8\lib\site-packages\srsly\msgpack\__init__.py in unpackb(packed, **kwargs)
58 object_hook = kwargs.get('object_hook')
59 kwargs['object_hook'] = functools.partial(_decode_numpy, chain=object_hook)
---> 60 return _unpackb(packed, **kwargs)
61
62
C:\Miniconda-38\envs\env3.8\lib\site-packages\srsly\msgpack\_unpacker.pyx in srsly.msgpack._unpacker.unpackb()
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xda in position 0: invalid continuation byte
我尝试将 spacy 模型更改为 _lg 版本,但没有用。我还运行了“python -m spacy validate”来验证模型,输出如下:
类型 名称 模型 版本
package en-core-web-sm en_core_web_sm 2.3.1 ✔
package en-core-web-lg en_core_web_lg 2.3.1 ✔
我将不胜感激任何可以为我指明正确方向的帮助。
解决方案
推荐阅读
- intellij-idea - 使用 CLion 将文件夹作为项目打开
- php - 更新和重新生成 wordpress 多站点的缩略图
- generics - Kotlin 反映/解析通用模板参数的原始类型
- c++ - 如何仅更改一台显示器的分辨率
- azure-data-factory - 从 Azure 数据工厂中的 api 读取时间戳数据的问题
- c# - 将子游戏对象添加到数组中?
- java - 我无法在我的应用程序上显示我下载的图像
- php - 有没有办法在 PHP 中分隔“ucwords()”,这样第一个字符不会自动大写?
- java - 有什么方法可以最小化 Selenium 中的 chrome 窗口?
- profiling - 测量或分析程序使用的 AVX2(和其他高级指令集)指令的使用情况