首页 > 解决方案 > 打印 Mbox 消息时出现 UnicodeEncodeError

问题描述

我正在尝试打印出欺诈性电子邮件 Mbox 数据集中存储的每一封电子邮件，代码如下：

for email in mailbox.mbox("fraudulent_emails.mbox"):
    print(email)

它打印了几封邮件，但随后因以下错误而崩溃：

UnicodeEncodeError                        Traceback (most recent call last)
<ipython-input-68-af6159dbf7cf> in <module>
      5 
      6 for email in mailbox.mbox("fraudulent_emails.mbox"):
----> 7     print(email)
      8 

c:\users\fredd\appdata\local\programs\python\python38\lib\email\message.py in __str__(self)
    133         """Return the entire formatted message as a string.
    134         """
--> 135         return self.as_string()
    136 
    137     def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):

c:\users\fredd\appdata\local\programs\python\python38\lib\email\message.py in as_string(self, unixfrom, maxheaderlen, policy)
    156                       maxheaderlen=maxheaderlen,
    157                       policy=policy)
--> 158         g.flatten(self, unixfrom=unixfrom)
    159         return fp.getvalue()
    160 

c:\users\fredd\appdata\local\programs\python\python38\lib\email\generator.py in flatten(self, msg, unixfrom, linesep)
    114                     ufrom = 'From nobody ' + time.ctime(time.time())
    115                 self.write(ufrom + self._NL)
--> 116             self._write(msg)
    117         finally:
    118             self.policy = old_gen_policy

c:\users\fredd\appdata\local\programs\python\python38\lib\email\generator.py in _write(self, msg)
    179             self._munge_cte = None
    180             self._fp = sfp = self._new_buffer()
--> 181             self._dispatch(msg)
    182         finally:
    183             self._fp = oldfp

c:\users\fredd\appdata\local\programs\python\python38\lib\email\generator.py in _dispatch(self, msg)
    212             if meth is None:
    213                 meth = self._writeBody
--> 214         meth(msg)
    215 
    216     #

c:\users\fredd\appdata\local\programs\python\python38\lib\email\generator.py in _handle_text(self, msg)
    241                 msg = deepcopy(msg)
    242                 del msg['content-transfer-encoding']
--> 243                 msg.set_payload(payload, charset)
    244                 payload = msg.get_payload()
    245                 self._munge_cte = (msg['content-transfer-encoding'],

c:\users\fredd\appdata\local\programs\python\python38\lib\email\message.py in set_payload(self, payload, charset)
    313             if not isinstance(charset, Charset):
    314                 charset = Charset(charset)
--> 315             payload = payload.encode(charset.output_charset)
    316         if hasattr(payload, 'decode'):
    317             self._payload = payload.decode('ascii', 'surrogateescape')

UnicodeEncodeError: 'ascii' codec can't encode character '\ufffd' in position 95: ordinal not in range(128)

我试过指定编码并将编解码器设置为 utf-8，但仍然遇到同样的问题。

标签: python, python-3.x, email, character-encoding

解决方案


推荐阅读