我认为最好的答案（在 Python 3 中）是使用 errors= 参数：
with open('evil_unicode.txt', 'r', errors='replace') as f:
    lines = f.readlines()
>>> s = b'\xe5abc\nline2\nline3'
>>> with open('evil_unicode.txt','wb') as f:
...     f.write(s)
...
16
>>> with open('evil_unicode.txt', 'r') as f:
...     lines = f.readlines()
...
Traceback (most recent call last):
  File "<stdin>", line 2, in <module>
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/codecs.py", line 319, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe5 in position 0: invalid continuation byte
>>> with open('evil_unicode.txt', 'r', errors='replace') as f:
...     lines = f.readlines()
...
>>> lines
['�abc\n', 'line2\n', 'line3']
>>>
>>> with open('evil_unicode.txt', 'r', errors='ignore') as f:
...     lines = f.readlines()
...
>>> lines
['abc\n', 'line2\n', 'line3']