# HG changeset patch # User Gregory Szorc # Date 2019-11-05 04:46:19 # Node ID a8454e84673641e0132692df2d151a3804e8cc50 # Parent a0916e8819f6e8afc7e3ac966723811ae4057d44 import-checker: open all source files as utf-8 Before, we opened in text mode and used the default encoding to interpret the bytes within. This caused problems interpreting some byte sequences in some files. This commit changes things to always open files as UTF-8, which makes the error go away. test-check-module-imports.t now passes on Python 3.5 and 3.6 with this change. Differential Revision: https://phab.mercurial-scm.org/D7225 diff --git a/contrib/import-checker.py b/contrib/import-checker.py --- a/contrib/import-checker.py +++ b/contrib/import-checker.py @@ -4,6 +4,7 @@ from __future__ import absolute_import, import ast import collections +import io import os import sys @@ -754,7 +755,11 @@ def sources(f, modname): yield src.read(), modname, f, 0 py = True if py or f.endswith('.t'): - with open(f, 'r') as src: + # Strictly speaking we should sniff for the magic header that denotes + # Python source file encoding. But in reality we don't use anything + # other than ASCII (mainly) and UTF-8 (in a few exceptions), so + # simplicity is fine. + with io.open(f, 'r', encoding='utf-8') as src: for script, modname, t, line in embedded(f, modname, src): yield script, modname.encode('utf8'), t, line