Show More
@@ -98,6 +98,7 b' if ispy3:' | |||||
98 | import codecs |
|
98 | import codecs | |
99 | import functools |
|
99 | import functools | |
100 | import io |
|
100 | import io | |
|
101 | import locale | |||
101 | import struct |
|
102 | import struct | |
102 |
|
103 | |||
103 | if os.name == r'nt' and sys.version_info >= (3, 6): |
|
104 | if os.name == r'nt' and sys.version_info >= (3, 6): | |
@@ -148,15 +149,36 b' if ispy3:' | |||||
148 | stdout = sys.stdout.buffer |
|
149 | stdout = sys.stdout.buffer | |
149 | stderr = sys.stderr.buffer |
|
150 | stderr = sys.stderr.buffer | |
150 |
|
151 | |||
151 | # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix, |
|
|||
152 | # we can use os.fsencode() to get back bytes argv. |
|
|||
153 | # |
|
|||
154 | # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55 |
|
|||
155 | # |
|
|||
156 | # On Windows, the native argv is unicode and is converted to MBCS bytes |
|
|||
157 | # since we do enable the legacy filesystem encoding. |
|
|||
158 | if getattr(sys, 'argv', None) is not None: |
|
152 | if getattr(sys, 'argv', None) is not None: | |
159 | sysargv = list(map(os.fsencode, sys.argv)) |
|
153 | # On POSIX, the char** argv array is converted to Python str using | |
|
154 | # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which isn't | |||
|
155 | # directly callable from Python code. So, we need to emulate it. | |||
|
156 | # Py_DecodeLocale() calls mbstowcs() and falls back to mbrtowc() with | |||
|
157 | # surrogateescape error handling on failure. These functions take the | |||
|
158 | # current system locale into account. So, the inverse operation is to | |||
|
159 | # .encode() using the system locale's encoding and using the | |||
|
160 | # surrogateescape error handler. The only tricky part here is getting | |||
|
161 | # the system encoding correct, since `locale.getlocale()` can return | |||
|
162 | # None for the encoding. We fall back to the filesystem encoding if | |||

163 | # lookups via `locale` fail, as this seems like a reasonable thing to do. | |||
|
164 | # | |||
|
165 | # On Windows, the wchar_t **argv is passed into the interpreter as-is. | |||
|
166 | # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But | |||
|
167 | # there's an additional wrinkle. What we really want to access is the | |||
|
168 | # ANSI codepage representation of the arguments, as this is what | |||
|
169 | # `int main()` would receive if Python 3 didn't define `int wmain()` | |||
|
170 | # (this is how Python 2 worked). To get that, we encode with the mbcs | |||
|
171 | # encoding, which will pass CP_ACP to the underlying Windows API to | |||
|
172 | # produce bytes. | |||
|
173 | if os.name == r'nt': | |||
|
174 | sysargv = [a.encode("mbcs", "ignore") for a in sys.argv] | |||
|
175 | else: | |||
|
176 | encoding = ( | |||
|
177 | locale.getlocale()[1] | |||
|
178 | or locale.getdefaultlocale()[1] | |||
|
179 | or sys.getfilesystemencoding() | |||
|
180 | ) | |||
|
181 | sysargv = [a.encode(encoding, "surrogateescape") for a in sys.argv] | |||
160 |
|
182 | |||
161 | bytechr = struct.Struct('>B').pack |
|
183 | bytechr = struct.Struct('>B').pack | |
162 | byterepr = b'%r'.__mod__ |
|
184 | byterepr = b'%r'.__mod__ |
General Comments 0
You need to be logged in to leave comments.
Login now