##// END OF EJS Templates
compat: initialize LC_CTYPE locale on all Python versions and platforms...
Manuel Jacob -
r45923:a25343d1 default
parent child Browse files
Show More
@@ -1,531 +1,552 b''
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import inspect
14 import inspect
15 import json
15 import json
16 import locale
16 import os
17 import os
17 import shlex
18 import shlex
18 import sys
19 import sys
19 import tempfile
20 import tempfile
20
21
21 ispy3 = sys.version_info[0] >= 3
22 ispy3 = sys.version_info[0] >= 3
22 ispypy = '__pypy__' in sys.builtin_module_names
23 ispypy = '__pypy__' in sys.builtin_module_names
23 TYPE_CHECKING = False
24 TYPE_CHECKING = False
24
25
25 if not globals(): # hide this from non-pytype users
26 if not globals(): # hide this from non-pytype users
26 import typing
27 import typing
27
28
28 TYPE_CHECKING = typing.TYPE_CHECKING
29 TYPE_CHECKING = typing.TYPE_CHECKING
29
30
30 if not ispy3:
31 if not ispy3:
31 import cookielib
32 import cookielib
32 import cPickle as pickle
33 import cPickle as pickle
33 import httplib
34 import httplib
34 import Queue as queue
35 import Queue as queue
35 import SocketServer as socketserver
36 import SocketServer as socketserver
36 import xmlrpclib
37 import xmlrpclib
37
38
38 from .thirdparty.concurrent import futures
39 from .thirdparty.concurrent import futures
39
40
def future_set_exception_info(f, exc_info):
    """Record a failure on the future ``f`` (Python 2 flavour).

    The items of ``exc_info`` are passed as positional arguments to
    ``f.set_exception_info()``.
    """
    record = f.set_exception_info
    record(*exc_info)
42
43
43
44
44 else:
45 else:
45 import concurrent.futures as futures
46 import concurrent.futures as futures
46 import http.cookiejar as cookielib
47 import http.cookiejar as cookielib
47 import http.client as httplib
48 import http.client as httplib
48 import pickle
49 import pickle
49 import queue as queue
50 import queue as queue
50 import socketserver
51 import socketserver
51 import xmlrpc.client as xmlrpclib
52 import xmlrpc.client as xmlrpclib
52
53
def future_set_exception_info(f, exc_info):
    """Record a failure on the future ``f`` (Python 3 flavour).

    Only the first item of ``exc_info`` is used; it is handed to
    ``f.set_exception()``.
    """
    record = f.set_exception
    record(exc_info[0])
55
56
56
57
def identity(a):
    """Return ``a`` itself, unchanged."""
    return a
59
60
60
61
def _rapply(f, xs):
    """Recursive worker for rapply().

    Lists, sets, tuples and dicts are rebuilt with their own type after the
    function has been applied to every element (for dicts: to every key and
    every value). ``None`` is returned untouched, anything else is passed to
    ``f``.
    """
    if xs is None:
        # None is taken to mean "optional value not present"; f never sees it
        return xs
    container = type(xs)
    if isinstance(xs, (list, set, tuple)):
        return container(_rapply(f, item) for item in xs)
    if isinstance(xs, dict):
        return container(
            (_rapply(f, key), _rapply(f, value)) for key, value in xs.items()
        )
    return f(xs)
70
71
71
72
def rapply(f, xs):
    """Apply ``f`` to every leaf of ``xs`` while keeping its structure

    Lists, sets, tuples and dicts are walked recursively; ``None`` is left
    alone.

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    When ``f`` is ``identity`` the very same object is returned:

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    # shortcut (mainly useful on py2): nothing to do for the identity function
    return xs if f is identity else _rapply(f, xs)
94
95
95
96
# An empty locale string asks for the locale selected by the user settings
# (environment variables).
# Python does not always set up the global locale settings, yet C code we
# interface with (e.g. the curses module or the Subversion bindings) needs them
# to be initialized correctly. Python 2 does not initialize the global locale
# settings on interpreter startup. Python 3 sometimes initializes LC_CTYPE,
# but not consistently, at least on Windows. To get consistent behavior we
# initialize LC_CTYPE explicitly whenever it still reports 'C'. Since CPython
# commit 177d921c8c03d30daa32994362023f777624b10d, LC_CTYPE is always
# initialized; once Python 3.8+ is required, re-check whether this code can be
# removed.
if locale.setlocale(locale.LC_CTYPE, None) == 'C':
    try:
        locale.setlocale(locale.LC_CTYPE, '')
    except locale.Error:
        # Most likely the locale named by the environment variables is
        # unknown; keep the current setting in that case.
        pass
115
116
96 if ispy3:
117 if ispy3:
97 import builtins
118 import builtins
98 import codecs
119 import codecs
99 import functools
120 import functools
100 import io
121 import io
101 import struct
122 import struct
102
123
103 if os.name == r'nt' and sys.version_info >= (3, 6):
124 if os.name == r'nt' and sys.version_info >= (3, 6):
104 # MBCS (or ANSI) filesystem encoding must be used as before.
125 # MBCS (or ANSI) filesystem encoding must be used as before.
105 # Otherwise non-ASCII filenames in existing repositories would be
126 # Otherwise non-ASCII filenames in existing repositories would be
106 # corrupted.
127 # corrupted.
107 # This must be set once prior to any fsencode/fsdecode calls.
128 # This must be set once prior to any fsencode/fsdecode calls.
108 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
129 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
109
130
110 fsencode = os.fsencode
131 fsencode = os.fsencode
111 fsdecode = os.fsdecode
132 fsdecode = os.fsdecode
112 oscurdir = os.curdir.encode('ascii')
133 oscurdir = os.curdir.encode('ascii')
113 oslinesep = os.linesep.encode('ascii')
134 oslinesep = os.linesep.encode('ascii')
114 osname = os.name.encode('ascii')
135 osname = os.name.encode('ascii')
115 ospathsep = os.pathsep.encode('ascii')
136 ospathsep = os.pathsep.encode('ascii')
116 ospardir = os.pardir.encode('ascii')
137 ospardir = os.pardir.encode('ascii')
117 ossep = os.sep.encode('ascii')
138 ossep = os.sep.encode('ascii')
118 osaltsep = os.altsep
139 osaltsep = os.altsep
119 if osaltsep:
140 if osaltsep:
120 osaltsep = osaltsep.encode('ascii')
141 osaltsep = osaltsep.encode('ascii')
121 osdevnull = os.devnull.encode('ascii')
142 osdevnull = os.devnull.encode('ascii')
122
143
123 sysplatform = sys.platform.encode('ascii')
144 sysplatform = sys.platform.encode('ascii')
124 sysexecutable = sys.executable
145 sysexecutable = sys.executable
125 if sysexecutable:
146 if sysexecutable:
126 sysexecutable = os.fsencode(sysexecutable)
147 sysexecutable = os.fsencode(sysexecutable)
127 bytesio = io.BytesIO
148 bytesio = io.BytesIO
128 # TODO deprecate stringio name, as it is a lie on Python 3.
149 # TODO deprecate stringio name, as it is a lie on Python 3.
129 stringio = bytesio
150 stringio = bytesio
130
151
def maplist(*args):
    """Call ``map(*args)`` and return the results as a list"""
    mapped = map(*args)
    return list(mapped)
133
154
def rangelist(*args):
    """Call ``range(*args)`` and return the numbers as a list"""
    numbers = range(*args)
    return list(numbers)
136
157
def ziplist(*args):
    """Call ``zip(*args)`` and return the tuples as a list"""
    pairs = zip(*args)
    return list(pairs)
139
160
140 rawinput = input
161 rawinput = input
141 getargspec = inspect.getfullargspec
162 getargspec = inspect.getfullargspec
142
163
143 long = int
164 long = int
144
165
145 # Warning: sys.stdout.buffer and sys.stderr.buffer do not necessarily have
166 # Warning: sys.stdout.buffer and sys.stderr.buffer do not necessarily have
146 # the same buffering behavior as sys.stdout and sys.stderr. The interpreter
167 # the same buffering behavior as sys.stdout and sys.stderr. The interpreter
147 # initializes them with block-buffered streams or unbuffered streams (when
168 # initializes them with block-buffered streams or unbuffered streams (when
148 # the -u option or the PYTHONUNBUFFERED environment variable is set), never
169 # the -u option or the PYTHONUNBUFFERED environment variable is set), never
149 # with a line-buffered stream.
170 # with a line-buffered stream.
150 # TODO: .buffer might not exist if std streams were replaced; we'll need
171 # TODO: .buffer might not exist if std streams were replaced; we'll need
151 # a silly wrapper to make a bytes stream backed by a unicode one.
172 # a silly wrapper to make a bytes stream backed by a unicode one.
152 stdin = sys.stdin.buffer
173 stdin = sys.stdin.buffer
153 stdout = sys.stdout.buffer
174 stdout = sys.stdout.buffer
154 stderr = sys.stderr.buffer
175 stderr = sys.stderr.buffer
155
176
156 if getattr(sys, 'argv', None) is not None:
177 if getattr(sys, 'argv', None) is not None:
157 # On POSIX, the char** argv array is converted to Python str using
178 # On POSIX, the char** argv array is converted to Python str using
158 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
179 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
159 # isn't directly callable from Python code. In practice, os.fsencode()
180 # isn't directly callable from Python code. In practice, os.fsencode()
160 # can be used instead (this is recommended by Python's documentation
181 # can be used instead (this is recommended by Python's documentation
161 # for sys.argv).
182 # for sys.argv).
162 #
183 #
163 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
184 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
164 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
185 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
165 # there's an additional wrinkle. What we really want to access is the
186 # there's an additional wrinkle. What we really want to access is the
166 # ANSI codepage representation of the arguments, as this is what
187 # ANSI codepage representation of the arguments, as this is what
167 # `int main()` would receive if Python 3 didn't define `int wmain()`
188 # `int main()` would receive if Python 3 didn't define `int wmain()`
168 # (this is how Python 2 worked). To get that, we encode with the mbcs
189 # (this is how Python 2 worked). To get that, we encode with the mbcs
169 # encoding, which will pass CP_ACP to the underlying Windows API to
190 # encoding, which will pass CP_ACP to the underlying Windows API to
170 # produce bytes.
191 # produce bytes.
171 if os.name == r'nt':
192 if os.name == r'nt':
172 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
193 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
173 else:
194 else:
174 sysargv = [fsencode(a) for a in sys.argv]
195 sysargv = [fsencode(a) for a in sys.argv]
175
196
176 bytechr = struct.Struct('>B').pack
197 bytechr = struct.Struct('>B').pack
177 byterepr = b'%r'.__mod__
198 byterepr = b'%r'.__mod__
178
199
class bytestr(bytes):
    """A bytes subclass which mostly behaves like a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # an existing bytestr is reused as-is instead of being copied
        if isinstance(s, bytestr):
            return s
        byteslike = isinstance(s, (bytes, bytearray))
        if not (byteslike or hasattr(s, u'__bytes__')):  # hasattr-py3-only
            # anything else goes through its str() form, which must be ASCII
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        # integer indexing yields an int; turn it back into bytes
        return item if isinstance(item, bytes) else bytechr(item)

    def __iter__(self):
        rawiter = bytes.__iter__(self)
        return iterbytestr(rawiter)

    def __repr__(self):
        plain = bytes.__repr__(self)
        return plain[1:]  # drop b''
254
275
def iterbytestr(s):
    """Iterate over ``s`` the way a Python 2 str would be iterated

    Every element produced by iterating ``s`` is passed through ``bytechr``.
    """
    converted = map(bytechr, s)
    return converted
258
279
def maybebytestr(s):
    """Wrap bytes in bytestr; hand back any other object unchanged"""
    return bytestr(s) if isinstance(s, bytes) else s
264
285
def sysbytes(s):
    """Turn an internal str (e.g. keyword, __doc__) back into bytes

    bytes input is returned unchanged; anything else is encoded as UTF-8.
    Only ASCII characters survive a sysstr(sysbytes(s)) round trip.
    """
    if not isinstance(s, bytes):
        return s.encode('utf-8')
    return s
274
295
def sysstr(s):
    """Build a keyword str suitable for Python functions such as getattr()
    and str.encode()

    str input is returned unchanged; anything else is decoded as latin-1, so
    decoding never raises UnicodeDecodeError. Non-ascii bytes are considered
    invalid and end up as arbitrary but unique code points, which keeps
    'sysstr(a) != sysstr(b)' true for all 'a != b'.
    """
    if not isinstance(s, builtins.str):
        return s.decode('latin-1')
    return s
286
307
def strurl(url):
    """Decode a bytes url as ASCII; non-bytes input is returned as-is"""
    return url.decode('ascii') if isinstance(url, bytes) else url
292
313
def bytesurl(url):
    """Encode a str url as ASCII; non-str input is returned as-is"""
    return url.encode('ascii') if isinstance(url, str) else url
298
319
def raisewithtb(exc, tb):
    """Attach traceback ``tb`` to ``exc`` and raise it"""
    rebound = exc.with_traceback(tb)
    raise rebound
302
323
def getdoc(obj):
    """Fetch the docstring of ``obj`` as bytes

    None is returned when there is no docstring, so gettext() won't confuse
    it with _('').
    """
    doc = getattr(obj, '__doc__', None)
    return None if doc is None else sysbytes(doc)
310
331
def _wrapattrfunc(f):
    """Wrap an attribute builtin so its name argument goes through sysstr()

    The returned function calls ``f(object, sysstr(name), *args)``.
    """

    @functools.wraps(f)
    def wrapper(object, name, *args):
        strname = sysstr(name)
        return f(object, strname, *args)

    return wrapper
317
338
318 # these wrappers are automagically imported by hgloader
339 # these wrappers are automagically imported by hgloader
319 delattr = _wrapattrfunc(builtins.delattr)
340 delattr = _wrapattrfunc(builtins.delattr)
320 getattr = _wrapattrfunc(builtins.getattr)
341 getattr = _wrapattrfunc(builtins.getattr)
321 hasattr = _wrapattrfunc(builtins.hasattr)
342 hasattr = _wrapattrfunc(builtins.hasattr)
322 setattr = _wrapattrfunc(builtins.setattr)
343 setattr = _wrapattrfunc(builtins.setattr)
323 xrange = builtins.range
344 xrange = builtins.range
324 unicode = str
345 unicode = str
325
346
def open(name, mode=b'r', buffering=-1, encoding=None):
    """Call builtins.open() with ``mode`` converted by sysstr()"""
    strmode = sysstr(mode)
    return builtins.open(name, strmode, buffering, encoding)
328
349
329 safehasattr = _wrapattrfunc(builtins.hasattr)
350 safehasattr = _wrapattrfunc(builtins.hasattr)
330
351
def _getoptbwrapper(orig, args, shortlist, namelist):
    """
    Run the getopt-style function ``orig`` on bytes arguments.

    getopt.getopt() doesn't accept bytes on Python 3, so every argument is
    decoded as latin-1 before the call and the parsed options and remaining
    arguments are encoded back to bytes afterwards.
    """
    strargs = [arg.decode('latin-1') for arg in args]
    strshort = shortlist.decode('latin-1')
    strlong = [name.decode('latin-1') for name in namelist]
    parsed, remaining = orig(strargs, strshort, strlong)
    byteopts = [
        (opt[0].encode('latin-1'), opt[1].encode('latin-1')) for opt in parsed
    ]
    byteargs = [arg.encode('latin-1') for arg in remaining]
    return byteopts, byteargs
345
366
def strkwargs(dic):
    """
    Return a copy of ``dic`` whose bytes keys are decoded (latin-1) to str.

    Dictionaries with bytes keys can't be passed as keyword arguments to
    functions on Python 3; the converted copy can.
    """
    converted = {}
    for key, value in dic.items():
        converted[key.decode('latin-1')] = value
    return converted
354
375
def byteskwargs(dic):
    """
    Return a copy of ``dic`` whose str keys are encoded (latin-1) to bytes.

    This undoes the key conversion needed to pass a dictionary as keyword
    arguments on Python 3.
    """
    converted = {}
    for key, value in dic.items():
        converted[key.encode('latin-1')] = value
    return converted
362
383
363 # TODO: handle shlex.shlex().
384 # TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Split the bytes string ``s`` with shlex.split() and return bytes tokens.

    shlex.split() doesn't accept bytes on Python 3, so the input is decoded
    as latin-1 first and every resulting token is encoded back.
    """
    tokens = shlex.split(s.decode('latin-1'), comments, posix)
    return [token.encode('latin-1') for token in tokens]
372
393
373 iteritems = lambda x: x.items()
394 iteritems = lambda x: x.items()
374 itervalues = lambda x: x.values()
395 itervalues = lambda x: x.values()
375
396
376 # Python 3.5's json.load and json.loads require str. We polyfill its
397 # Python 3.5's json.load and json.loads require str. We polyfill its
377 # code for detecting encoding from bytes.
398 # code for detecting encoding from bytes.
378 if sys.version_info[0:2] < (3, 6):
399 if sys.version_info[0:2] < (3, 6):
379
400
def _detect_encoding(b):
    """Guess the Unicode encoding of the JSON bytes ``b``

    A byte order mark decides first. Otherwise the position of NUL bytes in
    the first two to four bytes is used to tell UTF-16 and UTF-32 variants
    apart. 'utf-8' is the fallback.
    """
    bomtable = (
        ((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE), 'utf-32'),
        ((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE), 'utf-16'),
        (codecs.BOM_UTF8, 'utf-8-sig'),
    )
    # the UTF-32 marks are tested before the UTF-16 ones
    for marks, encoding in bomtable:
        if b.startswith(marks):
            return encoding

    size = len(b)
    if size >= 4:
        if not b[0]:
            # 00 00 -- -- - utf-32-be
            # 00 XX -- -- - utf-16-be
            if b[1]:
                return 'utf-16-be'
            return 'utf-32-be'
        if not b[1]:
            # XX 00 00 00 - utf-32-le
            # XX 00 00 XX - utf-16-le
            # XX 00 XX -- - utf-16-le
            if b[2] or b[3]:
                return 'utf-16-le'
            return 'utf-32-le'
    elif size == 2:
        if not b[0]:
            # 00 XX - utf-16-be
            return 'utf-16-be'
        if not b[1]:
            # XX 00 - utf-16-le
            return 'utf-16-le'
    # default
    return 'utf-8'
408
429
def json_loads(s, *args, **kwargs):
    """json.loads() that also accepts bytes and bytearray input

    Binary input is decoded with the encoding reported by _detect_encoding()
    (error handler 'surrogatepass') before it is handed to json.loads().
    """
    if isinstance(s, (bytes, bytearray)):
        text = s.decode(_detect_encoding(s), 'surrogatepass')
    else:
        text = s
    return json.loads(text, *args, **kwargs)
414
435
415 else:
436 else:
416 json_loads = json.loads
437 json_loads = json.loads
417
438
418 else:
439 else:
419 import cStringIO
440 import cStringIO
420
441
421 xrange = xrange
442 xrange = xrange
422 unicode = unicode
443 unicode = unicode
423 bytechr = chr
444 bytechr = chr
424 byterepr = repr
445 byterepr = repr
425 bytestr = str
446 bytestr = str
426 iterbytestr = iter
447 iterbytestr = iter
427 maybebytestr = identity
448 maybebytestr = identity
428 sysbytes = identity
449 sysbytes = identity
429 sysstr = identity
450 sysstr = identity
430 strurl = identity
451 strurl = identity
431 bytesurl = identity
452 bytesurl = identity
432 open = open
453 open = open
433 delattr = delattr
454 delattr = delattr
434 getattr = getattr
455 getattr = getattr
435 hasattr = hasattr
456 hasattr = hasattr
436 setattr = setattr
457 setattr = setattr
437
458
438 # this can't be parsed on Python 3
459 # this can't be parsed on Python 3
439 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
460 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
440
461
def fsencode(filename):
    """
    Partial backport of os.fsencode() from Python 3, accepting only ``str``.

    In Python 2, our paths should only ever be bytes (``str``), so a unicode
    path indicates a bug and raises TypeError.
    """
    if not isinstance(filename, str):
        raise TypeError("expect str, not %s" % type(filename).__name__)
    return filename
451
472
452 # In Python 2, fsdecode() is very likely to receive bytes. So it's
473 # In Python 2, fsdecode() is very likely to receive bytes. So it's
453 # better not to touch the Python 2 part as it's already working fine.
474 # better not to touch the Python 2 part as it's already working fine.
454 fsdecode = identity
475 fsdecode = identity
455
476
def getdoc(obj):
    """Return ``obj.__doc__``, or None when the attribute is missing"""
    doc = getattr(obj, '__doc__', None)
    return doc
458
479
# unique marker used to tell "attribute missing" from any real value
_notset = object()


def safehasattr(thing, attr):
    """Tell whether ``getattr(thing, attr)`` finds an attribute"""
    found = getattr(thing, attr, _notset)
    return found is not _notset
463
484
def _getoptbwrapper(orig, args, shortlist, namelist):
    """Call ``orig`` directly; no conversion is done in this variant"""
    parsed = orig(args, shortlist, namelist)
    return parsed
466
487
467 strkwargs = identity
488 strkwargs = identity
468 byteskwargs = identity
489 byteskwargs = identity
469
490
470 oscurdir = os.curdir
491 oscurdir = os.curdir
471 oslinesep = os.linesep
492 oslinesep = os.linesep
472 osname = os.name
493 osname = os.name
473 ospathsep = os.pathsep
494 ospathsep = os.pathsep
474 ospardir = os.pardir
495 ospardir = os.pardir
475 ossep = os.sep
496 ossep = os.sep
476 osaltsep = os.altsep
497 osaltsep = os.altsep
477 osdevnull = os.devnull
498 osdevnull = os.devnull
478 long = long
499 long = long
479 stdin = sys.stdin
500 stdin = sys.stdin
480 stdout = sys.stdout
501 stdout = sys.stdout
481 stderr = sys.stderr
502 stderr = sys.stderr
482 if getattr(sys, 'argv', None) is not None:
503 if getattr(sys, 'argv', None) is not None:
483 sysargv = sys.argv
504 sysargv = sys.argv
484 sysplatform = sys.platform
505 sysplatform = sys.platform
485 sysexecutable = sys.executable
506 sysexecutable = sys.executable
486 shlexsplit = shlex.split
507 shlexsplit = shlex.split
487 bytesio = cStringIO.StringIO
508 bytesio = cStringIO.StringIO
488 stringio = bytesio
509 stringio = bytesio
489 maplist = map
510 maplist = map
490 rangelist = range
511 rangelist = range
491 ziplist = zip
512 ziplist = zip
492 rawinput = raw_input
513 rawinput = raw_input
493 getargspec = inspect.getargspec
514 getargspec = inspect.getargspec
494 iteritems = lambda x: x.iteritems()
515 iteritems = lambda x: x.iteritems()
495 itervalues = lambda x: x.itervalues()
516 itervalues = lambda x: x.itervalues()
496 json_loads = json.loads
517 json_loads = json.loads
497
518
498 isjython = sysplatform.startswith(b'java')
519 isjython = sysplatform.startswith(b'java')
499
520
500 isdarwin = sysplatform.startswith(b'darwin')
521 isdarwin = sysplatform.startswith(b'darwin')
501 islinux = sysplatform.startswith(b'linux')
522 islinux = sysplatform.startswith(b'linux')
502 isposix = osname == b'posix'
523 isposix = osname == b'posix'
503 iswindows = osname == b'nt'
524 iswindows = osname == b'nt'
504
525
505
526
def getoptb(args, shortlist, namelist):
    """Run getopt.getopt() through _getoptbwrapper()"""
    return _getoptbwrapper(
        getopt.getopt,
        args,
        shortlist,
        namelist,
    )
508
529
509
530
def gnugetoptb(args, shortlist, namelist):
    """Run getopt.gnu_getopt() through _getoptbwrapper()"""
    return _getoptbwrapper(
        getopt.gnu_getopt,
        args,
        shortlist,
        namelist,
    )
512
533
513
534
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """tempfile.mkdtemp() with bytes defaults for ``suffix`` and ``prefix``"""
    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
516
537
517
538
518 # text=True is not supported; use util.from/tonativeeol() instead
539 # text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """tempfile.mkstemp() with bytes defaults for ``suffix`` and ``prefix``"""
    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
521
542
522
543
523 # mode must include 'b'ytes as encoding= is not supported
544 # mode must include 'b'ytes as encoding= is not supported
def namedtempfile(
    mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
):
    """tempfile.NamedTemporaryFile() taking a bytes ``mode``

    ``mode`` is converted with sysstr() and must contain 'b'.
    """
    strmode = sysstr(mode)
    assert 'b' in strmode
    options = {
        'suffix': suffix,
        'prefix': prefix,
        'dir': dir,
        'delete': delete,
    }
    return tempfile.NamedTemporaryFile(strmode, bufsize, **options)
General Comments 0
You need to be logged in to leave comments. Login now