##// END OF EJS Templates
pycompat: remove large Python 2 block...
Gregory Szorc -
r49726:79009cca default
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (628 lines changed) Show them Hide them
@@ -1,527 +1,471 b''
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import builtins
13 import builtins
14 import codecs
14 import concurrent.futures as futures
15 import concurrent.futures as futures
16 import functools
15 import getopt
17 import getopt
16 import http.client as httplib
18 import http.client as httplib
17 import http.cookiejar as cookielib
19 import http.cookiejar as cookielib
18 import inspect
20 import inspect
21 import io
19 import json
22 import json
20 import os
23 import os
21 import pickle
24 import pickle
22 import queue
25 import queue
23 import shlex
26 import shlex
24 import socketserver
27 import socketserver
28 import struct
25 import sys
29 import sys
26 import tempfile
30 import tempfile
27 import xmlrpc.client as xmlrpclib
31 import xmlrpc.client as xmlrpclib
28
32
33
29 ispy3 = sys.version_info[0] >= 3
34 ispy3 = sys.version_info[0] >= 3
30 ispypy = '__pypy__' in sys.builtin_module_names
35 ispypy = '__pypy__' in sys.builtin_module_names
31 TYPE_CHECKING = False
36 TYPE_CHECKING = False
32
37
33 if not globals(): # hide this from non-pytype users
38 if not globals(): # hide this from non-pytype users
34 import typing
39 import typing
35
40
36 TYPE_CHECKING = typing.TYPE_CHECKING
41 TYPE_CHECKING = typing.TYPE_CHECKING
37
42
38
43
39 def future_set_exception_info(f, exc_info):
44 def future_set_exception_info(f, exc_info):
40 f.set_exception(exc_info[0])
45 f.set_exception(exc_info[0])
41
46
42
47
43 FileNotFoundError = builtins.FileNotFoundError
48 FileNotFoundError = builtins.FileNotFoundError
44
49
45
50
46 def identity(a):
51 def identity(a):
47 return a
52 return a
48
53
49
54
50 def _rapply(f, xs):
55 def _rapply(f, xs):
51 if xs is None:
56 if xs is None:
52 # assume None means non-value of optional data
57 # assume None means non-value of optional data
53 return xs
58 return xs
54 if isinstance(xs, (list, set, tuple)):
59 if isinstance(xs, (list, set, tuple)):
55 return type(xs)(_rapply(f, x) for x in xs)
60 return type(xs)(_rapply(f, x) for x in xs)
56 if isinstance(xs, dict):
61 if isinstance(xs, dict):
57 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
62 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
58 return f(xs)
63 return f(xs)
59
64
60
65
61 def rapply(f, xs):
66 def rapply(f, xs):
62 """Apply function recursively to every item preserving the data structure
67 """Apply function recursively to every item preserving the data structure
63
68
64 >>> def f(x):
69 >>> def f(x):
65 ... return 'f(%s)' % x
70 ... return 'f(%s)' % x
66 >>> rapply(f, None) is None
71 >>> rapply(f, None) is None
67 True
72 True
68 >>> rapply(f, 'a')
73 >>> rapply(f, 'a')
69 'f(a)'
74 'f(a)'
70 >>> rapply(f, {'a'}) == {'f(a)'}
75 >>> rapply(f, {'a'}) == {'f(a)'}
71 True
76 True
72 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
77 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
73 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
78 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
74
79
75 >>> xs = [object()]
80 >>> xs = [object()]
76 >>> rapply(identity, xs) is xs
81 >>> rapply(identity, xs) is xs
77 True
82 True
78 """
83 """
79 if f is identity:
84 if f is identity:
80 # fast path mainly for py2
85 # fast path mainly for py2
81 return xs
86 return xs
82 return _rapply(f, xs)
87 return _rapply(f, xs)
83
88
84
89
85 if ispy3:
90 if os.name == r'nt' and sys.version_info >= (3, 6):
86 import builtins
91 # MBCS (or ANSI) filesystem encoding must be used as before.
87 import codecs
92 # Otherwise non-ASCII filenames in existing repositories would be
88 import functools
93 # corrupted.
89 import io
94 # This must be set once prior to any fsencode/fsdecode calls.
90 import struct
95 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
91
92 if os.name == r'nt' and sys.version_info >= (3, 6):
93 # MBCS (or ANSI) filesystem encoding must be used as before.
94 # Otherwise non-ASCII filenames in existing repositories would be
95 # corrupted.
96 # This must be set once prior to any fsencode/fsdecode calls.
97 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
98
96
99 fsencode = os.fsencode
97 fsencode = os.fsencode
100 fsdecode = os.fsdecode
98 fsdecode = os.fsdecode
101 oscurdir = os.curdir.encode('ascii')
99 oscurdir = os.curdir.encode('ascii')
102 oslinesep = os.linesep.encode('ascii')
100 oslinesep = os.linesep.encode('ascii')
103 osname = os.name.encode('ascii')
101 osname = os.name.encode('ascii')
104 ospathsep = os.pathsep.encode('ascii')
102 ospathsep = os.pathsep.encode('ascii')
105 ospardir = os.pardir.encode('ascii')
103 ospardir = os.pardir.encode('ascii')
106 ossep = os.sep.encode('ascii')
104 ossep = os.sep.encode('ascii')
107 osaltsep = os.altsep
105 osaltsep = os.altsep
108 if osaltsep:
106 if osaltsep:
109 osaltsep = osaltsep.encode('ascii')
107 osaltsep = osaltsep.encode('ascii')
110 osdevnull = os.devnull.encode('ascii')
108 osdevnull = os.devnull.encode('ascii')
111
109
112 sysplatform = sys.platform.encode('ascii')
110 sysplatform = sys.platform.encode('ascii')
113 sysexecutable = sys.executable
111 sysexecutable = sys.executable
114 if sysexecutable:
112 if sysexecutable:
115 sysexecutable = os.fsencode(sysexecutable)
113 sysexecutable = os.fsencode(sysexecutable)
116 bytesio = io.BytesIO
114 bytesio = io.BytesIO
117 # TODO deprecate stringio name, as it is a lie on Python 3.
115 # TODO deprecate stringio name, as it is a lie on Python 3.
118 stringio = bytesio
116 stringio = bytesio
119
117
120 def maplist(*args):
118
121 return list(map(*args))
119 def maplist(*args):
120 return list(map(*args))
122
121
123 def rangelist(*args):
122
124 return list(range(*args))
123 def rangelist(*args):
124 return list(range(*args))
125
125
126 def ziplist(*args):
126
127 return list(zip(*args))
127 def ziplist(*args):
128 return list(zip(*args))
129
128
130
129 rawinput = input
131 rawinput = input
130 getargspec = inspect.getfullargspec
132 getargspec = inspect.getfullargspec
131
133
132 long = int
134 long = int
133
135
134 if getattr(sys, 'argv', None) is not None:
136 if getattr(sys, 'argv', None) is not None:
135 # On POSIX, the char** argv array is converted to Python str using
137 # On POSIX, the char** argv array is converted to Python str using
136 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
138 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
137 # isn't directly callable from Python code. In practice, os.fsencode()
139 # isn't directly callable from Python code. In practice, os.fsencode()
138 # can be used instead (this is recommended by Python's documentation
140 # can be used instead (this is recommended by Python's documentation
139 # for sys.argv).
141 # for sys.argv).
140 #
142 #
141 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
143 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
142 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
144 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
143 # there's an additional wrinkle. What we really want to access is the
145 # there's an additional wrinkle. What we really want to access is the
144 # ANSI codepage representation of the arguments, as this is what
146 # ANSI codepage representation of the arguments, as this is what
145 # `int main()` would receive if Python 3 didn't define `int wmain()`
147 # `int main()` would receive if Python 3 didn't define `int wmain()`
146 # (this is how Python 2 worked). To get that, we encode with the mbcs
148 # (this is how Python 2 worked). To get that, we encode with the mbcs
147 # encoding, which will pass CP_ACP to the underlying Windows API to
149 # encoding, which will pass CP_ACP to the underlying Windows API to
148 # produce bytes.
150 # produce bytes.
149 if os.name == r'nt':
151 if os.name == r'nt':
150 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
152 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
151 else:
153 else:
152 sysargv = [fsencode(a) for a in sys.argv]
154 sysargv = [fsencode(a) for a in sys.argv]
153
155
154 bytechr = struct.Struct('>B').pack
156 bytechr = struct.Struct('>B').pack
155 byterepr = b'%r'.__mod__
157 byterepr = b'%r'.__mod__
156
157 class bytestr(bytes):
158 """A bytes which mostly acts as a Python 2 str
159
158
160 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
161 ('', 'foo', 'ascii', '1')
162 >>> s = bytestr(b'foo')
163 >>> assert s is bytestr(s)
164
159
165 __bytes__() should be called if provided:
160 class bytestr(bytes):
161 """A bytes which mostly acts as a Python 2 str
166
162
167 >>> class bytesable(object):
163 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
168 ... def __bytes__(self):
164 ('', 'foo', 'ascii', '1')
169 ... return b'bytes'
165 >>> s = bytestr(b'foo')
170 >>> bytestr(bytesable())
166 >>> assert s is bytestr(s)
171 'bytes'
167
168 __bytes__() should be called if provided:
172
169
173 There's no implicit conversion from non-ascii str as its encoding is
170 >>> class bytesable(object):
174 unknown:
171 ... def __bytes__(self):
172 ... return b'bytes'
173 >>> bytestr(bytesable())
174 'bytes'
175
175
176 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
176 There's no implicit conversion from non-ascii str as its encoding is
177 Traceback (most recent call last):
177 unknown:
178 ...
179 UnicodeEncodeError: ...
180
181 Comparison between bytestr and bytes should work:
182
178
183 >>> assert bytestr(b'foo') == b'foo'
179 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
184 >>> assert b'foo' == bytestr(b'foo')
180 Traceback (most recent call last):
185 >>> assert b'f' in bytestr(b'foo')
181 ...
186 >>> assert bytestr(b'f') in b'foo'
182 UnicodeEncodeError: ...
187
183
188 Sliced elements should be bytes, not integer:
184 Comparison between bytestr and bytes should work:
189
185
190 >>> s[1], s[:2]
186 >>> assert bytestr(b'foo') == b'foo'
191 (b'o', b'fo')
187 >>> assert b'foo' == bytestr(b'foo')
192 >>> list(s), list(reversed(s))
188 >>> assert b'f' in bytestr(b'foo')
193 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
189 >>> assert bytestr(b'f') in b'foo'
194
195 As bytestr type isn't propagated across operations, you need to cast
196 bytes to bytestr explicitly:
197
190
198 >>> s = bytestr(b'foo').upper()
191 Sliced elements should be bytes, not integer:
199 >>> t = bytestr(s)
200 >>> s[0], t[0]
201 (70, b'F')
202
192
203 Be careful to not pass a bytestr object to a function which expects
193 >>> s[1], s[:2]
204 bytearray-like behavior.
194 (b'o', b'fo')
195 >>> list(s), list(reversed(s))
196 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
205
197
206 >>> t = bytes(t) # cast to bytes
198 As bytestr type isn't propagated across operations, you need to cast
207 >>> assert type(t) is bytes
199 bytes to bytestr explicitly:
208 """
209
200
210 # Trick pytype into not demanding Iterable[int] be passed to __new__(),
201 >>> s = bytestr(b'foo').upper()
211 # since the appropriate bytes format is done internally.
202 >>> t = bytestr(s)
212 #
203 >>> s[0], t[0]
213 # https://github.com/google/pytype/issues/500
204 (70, b'F')
214 if TYPE_CHECKING:
215
205
216 def __init__(self, s=b''):
206 Be careful to not pass a bytestr object to a function which expects
217 pass
207 bytearray-like behavior.
208
209 >>> t = bytes(t) # cast to bytes
210 >>> assert type(t) is bytes
211 """
218
212
219 def __new__(cls, s=b''):
213 # Trick pytype into not demanding Iterable[int] be passed to __new__(),
220 if isinstance(s, bytestr):
214 # since the appropriate bytes format is done internally.
221 return s
215 #
222 if not isinstance(
216 # https://github.com/google/pytype/issues/500
223 s, (bytes, bytearray)
217 if TYPE_CHECKING:
224 ) and not hasattr( # hasattr-py3-only
225 s, u'__bytes__'
226 ):
227 s = str(s).encode('ascii')
228 return bytes.__new__(cls, s)
229
218
230 def __getitem__(self, key):
219 def __init__(self, s=b''):
231 s = bytes.__getitem__(self, key)
220 pass
232 if not isinstance(s, bytes):
221
233 s = bytechr(s)
222 def __new__(cls, s=b''):
223 if isinstance(s, bytestr):
234 return s
224 return s
235
225 if not isinstance(
236 def __iter__(self):
226 s, (bytes, bytearray)
237 return iterbytestr(bytes.__iter__(self))
227 ) and not hasattr( # hasattr-py3-only
238
228 s, u'__bytes__'
239 def __repr__(self):
229 ):
240 return bytes.__repr__(self)[1:] # drop b''
230 s = str(s).encode('ascii')
231 return bytes.__new__(cls, s)
241
232
242 def iterbytestr(s):
233 def __getitem__(self, key):
243 """Iterate bytes as if it were a str object of Python 2"""
234 s = bytes.__getitem__(self, key)
244 return map(bytechr, s)
235 if not isinstance(s, bytes):
245
236 s = bytechr(s)
246 def maybebytestr(s):
247 """Promote bytes to bytestr"""
248 if isinstance(s, bytes):
249 return bytestr(s)
250 return s
237 return s
251
238
252 def sysbytes(s):
239 def __iter__(self):
253 """Convert an internal str (e.g. keyword, __doc__) back to bytes
240 return iterbytestr(bytes.__iter__(self))
241
242 def __repr__(self):
243 return bytes.__repr__(self)[1:] # drop b''
244
254
245
255 This never raises UnicodeEncodeError, but only ASCII characters
246 def iterbytestr(s):
256 can be round-trip by sysstr(sysbytes(s)).
247 """Iterate bytes as if it were a str object of Python 2"""
257 """
248 return map(bytechr, s)
258 if isinstance(s, bytes):
249
259 return s
260 return s.encode('utf-8')
261
250
262 def sysstr(s):
251 def maybebytestr(s):
263 """Return a keyword str to be passed to Python functions such as
252 """Promote bytes to bytestr"""
264 getattr() and str.encode()
253 if isinstance(s, bytes):
254 return bytestr(s)
255 return s
256
265
257
266 This never raises UnicodeDecodeError. Non-ascii characters are
258 def sysbytes(s):
267 considered invalid and mapped to arbitrary but unique code points
259 """Convert an internal str (e.g. keyword, __doc__) back to bytes
268 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
260
269 """
261 This never raises UnicodeEncodeError, but only ASCII characters
270 if isinstance(s, builtins.str):
262 can be round-trip by sysstr(sysbytes(s)).
271 return s
263 """
272 return s.decode('latin-1')
264 if isinstance(s, bytes):
265 return s
266 return s.encode('utf-8')
267
273
268
274 def strurl(url):
269 def sysstr(s):
275 """Converts a bytes url back to str"""
270 """Return a keyword str to be passed to Python functions such as
276 if isinstance(url, bytes):
271 getattr() and str.encode()
277 return url.decode('ascii')
272
278 return url
273 This never raises UnicodeDecodeError. Non-ascii characters are
274 considered invalid and mapped to arbitrary but unique code points
275 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
276 """
277 if isinstance(s, builtins.str):
278 return s
279 return s.decode('latin-1')
280
279
281
280 def bytesurl(url):
282 def strurl(url):
281 """Converts a str url to bytes by encoding in ascii"""
283 """Converts a bytes url back to str"""
282 if isinstance(url, str):
284 if isinstance(url, bytes):
283 return url.encode('ascii')
285 return url.decode('ascii')
284 return url
286 return url
287
285
288
286 def raisewithtb(exc, tb):
289 def bytesurl(url):
287 """Raise exception with the given traceback"""
290 """Converts a str url to bytes by encoding in ascii"""
288 raise exc.with_traceback(tb)
291 if isinstance(url, str):
292 return url.encode('ascii')
293 return url
289
294
290 def getdoc(obj):
295
291 """Get docstring as bytes; may be None so gettext() won't confuse it
296 def raisewithtb(exc, tb):
292 with _('')"""
297 """Raise exception with the given traceback"""
293 doc = getattr(obj, '__doc__', None)
298 raise exc.with_traceback(tb)
294 if doc is None:
299
295 return doc
296 return sysbytes(doc)
297
300
298 def _wrapattrfunc(f):
301 def getdoc(obj):
299 @functools.wraps(f)
302 """Get docstring as bytes; may be None so gettext() won't confuse it
300 def w(object, name, *args):
303 with _('')"""
301 return f(object, sysstr(name), *args)
304 doc = getattr(obj, '__doc__', None)
305 if doc is None:
306 return doc
307 return sysbytes(doc)
302
308
303 return w
309
310 def _wrapattrfunc(f):
311 @functools.wraps(f)
312 def w(object, name, *args):
313 return f(object, sysstr(name), *args)
304
314
305 # these wrappers are automagically imported by hgloader
315 return w
306 delattr = _wrapattrfunc(builtins.delattr)
316
307 getattr = _wrapattrfunc(builtins.getattr)
308 hasattr = _wrapattrfunc(builtins.hasattr)
309 setattr = _wrapattrfunc(builtins.setattr)
310 xrange = builtins.range
311 unicode = str
312
317
313 def open(name, mode=b'r', buffering=-1, encoding=None):
318 # these wrappers are automagically imported by hgloader
314 return builtins.open(name, sysstr(mode), buffering, encoding)
319 delattr = _wrapattrfunc(builtins.delattr)
320 getattr = _wrapattrfunc(builtins.getattr)
321 hasattr = _wrapattrfunc(builtins.hasattr)
322 setattr = _wrapattrfunc(builtins.setattr)
323 xrange = builtins.range
324 unicode = str
315
325
316 safehasattr = _wrapattrfunc(builtins.hasattr)
326
327 def open(name, mode=b'r', buffering=-1, encoding=None):
328 return builtins.open(name, sysstr(mode), buffering, encoding)
329
317
330
318 def _getoptbwrapper(orig, args, shortlist, namelist):
331 safehasattr = _wrapattrfunc(builtins.hasattr)
319 """
332
320 Takes bytes arguments, converts them to unicode, pass them to
333
321 getopt.getopt(), convert the returned values back to bytes and then
334 def _getoptbwrapper(orig, args, shortlist, namelist):
322 return them for Python 3 compatibility as getopt.getopt() don't accepts
335 """
323 bytes on Python 3.
336 Takes bytes arguments, converts them to unicode, pass them to
324 """
337 getopt.getopt(), convert the returned values back to bytes and then
325 args = [a.decode('latin-1') for a in args]
338 return them for Python 3 compatibility as getopt.getopt() don't accepts
326 shortlist = shortlist.decode('latin-1')
339 bytes on Python 3.
327 namelist = [a.decode('latin-1') for a in namelist]
340 """
328 opts, args = orig(args, shortlist, namelist)
341 args = [a.decode('latin-1') for a in args]
329 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
342 shortlist = shortlist.decode('latin-1')
330 args = [a.encode('latin-1') for a in args]
343 namelist = [a.decode('latin-1') for a in namelist]
331 return opts, args
344 opts, args = orig(args, shortlist, namelist)
345 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
346 args = [a.encode('latin-1') for a in args]
347 return opts, args
348
332
349
333 def strkwargs(dic):
350 def strkwargs(dic):
334 """
351 """
335 Converts the keys of a python dictonary to str i.e. unicodes so that
352 Converts the keys of a python dictonary to str i.e. unicodes so that
336 they can be passed as keyword arguments as dictionaries with bytes keys
353 they can be passed as keyword arguments as dictionaries with bytes keys
337 can't be passed as keyword arguments to functions on Python 3.
354 can't be passed as keyword arguments to functions on Python 3.
338 """
355 """
339 dic = {k.decode('latin-1'): v for k, v in dic.items()}
356 dic = {k.decode('latin-1'): v for k, v in dic.items()}
340 return dic
357 return dic
341
358
342 def byteskwargs(dic):
343 """
344 Converts keys of python dictionaries to bytes as they were converted to
345 str to pass that dictonary as a keyword argument on Python 3.
346 """
347 dic = {k.encode('latin-1'): v for k, v in dic.items()}
348 return dic
349
359
350 # TODO: handle shlex.shlex().
360 def byteskwargs(dic):
351 def shlexsplit(s, comments=False, posix=True):
361 """
352 """
362 Converts keys of python dictionaries to bytes as they were converted to
353 Takes bytes argument, convert it to str i.e. unicodes, pass that into
363 str to pass that dictonary as a keyword argument on Python 3.
354 shlex.split(), convert the returned value to bytes and return that for
364 """
355 Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
365 dic = {k.encode('latin-1'): v for k, v in dic.items()}
356 """
366 return dic
357 ret = shlex.split(s.decode('latin-1'), comments, posix)
367
358 return [a.encode('latin-1') for a in ret]
359
368
360 iteritems = lambda x: x.items()
369 # TODO: handle shlex.shlex().
361 itervalues = lambda x: x.values()
370 def shlexsplit(s, comments=False, posix=True):
371 """
372 Takes bytes argument, convert it to str i.e. unicodes, pass that into
373 shlex.split(), convert the returned value to bytes and return that for
374 Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
375 """
376 ret = shlex.split(s.decode('latin-1'), comments, posix)
377 return [a.encode('latin-1') for a in ret]
362
378
363 # Python 3.5's json.load and json.loads require str. We polyfill its
364 # code for detecting encoding from bytes.
365 if sys.version_info[0:2] < (3, 6):
366
379
367 def _detect_encoding(b):
380 iteritems = lambda x: x.items()
368 bstartswith = b.startswith
381 itervalues = lambda x: x.values()
369 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
382
370 return 'utf-32'
383 # Python 3.5's json.load and json.loads require str. We polyfill its
371 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
384 # code for detecting encoding from bytes.
372 return 'utf-16'
385 if sys.version_info[0:2] < (3, 6):
373 if bstartswith(codecs.BOM_UTF8):
386
374 return 'utf-8-sig'
387 def _detect_encoding(b):
388 bstartswith = b.startswith
389 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
390 return 'utf-32'
391 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
392 return 'utf-16'
393 if bstartswith(codecs.BOM_UTF8):
394 return 'utf-8-sig'
375
395
376 if len(b) >= 4:
396 if len(b) >= 4:
377 if not b[0]:
397 if not b[0]:
378 # 00 00 -- -- - utf-32-be
398 # 00 00 -- -- - utf-32-be
379 # 00 XX -- -- - utf-16-be
399 # 00 XX -- -- - utf-16-be
380 return 'utf-16-be' if b[1] else 'utf-32-be'
400 return 'utf-16-be' if b[1] else 'utf-32-be'
381 if not b[1]:
401 if not b[1]:
382 # XX 00 00 00 - utf-32-le
402 # XX 00 00 00 - utf-32-le
383 # XX 00 00 XX - utf-16-le
403 # XX 00 00 XX - utf-16-le
384 # XX 00 XX -- - utf-16-le
404 # XX 00 XX -- - utf-16-le
385 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
405 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
386 elif len(b) == 2:
406 elif len(b) == 2:
387 if not b[0]:
407 if not b[0]:
388 # 00 XX - utf-16-be
408 # 00 XX - utf-16-be
389 return 'utf-16-be'
409 return 'utf-16-be'
390 if not b[1]:
410 if not b[1]:
391 # XX 00 - utf-16-le
411 # XX 00 - utf-16-le
392 return 'utf-16-le'
412 return 'utf-16-le'
393 # default
413 # default
394 return 'utf-8'
414 return 'utf-8'
395
415
396 def json_loads(s, *args, **kwargs):
416 def json_loads(s, *args, **kwargs):
397 if isinstance(s, (bytes, bytearray)):
417 if isinstance(s, (bytes, bytearray)):
398 s = s.decode(_detect_encoding(s), 'surrogatepass')
418 s = s.decode(_detect_encoding(s), 'surrogatepass')
399
419
400 return json.loads(s, *args, **kwargs)
420 return json.loads(s, *args, **kwargs)
401
421
402 else:
403 json_loads = json.loads
404
422
405 else:
423 else:
406 import cStringIO
407
408 xrange = xrange
409 unicode = unicode
410 bytechr = chr
411 byterepr = repr
412 bytestr = str
413 iterbytestr = iter
414 maybebytestr = identity
415 sysbytes = identity
416 sysstr = identity
417 strurl = identity
418 bytesurl = identity
419 open = open
420 delattr = delattr
421 getattr = getattr
422 hasattr = hasattr
423 setattr = setattr
424
425 # this can't be parsed on Python 3
426 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
427
428 def fsencode(filename):
429 """
430 Partial backport from os.py in Python 3, which only accepts bytes.
431 In Python 2, our paths should only ever be bytes, a unicode path
432 indicates a bug.
433 """
434 if isinstance(filename, str):
435 return filename
436 else:
437 raise TypeError("expect str, not %s" % type(filename).__name__)
438
439 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
440 # better not to touch Python 2 part as it's already working fine.
441 fsdecode = identity
442
443 def getdoc(obj):
444 return getattr(obj, '__doc__', None)
445
446 _notset = object()
447
448 def safehasattr(thing, attr):
449 return getattr(thing, attr, _notset) is not _notset
450
451 def _getoptbwrapper(orig, args, shortlist, namelist):
452 return orig(args, shortlist, namelist)
453
454 strkwargs = identity
455 byteskwargs = identity
456
457 oscurdir = os.curdir
458 oslinesep = os.linesep
459 osname = os.name
460 ospathsep = os.pathsep
461 ospardir = os.pardir
462 ossep = os.sep
463 osaltsep = os.altsep
464 osdevnull = os.devnull
465 long = long
466 if getattr(sys, 'argv', None) is not None:
467 sysargv = sys.argv
468 sysplatform = sys.platform
469 sysexecutable = sys.executable
470 shlexsplit = shlex.split
471 bytesio = cStringIO.StringIO
472 stringio = bytesio
473 maplist = map
474 rangelist = range
475 ziplist = zip
476 rawinput = raw_input
477 getargspec = inspect.getargspec
478 iteritems = lambda x: x.iteritems()
479 itervalues = lambda x: x.itervalues()
480 json_loads = json.loads
424 json_loads = json.loads
481
425
482 isjython = sysplatform.startswith(b'java')
426 isjython = sysplatform.startswith(b'java')
483
427
484 isdarwin = sysplatform.startswith(b'darwin')
428 isdarwin = sysplatform.startswith(b'darwin')
485 islinux = sysplatform.startswith(b'linux')
429 islinux = sysplatform.startswith(b'linux')
486 isposix = osname == b'posix'
430 isposix = osname == b'posix'
487 iswindows = osname == b'nt'
431 iswindows = osname == b'nt'
488
432
489
433
490 def getoptb(args, shortlist, namelist):
434 def getoptb(args, shortlist, namelist):
491 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
435 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
492
436
493
437
494 def gnugetoptb(args, shortlist, namelist):
438 def gnugetoptb(args, shortlist, namelist):
495 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
439 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
496
440
497
441
498 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
442 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
499 return tempfile.mkdtemp(suffix, prefix, dir)
443 return tempfile.mkdtemp(suffix, prefix, dir)
500
444
501
445
502 # text=True is not supported; use util.from/tonativeeol() instead
446 # text=True is not supported; use util.from/tonativeeol() instead
503 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
447 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
504 return tempfile.mkstemp(suffix, prefix, dir)
448 return tempfile.mkstemp(suffix, prefix, dir)
505
449
506
450
507 # TemporaryFile does not support an "encoding=" argument on python2.
451 # TemporaryFile does not support an "encoding=" argument on python2.
508 # This wrapper file are always open in byte mode.
452 # This wrapper file are always open in byte mode.
509 def unnamedtempfile(mode=None, *args, **kwargs):
453 def unnamedtempfile(mode=None, *args, **kwargs):
510 if mode is None:
454 if mode is None:
511 mode = 'w+b'
455 mode = 'w+b'
512 else:
456 else:
513 mode = sysstr(mode)
457 mode = sysstr(mode)
514 assert 'b' in mode
458 assert 'b' in mode
515 return tempfile.TemporaryFile(mode, *args, **kwargs)
459 return tempfile.TemporaryFile(mode, *args, **kwargs)
516
460
517
461
518 # NamedTemporaryFile does not support an "encoding=" argument on python2.
462 # NamedTemporaryFile does not support an "encoding=" argument on python2.
519 # This wrapper file are always open in byte mode.
463 # This wrapper file are always open in byte mode.
520 def namedtempfile(
464 def namedtempfile(
521 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
465 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
522 ):
466 ):
523 mode = sysstr(mode)
467 mode = sysstr(mode)
524 assert 'b' in mode
468 assert 'b' in mode
525 return tempfile.NamedTemporaryFile(
469 return tempfile.NamedTemporaryFile(
526 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
470 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
527 )
471 )
General Comments 0
You need to be logged in to leave comments. Login now