##// END OF EJS Templates
pycompat: remove large Python 2 block...
Gregory Szorc -
r49726:79009cca default
parent child Browse files
Show More
@@ -1,527 +1,471 b''
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import builtins
13 import builtins
14 import codecs
14 import concurrent.futures as futures
15 import concurrent.futures as futures
16 import functools
15 import getopt
17 import getopt
16 import http.client as httplib
18 import http.client as httplib
17 import http.cookiejar as cookielib
19 import http.cookiejar as cookielib
18 import inspect
20 import inspect
21 import io
19 import json
22 import json
20 import os
23 import os
21 import pickle
24 import pickle
22 import queue
25 import queue
23 import shlex
26 import shlex
24 import socketserver
27 import socketserver
28 import struct
25 import sys
29 import sys
26 import tempfile
30 import tempfile
27 import xmlrpc.client as xmlrpclib
31 import xmlrpc.client as xmlrpclib
28
32
33
29 ispy3 = sys.version_info[0] >= 3
34 ispy3 = sys.version_info[0] >= 3
30 ispypy = '__pypy__' in sys.builtin_module_names
35 ispypy = '__pypy__' in sys.builtin_module_names
31 TYPE_CHECKING = False
36 TYPE_CHECKING = False
32
37
33 if not globals(): # hide this from non-pytype users
38 if not globals(): # hide this from non-pytype users
34 import typing
39 import typing
35
40
36 TYPE_CHECKING = typing.TYPE_CHECKING
41 TYPE_CHECKING = typing.TYPE_CHECKING
37
42
38
43
39 def future_set_exception_info(f, exc_info):
44 def future_set_exception_info(f, exc_info):
40 f.set_exception(exc_info[0])
45 f.set_exception(exc_info[0])
41
46
42
47
43 FileNotFoundError = builtins.FileNotFoundError
48 FileNotFoundError = builtins.FileNotFoundError
44
49
45
50
46 def identity(a):
51 def identity(a):
47 return a
52 return a
48
53
49
54
50 def _rapply(f, xs):
55 def _rapply(f, xs):
51 if xs is None:
56 if xs is None:
52 # assume None means non-value of optional data
57 # assume None means non-value of optional data
53 return xs
58 return xs
54 if isinstance(xs, (list, set, tuple)):
59 if isinstance(xs, (list, set, tuple)):
55 return type(xs)(_rapply(f, x) for x in xs)
60 return type(xs)(_rapply(f, x) for x in xs)
56 if isinstance(xs, dict):
61 if isinstance(xs, dict):
57 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
62 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
58 return f(xs)
63 return f(xs)
59
64
60
65
61 def rapply(f, xs):
66 def rapply(f, xs):
62 """Apply function recursively to every item preserving the data structure
67 """Apply function recursively to every item preserving the data structure
63
68
64 >>> def f(x):
69 >>> def f(x):
65 ... return 'f(%s)' % x
70 ... return 'f(%s)' % x
66 >>> rapply(f, None) is None
71 >>> rapply(f, None) is None
67 True
72 True
68 >>> rapply(f, 'a')
73 >>> rapply(f, 'a')
69 'f(a)'
74 'f(a)'
70 >>> rapply(f, {'a'}) == {'f(a)'}
75 >>> rapply(f, {'a'}) == {'f(a)'}
71 True
76 True
72 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
77 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
73 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
78 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
74
79
75 >>> xs = [object()]
80 >>> xs = [object()]
76 >>> rapply(identity, xs) is xs
81 >>> rapply(identity, xs) is xs
77 True
82 True
78 """
83 """
79 if f is identity:
84 if f is identity:
80 # fast path mainly for py2
85 # fast path mainly for py2
81 return xs
86 return xs
82 return _rapply(f, xs)
87 return _rapply(f, xs)
83
88
84
89
85 if ispy3:
86 import builtins
87 import codecs
88 import functools
89 import io
90 import struct
91
92 if os.name == r'nt' and sys.version_info >= (3, 6):
90 if os.name == r'nt' and sys.version_info >= (3, 6):
93 # MBCS (or ANSI) filesystem encoding must be used as before.
91 # MBCS (or ANSI) filesystem encoding must be used as before.
94 # Otherwise non-ASCII filenames in existing repositories would be
92 # Otherwise non-ASCII filenames in existing repositories would be
95 # corrupted.
93 # corrupted.
96 # This must be set once prior to any fsencode/fsdecode calls.
94 # This must be set once prior to any fsencode/fsdecode calls.
97 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
95 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
98
96
99 fsencode = os.fsencode
97 fsencode = os.fsencode
100 fsdecode = os.fsdecode
98 fsdecode = os.fsdecode
101 oscurdir = os.curdir.encode('ascii')
99 oscurdir = os.curdir.encode('ascii')
102 oslinesep = os.linesep.encode('ascii')
100 oslinesep = os.linesep.encode('ascii')
103 osname = os.name.encode('ascii')
101 osname = os.name.encode('ascii')
104 ospathsep = os.pathsep.encode('ascii')
102 ospathsep = os.pathsep.encode('ascii')
105 ospardir = os.pardir.encode('ascii')
103 ospardir = os.pardir.encode('ascii')
106 ossep = os.sep.encode('ascii')
104 ossep = os.sep.encode('ascii')
107 osaltsep = os.altsep
105 osaltsep = os.altsep
108 if osaltsep:
106 if osaltsep:
109 osaltsep = osaltsep.encode('ascii')
107 osaltsep = osaltsep.encode('ascii')
110 osdevnull = os.devnull.encode('ascii')
108 osdevnull = os.devnull.encode('ascii')
111
109
112 sysplatform = sys.platform.encode('ascii')
110 sysplatform = sys.platform.encode('ascii')
113 sysexecutable = sys.executable
111 sysexecutable = sys.executable
114 if sysexecutable:
112 if sysexecutable:
115 sysexecutable = os.fsencode(sysexecutable)
113 sysexecutable = os.fsencode(sysexecutable)
116 bytesio = io.BytesIO
114 bytesio = io.BytesIO
117 # TODO deprecate stringio name, as it is a lie on Python 3.
115 # TODO deprecate stringio name, as it is a lie on Python 3.
118 stringio = bytesio
116 stringio = bytesio
119
117
118
120 def maplist(*args):
119 def maplist(*args):
121 return list(map(*args))
120 return list(map(*args))
122
121
122
123 def rangelist(*args):
123 def rangelist(*args):
124 return list(range(*args))
124 return list(range(*args))
125
125
126
126 def ziplist(*args):
127 def ziplist(*args):
127 return list(zip(*args))
128 return list(zip(*args))
128
129
130
129 rawinput = input
131 rawinput = input
130 getargspec = inspect.getfullargspec
132 getargspec = inspect.getfullargspec
131
133
132 long = int
134 long = int
133
135
134 if getattr(sys, 'argv', None) is not None:
136 if getattr(sys, 'argv', None) is not None:
135 # On POSIX, the char** argv array is converted to Python str using
137 # On POSIX, the char** argv array is converted to Python str using
136 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
138 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
137 # isn't directly callable from Python code. In practice, os.fsencode()
139 # isn't directly callable from Python code. In practice, os.fsencode()
138 # can be used instead (this is recommended by Python's documentation
140 # can be used instead (this is recommended by Python's documentation
139 # for sys.argv).
141 # for sys.argv).
140 #
142 #
141 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
143 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
142 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
144 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
143 # there's an additional wrinkle. What we really want to access is the
145 # there's an additional wrinkle. What we really want to access is the
144 # ANSI codepage representation of the arguments, as this is what
146 # ANSI codepage representation of the arguments, as this is what
145 # `int main()` would receive if Python 3 didn't define `int wmain()`
147 # `int main()` would receive if Python 3 didn't define `int wmain()`
146 # (this is how Python 2 worked). To get that, we encode with the mbcs
148 # (this is how Python 2 worked). To get that, we encode with the mbcs
147 # encoding, which will pass CP_ACP to the underlying Windows API to
149 # encoding, which will pass CP_ACP to the underlying Windows API to
148 # produce bytes.
150 # produce bytes.
149 if os.name == r'nt':
151 if os.name == r'nt':
150 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
152 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
151 else:
153 else:
152 sysargv = [fsencode(a) for a in sys.argv]
154 sysargv = [fsencode(a) for a in sys.argv]
153
155
154 bytechr = struct.Struct('>B').pack
156 bytechr = struct.Struct('>B').pack
155 byterepr = b'%r'.__mod__
157 byterepr = b'%r'.__mod__
156
158
159
157 class bytestr(bytes):
160 class bytestr(bytes):
158 """A bytes which mostly acts as a Python 2 str
161 """A bytes which mostly acts as a Python 2 str
159
162
160 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
163 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
161 ('', 'foo', 'ascii', '1')
164 ('', 'foo', 'ascii', '1')
162 >>> s = bytestr(b'foo')
165 >>> s = bytestr(b'foo')
163 >>> assert s is bytestr(s)
166 >>> assert s is bytestr(s)
164
167
165 __bytes__() should be called if provided:
168 __bytes__() should be called if provided:
166
169
167 >>> class bytesable(object):
170 >>> class bytesable(object):
168 ... def __bytes__(self):
171 ... def __bytes__(self):
169 ... return b'bytes'
172 ... return b'bytes'
170 >>> bytestr(bytesable())
173 >>> bytestr(bytesable())
171 'bytes'
174 'bytes'
172
175
173 There's no implicit conversion from non-ascii str as its encoding is
176 There's no implicit conversion from non-ascii str as its encoding is
174 unknown:
177 unknown:
175
178
176 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
179 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
177 Traceback (most recent call last):
180 Traceback (most recent call last):
178 ...
181 ...
179 UnicodeEncodeError: ...
182 UnicodeEncodeError: ...
180
183
181 Comparison between bytestr and bytes should work:
184 Comparison between bytestr and bytes should work:
182
185
183 >>> assert bytestr(b'foo') == b'foo'
186 >>> assert bytestr(b'foo') == b'foo'
184 >>> assert b'foo' == bytestr(b'foo')
187 >>> assert b'foo' == bytestr(b'foo')
185 >>> assert b'f' in bytestr(b'foo')
188 >>> assert b'f' in bytestr(b'foo')
186 >>> assert bytestr(b'f') in b'foo'
189 >>> assert bytestr(b'f') in b'foo'
187
190
188 Sliced elements should be bytes, not integer:
191 Sliced elements should be bytes, not integer:
189
192
190 >>> s[1], s[:2]
193 >>> s[1], s[:2]
191 (b'o', b'fo')
194 (b'o', b'fo')
192 >>> list(s), list(reversed(s))
195 >>> list(s), list(reversed(s))
193 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
196 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
194
197
195 As bytestr type isn't propagated across operations, you need to cast
198 As bytestr type isn't propagated across operations, you need to cast
196 bytes to bytestr explicitly:
199 bytes to bytestr explicitly:
197
200
198 >>> s = bytestr(b'foo').upper()
201 >>> s = bytestr(b'foo').upper()
199 >>> t = bytestr(s)
202 >>> t = bytestr(s)
200 >>> s[0], t[0]
203 >>> s[0], t[0]
201 (70, b'F')
204 (70, b'F')
202
205
203 Be careful to not pass a bytestr object to a function which expects
206 Be careful to not pass a bytestr object to a function which expects
204 bytearray-like behavior.
207 bytearray-like behavior.
205
208
206 >>> t = bytes(t) # cast to bytes
209 >>> t = bytes(t) # cast to bytes
207 >>> assert type(t) is bytes
210 >>> assert type(t) is bytes
208 """
211 """
209
212
210 # Trick pytype into not demanding Iterable[int] be passed to __new__(),
213 # Trick pytype into not demanding Iterable[int] be passed to __new__(),
211 # since the appropriate bytes format is done internally.
214 # since the appropriate bytes format is done internally.
212 #
215 #
213 # https://github.com/google/pytype/issues/500
216 # https://github.com/google/pytype/issues/500
214 if TYPE_CHECKING:
217 if TYPE_CHECKING:
215
218
216 def __init__(self, s=b''):
219 def __init__(self, s=b''):
217 pass
220 pass
218
221
219 def __new__(cls, s=b''):
222 def __new__(cls, s=b''):
220 if isinstance(s, bytestr):
223 if isinstance(s, bytestr):
221 return s
224 return s
222 if not isinstance(
225 if not isinstance(
223 s, (bytes, bytearray)
226 s, (bytes, bytearray)
224 ) and not hasattr( # hasattr-py3-only
227 ) and not hasattr( # hasattr-py3-only
225 s, u'__bytes__'
228 s, u'__bytes__'
226 ):
229 ):
227 s = str(s).encode('ascii')
230 s = str(s).encode('ascii')
228 return bytes.__new__(cls, s)
231 return bytes.__new__(cls, s)
229
232
230 def __getitem__(self, key):
233 def __getitem__(self, key):
231 s = bytes.__getitem__(self, key)
234 s = bytes.__getitem__(self, key)
232 if not isinstance(s, bytes):
235 if not isinstance(s, bytes):
233 s = bytechr(s)
236 s = bytechr(s)
234 return s
237 return s
235
238
236 def __iter__(self):
239 def __iter__(self):
237 return iterbytestr(bytes.__iter__(self))
240 return iterbytestr(bytes.__iter__(self))
238
241
239 def __repr__(self):
242 def __repr__(self):
240 return bytes.__repr__(self)[1:] # drop b''
243 return bytes.__repr__(self)[1:] # drop b''
241
244
245
242 def iterbytestr(s):
246 def iterbytestr(s):
243 """Iterate bytes as if it were a str object of Python 2"""
247 """Iterate bytes as if it were a str object of Python 2"""
244 return map(bytechr, s)
248 return map(bytechr, s)
245
249
250
246 def maybebytestr(s):
251 def maybebytestr(s):
247 """Promote bytes to bytestr"""
252 """Promote bytes to bytestr"""
248 if isinstance(s, bytes):
253 if isinstance(s, bytes):
249 return bytestr(s)
254 return bytestr(s)
250 return s
255 return s
251
256
257
252 def sysbytes(s):
258 def sysbytes(s):
253 """Convert an internal str (e.g. keyword, __doc__) back to bytes
259 """Convert an internal str (e.g. keyword, __doc__) back to bytes
254
260
255 This never raises UnicodeEncodeError, but only ASCII characters
261 This never raises UnicodeEncodeError, but only ASCII characters
256 can be round-tripped by sysstr(sysbytes(s)).
262 can be round-tripped by sysstr(sysbytes(s)).
257 """
263 """
258 if isinstance(s, bytes):
264 if isinstance(s, bytes):
259 return s
265 return s
260 return s.encode('utf-8')
266 return s.encode('utf-8')
261
267
268
262 def sysstr(s):
269 def sysstr(s):
263 """Return a keyword str to be passed to Python functions such as
270 """Return a keyword str to be passed to Python functions such as
264 getattr() and str.encode()
271 getattr() and str.encode()
265
272
266 This never raises UnicodeDecodeError. Non-ascii characters are
273 This never raises UnicodeDecodeError. Non-ascii characters are
267 considered invalid and mapped to arbitrary but unique code points
274 considered invalid and mapped to arbitrary but unique code points
268 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
275 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
269 """
276 """
270 if isinstance(s, builtins.str):
277 if isinstance(s, builtins.str):
271 return s
278 return s
272 return s.decode('latin-1')
279 return s.decode('latin-1')
273
280
281
274 def strurl(url):
282 def strurl(url):
275 """Converts a bytes url back to str"""
283 """Converts a bytes url back to str"""
276 if isinstance(url, bytes):
284 if isinstance(url, bytes):
277 return url.decode('ascii')
285 return url.decode('ascii')
278 return url
286 return url
279
287
288
280 def bytesurl(url):
289 def bytesurl(url):
281 """Converts a str url to bytes by encoding in ascii"""
290 """Converts a str url to bytes by encoding in ascii"""
282 if isinstance(url, str):
291 if isinstance(url, str):
283 return url.encode('ascii')
292 return url.encode('ascii')
284 return url
293 return url
285
294
295
286 def raisewithtb(exc, tb):
296 def raisewithtb(exc, tb):
287 """Raise exception with the given traceback"""
297 """Raise exception with the given traceback"""
288 raise exc.with_traceback(tb)
298 raise exc.with_traceback(tb)
289
299
300
290 def getdoc(obj):
301 def getdoc(obj):
291 """Get docstring as bytes; may be None so gettext() won't confuse it
302 """Get docstring as bytes; may be None so gettext() won't confuse it
292 with _('')"""
303 with _('')"""
293 doc = getattr(obj, '__doc__', None)
304 doc = getattr(obj, '__doc__', None)
294 if doc is None:
305 if doc is None:
295 return doc
306 return doc
296 return sysbytes(doc)
307 return sysbytes(doc)
297
308
309
298 def _wrapattrfunc(f):
310 def _wrapattrfunc(f):
299 @functools.wraps(f)
311 @functools.wraps(f)
300 def w(object, name, *args):
312 def w(object, name, *args):
301 return f(object, sysstr(name), *args)
313 return f(object, sysstr(name), *args)
302
314
303 return w
315 return w
304
316
317
305 # these wrappers are automagically imported by hgloader
318 # these wrappers are automagically imported by hgloader
306 delattr = _wrapattrfunc(builtins.delattr)
319 delattr = _wrapattrfunc(builtins.delattr)
307 getattr = _wrapattrfunc(builtins.getattr)
320 getattr = _wrapattrfunc(builtins.getattr)
308 hasattr = _wrapattrfunc(builtins.hasattr)
321 hasattr = _wrapattrfunc(builtins.hasattr)
309 setattr = _wrapattrfunc(builtins.setattr)
322 setattr = _wrapattrfunc(builtins.setattr)
310 xrange = builtins.range
323 xrange = builtins.range
311 unicode = str
324 unicode = str
312
325
326
313 def open(name, mode=b'r', buffering=-1, encoding=None):
327 def open(name, mode=b'r', buffering=-1, encoding=None):
314 return builtins.open(name, sysstr(mode), buffering, encoding)
328 return builtins.open(name, sysstr(mode), buffering, encoding)
315
329
330
316 safehasattr = _wrapattrfunc(builtins.hasattr)
331 safehasattr = _wrapattrfunc(builtins.hasattr)
317
332
333
318 def _getoptbwrapper(orig, args, shortlist, namelist):
334 def _getoptbwrapper(orig, args, shortlist, namelist):
319 """
335 """
320 Takes bytes arguments, converts them to unicode, pass them to
336 Takes bytes arguments, converts them to unicode, pass them to
321 getopt.getopt(), convert the returned values back to bytes and then
337 getopt.getopt(), convert the returned values back to bytes and then
322 return them for Python 3 compatibility as getopt.getopt() doesn't accept
338 return them for Python 3 compatibility as getopt.getopt() doesn't accept
323 bytes on Python 3.
339 bytes on Python 3.
324 """
340 """
325 args = [a.decode('latin-1') for a in args]
341 args = [a.decode('latin-1') for a in args]
326 shortlist = shortlist.decode('latin-1')
342 shortlist = shortlist.decode('latin-1')
327 namelist = [a.decode('latin-1') for a in namelist]
343 namelist = [a.decode('latin-1') for a in namelist]
328 opts, args = orig(args, shortlist, namelist)
344 opts, args = orig(args, shortlist, namelist)
329 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
345 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
330 args = [a.encode('latin-1') for a in args]
346 args = [a.encode('latin-1') for a in args]
331 return opts, args
347 return opts, args
332
348
349
333 def strkwargs(dic):
350 def strkwargs(dic):
334 """
351 """
335 Converts the keys of a python dictionary to str i.e. unicodes so that
352 Converts the keys of a python dictionary to str i.e. unicodes so that
336 they can be passed as keyword arguments as dictionaries with bytes keys
353 they can be passed as keyword arguments as dictionaries with bytes keys
337 can't be passed as keyword arguments to functions on Python 3.
354 can't be passed as keyword arguments to functions on Python 3.
338 """
355 """
339 dic = {k.decode('latin-1'): v for k, v in dic.items()}
356 dic = {k.decode('latin-1'): v for k, v in dic.items()}
340 return dic
357 return dic
341
358
359
342 def byteskwargs(dic):
360 def byteskwargs(dic):
343 """
361 """
344 Converts keys of python dictionaries to bytes as they were converted to
362 Converts keys of python dictionaries to bytes as they were converted to
345 str to pass that dictionary as a keyword argument on Python 3.
363 str to pass that dictionary as a keyword argument on Python 3.
346 """
364 """
347 dic = {k.encode('latin-1'): v for k, v in dic.items()}
365 dic = {k.encode('latin-1'): v for k, v in dic.items()}
348 return dic
366 return dic
349
367
368
350 # TODO: handle shlex.shlex().
369 # TODO: handle shlex.shlex().
351 def shlexsplit(s, comments=False, posix=True):
370 def shlexsplit(s, comments=False, posix=True):
352 """
371 """
353 Takes bytes argument, convert it to str i.e. unicodes, pass that into
372 Takes bytes argument, convert it to str i.e. unicodes, pass that into
354 shlex.split(), convert the returned value to bytes and return that for
373 shlex.split(), convert the returned value to bytes and return that for
355 Python 3 compatibility as shlex.split() doesn't accept bytes on Python 3.
374 Python 3 compatibility as shlex.split() doesn't accept bytes on Python 3.
356 """
375 """
357 ret = shlex.split(s.decode('latin-1'), comments, posix)
376 ret = shlex.split(s.decode('latin-1'), comments, posix)
358 return [a.encode('latin-1') for a in ret]
377 return [a.encode('latin-1') for a in ret]
359
378
379
360 iteritems = lambda x: x.items()
380 iteritems = lambda x: x.items()
361 itervalues = lambda x: x.values()
381 itervalues = lambda x: x.values()
362
382
363 # Python 3.5's json.load and json.loads require str. We polyfill its
383 # Python 3.5's json.load and json.loads require str. We polyfill its
364 # code for detecting encoding from bytes.
384 # code for detecting encoding from bytes.
365 if sys.version_info[0:2] < (3, 6):
385 if sys.version_info[0:2] < (3, 6):
366
386
367 def _detect_encoding(b):
387 def _detect_encoding(b):
368 bstartswith = b.startswith
388 bstartswith = b.startswith
369 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
389 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
370 return 'utf-32'
390 return 'utf-32'
371 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
391 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
372 return 'utf-16'
392 return 'utf-16'
373 if bstartswith(codecs.BOM_UTF8):
393 if bstartswith(codecs.BOM_UTF8):
374 return 'utf-8-sig'
394 return 'utf-8-sig'
375
395
376 if len(b) >= 4:
396 if len(b) >= 4:
377 if not b[0]:
397 if not b[0]:
378 # 00 00 -- -- - utf-32-be
398 # 00 00 -- -- - utf-32-be
379 # 00 XX -- -- - utf-16-be
399 # 00 XX -- -- - utf-16-be
380 return 'utf-16-be' if b[1] else 'utf-32-be'
400 return 'utf-16-be' if b[1] else 'utf-32-be'
381 if not b[1]:
401 if not b[1]:
382 # XX 00 00 00 - utf-32-le
402 # XX 00 00 00 - utf-32-le
383 # XX 00 00 XX - utf-16-le
403 # XX 00 00 XX - utf-16-le
384 # XX 00 XX -- - utf-16-le
404 # XX 00 XX -- - utf-16-le
385 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
405 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
386 elif len(b) == 2:
406 elif len(b) == 2:
387 if not b[0]:
407 if not b[0]:
388 # 00 XX - utf-16-be
408 # 00 XX - utf-16-be
389 return 'utf-16-be'
409 return 'utf-16-be'
390 if not b[1]:
410 if not b[1]:
391 # XX 00 - utf-16-le
411 # XX 00 - utf-16-le
392 return 'utf-16-le'
412 return 'utf-16-le'
393 # default
413 # default
394 return 'utf-8'
414 return 'utf-8'
395
415
396 def json_loads(s, *args, **kwargs):
416 def json_loads(s, *args, **kwargs):
397 if isinstance(s, (bytes, bytearray)):
417 if isinstance(s, (bytes, bytearray)):
398 s = s.decode(_detect_encoding(s), 'surrogatepass')
418 s = s.decode(_detect_encoding(s), 'surrogatepass')
399
419
400 return json.loads(s, *args, **kwargs)
420 return json.loads(s, *args, **kwargs)
401
421
402 else:
403 json_loads = json.loads
404
422
405 else:
423 else:
406 import cStringIO
407
408 xrange = xrange
409 unicode = unicode
410 bytechr = chr
411 byterepr = repr
412 bytestr = str
413 iterbytestr = iter
414 maybebytestr = identity
415 sysbytes = identity
416 sysstr = identity
417 strurl = identity
418 bytesurl = identity
419 open = open
420 delattr = delattr
421 getattr = getattr
422 hasattr = hasattr
423 setattr = setattr
424
425 # this can't be parsed on Python 3
426 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
427
428 def fsencode(filename):
429 """
430 Partial backport from os.py in Python 3, which only accepts bytes.
431 In Python 2, our paths should only ever be bytes, a unicode path
432 indicates a bug.
433 """
434 if isinstance(filename, str):
435 return filename
436 else:
437 raise TypeError("expect str, not %s" % type(filename).__name__)
438
439 # In Python 2, fsdecode() has a very high chance to receive bytes. So it's
440 # better not to touch Python 2 part as it's already working fine.
441 fsdecode = identity
442
443 def getdoc(obj):
444 return getattr(obj, '__doc__', None)
445
446 _notset = object()
447
448 def safehasattr(thing, attr):
449 return getattr(thing, attr, _notset) is not _notset
450
451 def _getoptbwrapper(orig, args, shortlist, namelist):
452 return orig(args, shortlist, namelist)
453
454 strkwargs = identity
455 byteskwargs = identity
456
457 oscurdir = os.curdir
458 oslinesep = os.linesep
459 osname = os.name
460 ospathsep = os.pathsep
461 ospardir = os.pardir
462 ossep = os.sep
463 osaltsep = os.altsep
464 osdevnull = os.devnull
465 long = long
466 if getattr(sys, 'argv', None) is not None:
467 sysargv = sys.argv
468 sysplatform = sys.platform
469 sysexecutable = sys.executable
470 shlexsplit = shlex.split
471 bytesio = cStringIO.StringIO
472 stringio = bytesio
473 maplist = map
474 rangelist = range
475 ziplist = zip
476 rawinput = raw_input
477 getargspec = inspect.getargspec
478 iteritems = lambda x: x.iteritems()
479 itervalues = lambda x: x.itervalues()
480 json_loads = json.loads
424 json_loads = json.loads
481
425
482 isjython = sysplatform.startswith(b'java')
426 isjython = sysplatform.startswith(b'java')
483
427
484 isdarwin = sysplatform.startswith(b'darwin')
428 isdarwin = sysplatform.startswith(b'darwin')
485 islinux = sysplatform.startswith(b'linux')
429 islinux = sysplatform.startswith(b'linux')
486 isposix = osname == b'posix'
430 isposix = osname == b'posix'
487 iswindows = osname == b'nt'
431 iswindows = osname == b'nt'
488
432
489
433
490 def getoptb(args, shortlist, namelist):
434 def getoptb(args, shortlist, namelist):
491 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
435 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
492
436
493
437
494 def gnugetoptb(args, shortlist, namelist):
438 def gnugetoptb(args, shortlist, namelist):
495 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
439 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
496
440
497
441
498 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
442 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
499 return tempfile.mkdtemp(suffix, prefix, dir)
443 return tempfile.mkdtemp(suffix, prefix, dir)
500
444
501
445
502 # text=True is not supported; use util.from/tonativeeol() instead
446 # text=True is not supported; use util.from/tonativeeol() instead
503 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
447 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
504 return tempfile.mkstemp(suffix, prefix, dir)
448 return tempfile.mkstemp(suffix, prefix, dir)
505
449
506
450
507 # TemporaryFile does not support an "encoding=" argument on python2.
451 # TemporaryFile does not support an "encoding=" argument on python2.
508 # Files from this wrapper are always opened in byte mode.
452 # Files from this wrapper are always opened in byte mode.
509 def unnamedtempfile(mode=None, *args, **kwargs):
453 def unnamedtempfile(mode=None, *args, **kwargs):
510 if mode is None:
454 if mode is None:
511 mode = 'w+b'
455 mode = 'w+b'
512 else:
456 else:
513 mode = sysstr(mode)
457 mode = sysstr(mode)
514 assert 'b' in mode
458 assert 'b' in mode
515 return tempfile.TemporaryFile(mode, *args, **kwargs)
459 return tempfile.TemporaryFile(mode, *args, **kwargs)
516
460
517
461
518 # NamedTemporaryFile does not support an "encoding=" argument on python2.
462 # NamedTemporaryFile does not support an "encoding=" argument on python2.
519 # Files from this wrapper are always opened in byte mode.
463 # Files from this wrapper are always opened in byte mode.
520 def namedtempfile(
464 def namedtempfile(
521 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
465 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
522 ):
466 ):
523 mode = sysstr(mode)
467 mode = sysstr(mode)
524 assert 'b' in mode
468 assert 'b' in mode
525 return tempfile.NamedTemporaryFile(
469 return tempfile.NamedTemporaryFile(
526 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
470 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
527 )
471 )
General Comments 0
You need to be logged in to leave comments. Login now