##// END OF EJS Templates
pycompat: allow pycompat.sysbytes() even if input already is bytes...
Martin von Zweigbergk -
r44322:66af68d4 default
parent child Browse files
Show More
@@ -1,514 +1,516
# pycompat.py - portability shim for python 3
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial portability shim for python 3.

This contains aliases to hide python version-specific details from the core.
"""
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import inspect
14 import inspect
15 import json
15 import json
16 import os
16 import os
17 import shlex
17 import shlex
18 import sys
18 import sys
19 import tempfile
19 import tempfile
20
20
# Interpreter detection flags consulted by the rest of this module.
ispy3 = sys.version_info[0] >= 3
ispypy = '__pypy__' in sys.builtin_module_names
TYPE_CHECKING = False

# globals() is never empty while a module body is executing, so this branch
# is dead at runtime; it only exists for the benefit of pytype.
if not globals():  # hide this from non-pytype users
    import typing

    TYPE_CHECKING = typing.TYPE_CHECKING
29
29
# Import the standard-library modules whose names differ between Python 2
# and Python 3 under one common set of aliases, and define
# future_set_exception_info() for the futures implementation in use.
if not ispy3:
    import cookielib
    import cPickle as pickle
    import httplib
    import Queue as queue
    import SocketServer as socketserver
    import xmlrpclib

    from .thirdparty.concurrent import futures

    def future_set_exception_info(f, exc_info):
        """Record a failure on future ``f`` by unpacking ``exc_info`` into
        ``f.set_exception_info()``."""
        f.set_exception_info(*exc_info)


else:
    import concurrent.futures as futures
    import http.cookiejar as cookielib
    import http.client as httplib
    import pickle
    import queue as queue
    import socketserver
    import xmlrpc.client as xmlrpclib

    def future_set_exception_info(f, exc_info):
        """Record a failure on future ``f`` using the first item of
        ``exc_info``.

        NOTE(review): ``exc_info[0]`` is presumably the exception instance
        (i.e. callers pass ``(exception, traceback)`` rather than the full
        ``sys.exc_info()`` triple) -- confirm against callers.
        """
        f.set_exception(exc_info[0])
55
55
56
56
def identity(a):
    """Return the argument unchanged."""
    return a


def _rapply(f, xs):
    """Recursive worker for rapply(); see there for the contract."""
    if xs is None:
        # assume None means non-value of optional data
        return xs
    if isinstance(xs, (list, set, tuple)):
        # rebuild the same container type from the converted items
        return type(xs)(_rapply(f, x) for x in xs)
    if isinstance(xs, dict):
        # both keys and values are converted
        return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
    return f(xs)


def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    Lists, sets, tuples and dicts are traversed; None is passed through
    untouched; every other value is replaced by ``f(value)``.

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    if f is identity:
        # fast path mainly for py2
        return xs
    return _rapply(f, xs)
94
94
95
95
96 if ispy3:
96 if ispy3:
# Python 3 side of the module (these statements sit under ``if ispy3:``):
# extra imports plus bytes versions of the os/sys string constants.
import builtins
import codecs
import functools
import io
import struct

if os.name == r'nt' and sys.version_info >= (3, 6):
    # MBCS (or ANSI) filesystem encoding must be used as before.
    # Otherwise non-ASCII filenames in existing repositories would be
    # corrupted.
    # This must be set once prior to any fsencode/fsdecode calls.
    sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr

fsencode = os.fsencode
fsdecode = os.fsdecode
oscurdir = os.curdir.encode('ascii')
oslinesep = os.linesep.encode('ascii')
osname = os.name.encode('ascii')
ospathsep = os.pathsep.encode('ascii')
ospardir = os.pardir.encode('ascii')
ossep = os.sep.encode('ascii')
# os.altsep may be None, in which case it is kept as None
osaltsep = os.altsep
if osaltsep:
    osaltsep = osaltsep.encode('ascii')
osdevnull = os.devnull.encode('ascii')

sysplatform = sys.platform.encode('ascii')
# sys.executable may be empty or None; only a non-empty value is encoded
sysexecutable = sys.executable
if sysexecutable:
    sysexecutable = os.fsencode(sysexecutable)
bytesio = io.BytesIO
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
130
130
# Python 3 versions of the list-returning builtins (``if ispy3:`` branch).
def maplist(*args):
    """Like map(), but return a list instead of an iterator."""
    return list(map(*args))


def rangelist(*args):
    """Like range(), but return a list."""
    return list(range(*args))


def ziplist(*args):
    """Like zip(), but return a list instead of an iterator."""
    return list(zip(*args))
139
139
# Python 3 aliases (``if ispy3:`` branch) for builtins that were renamed,
# plus bytes-oriented standard streams and argv.
rawinput = input
getargspec = inspect.getfullargspec

long = int

# TODO: .buffer might not exist if std streams were replaced; we'll need
# a silly wrapper to make a bytes stream backed by a unicode one.
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
stderr = sys.stderr.buffer

# Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
# we can use os.fsencode() to get back bytes argv.
#
# https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
#
# On Windows, the native argv is unicode and is converted to MBCS bytes
# since we do enable the legacy filesystem encoding.
if getattr(sys, 'argv', None) is not None:
    # sysargv is left undefined when sys.argv is missing or None
    sysargv = list(map(os.fsencode, sys.argv))
160
160
# Python 3 implementation (``if ispy3:`` branch) of the helpers that make
# bytes behave like a Python 2 str.

# bytechr(i) packs the integer i (0..255) into a one-byte bytes object.
bytechr = struct.Struct('>B').pack
# byterepr(x) is b'%r' % x, i.e. the repr of x as bytes.
byterepr = b'%r'.__mod__


class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # an existing bytestr is returned as-is (no copy)
        if isinstance(s, bytestr):
            return s
        # anything that is neither bytes-like nor provides __bytes__ is
        # stringified and must be pure ASCII
        if not isinstance(
            s, (bytes, bytearray)
        ) and not hasattr(  # hasattr-py3-only
            s, u'__bytes__'
        ):
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        s = bytes.__getitem__(self, key)
        # integer indexing yields an int on Python 3; turn it back into a
        # one-byte bytes object (slices are already bytes)
        if not isinstance(s, bytes):
            s = bytechr(s)
        return s

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))

    def __repr__(self):
        return bytes.__repr__(self)[1:]  # drop b''


def iterbytestr(s):
    """Iterate bytes as if it were a str object of Python 2"""
    return map(bytechr, s)


def maybebytestr(s):
    """Promote bytes to bytestr"""
    if isinstance(s, bytes):
        return bytestr(s)
    return s
249
249
def sysbytes(s):
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    Python 3 implementation (``if ispy3:`` branch).  Input that already is
    bytes is returned unchanged; str input is encoded as UTF-8.  Only ASCII
    characters can be round-tripped by sysstr(sysbytes(s)), because sysstr()
    decodes as latin-1.
    """
    if isinstance(s, bytes):
        return s
    return s.encode('utf-8')
257
259
def sysstr(s):
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    Python 3 implementation (``if ispy3:`` branch).  A str is returned
    unchanged; bytes are decoded as latin-1.

    This never raises UnicodeDecodeError. Non-ascii characters are
    considered invalid and mapped to arbitrary but unique code points
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if isinstance(s, builtins.str):
        return s
    return s.decode('latin-1')
269
271
def strurl(url):
    """Converts a bytes url back to str

    Python 3 implementation (``if ispy3:`` branch).  bytes are decoded as
    ASCII; any other value is returned unchanged.
    """
    if isinstance(url, bytes):
        return url.decode('ascii')
    return url
275
277
def bytesurl(url):
    """Converts a str url to bytes by encoding in ascii

    Python 3 implementation (``if ispy3:`` branch).  Any value that is not
    a str is returned unchanged.
    """
    if isinstance(url, str):
        return url.encode('ascii')
    return url
281
283
def raisewithtb(exc, tb):
    """Raise exception with the given traceback

    Python 3 implementation (``if ispy3:`` branch).
    """
    raise exc.with_traceback(tb)
285
287
def getdoc(obj):
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')

    Python 3 implementation (``if ispy3:`` branch): a missing or None
    ``__doc__`` yields None, anything else is converted with sysbytes().
    """
    doc = getattr(obj, '__doc__', None)
    if doc is None:
        return doc
    return sysbytes(doc)
293
295
def _wrapattrfunc(f):
    """Wrap an attribute builtin so that its ``name`` argument goes through
    sysstr() before being passed on to ``f``."""

    @functools.wraps(f)
    def w(object, name, *args):
        return f(object, sysstr(name), *args)

    return w


# Python 3 implementation (``if ispy3:`` branch).
# these wrappers are automagically imported by hgloader
delattr = _wrapattrfunc(builtins.delattr)
getattr = _wrapattrfunc(builtins.getattr)
hasattr = _wrapattrfunc(builtins.hasattr)
setattr = _wrapattrfunc(builtins.setattr)
xrange = builtins.range
unicode = str


def open(name, mode=b'r', buffering=-1, encoding=None):
    """Call builtins.open() after converting ``mode`` with sysstr()."""
    return builtins.open(name, sysstr(mode), buffering, encoding)


safehasattr = _wrapattrfunc(builtins.hasattr)
313
315
def _getoptbwrapper(orig, args, shortlist, namelist):
    """
    Takes bytes arguments, converts them to unicode, passes them to
    getopt.getopt(), converts the returned values back to bytes and then
    returns them, for Python 3 compatibility as getopt.getopt() doesn't
    accept bytes on Python 3.

    Python 3 implementation (``if ispy3:`` branch).  ``orig`` is the getopt
    function to call; latin-1 is used in both directions.
    """
    args = [a.decode('latin-1') for a in args]
    shortlist = shortlist.decode('latin-1')
    namelist = [a.decode('latin-1') for a in namelist]
    opts, args = orig(args, shortlist, namelist)
    opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
    args = [a.encode('latin-1') for a in args]
    return opts, args
328
330
def strkwargs(dic):
    """
    Converts the keys of a python dictionary to str (i.e. unicode) so that
    it can be passed as keyword arguments, since dictionaries with bytes
    keys can't be passed as keyword arguments to functions on Python 3.

    Python 3 implementation (``if ispy3:`` branch).  Keys are decoded as
    latin-1; a new dict is returned and values are left untouched.
    """
    return {k.decode('latin-1'): v for k, v in dic.items()}
337
339
def byteskwargs(dic):
    """
    Converts keys of python dictionaries to bytes, as they were converted
    to str to pass that dictionary as a keyword argument on Python 3.

    Python 3 implementation (``if ispy3:`` branch).  Keys are encoded as
    latin-1; a new dict is returned and values are left untouched.
    """
    return {k.encode('latin-1'): v for k, v in dic.items()}
345
347
# TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Takes a bytes argument, converts it to str (i.e. unicode), passes that
    into shlex.split(), converts the returned values to bytes and returns
    them, for Python 3 compatibility as shlex.split() doesn't accept bytes
    on Python 3.

    Python 3 implementation (``if ispy3:`` branch).  latin-1 is used in both
    directions.
    """
    ret = shlex.split(s.decode('latin-1'), comments, posix)
    return [a.encode('latin-1') for a in ret]
355
357
# Python 3 implementation (``if ispy3:`` branch): dict views stand in for
# the Python 2 iteritems()/itervalues() methods.
iteritems = lambda x: x.items()
itervalues = lambda x: x.values()
358
360
# Python 3 implementation (``if ispy3:`` branch).
# Python 3.5's json.load and json.loads require str. We polyfill its
# code for detecting encoding from bytes.
if sys.version_info[0:2] < (3, 6):

    def _detect_encoding(b):
        """Guess the Unicode encoding of bytes ``b`` from a BOM or from the
        position of NUL bytes at its start; falls back to 'utf-8'."""
        bstartswith = b.startswith
        if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
            return 'utf-32'
        if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
            return 'utf-16'
        if bstartswith(codecs.BOM_UTF8):
            return 'utf-8-sig'

        if len(b) >= 4:
            if not b[0]:
                # 00 00 -- -- - utf-32-be
                # 00 XX -- -- - utf-16-be
                return 'utf-16-be' if b[1] else 'utf-32-be'
            if not b[1]:
                # XX 00 00 00 - utf-32-le
                # XX 00 00 XX - utf-16-le
                # XX 00 XX -- - utf-16-le
                return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
        elif len(b) == 2:
            if not b[0]:
                # 00 XX - utf-16-be
                return 'utf-16-be'
            if not b[1]:
                # XX 00 - utf-16-le
                return 'utf-16-le'
        # default
        return 'utf-8'

    def json_loads(s, *args, **kwargs):
        """json.loads() that also accepts bytes/bytearray input."""
        if isinstance(s, (bytes, bytearray)):
            s = s.decode(_detect_encoding(s), 'surrogatepass')

        return json.loads(s, *args, **kwargs)


else:
    json_loads = json.loads
400
402
401 else:
403 else:
# Python 2 side of the module (these statements sit under the ``else:`` of
# ``if ispy3:``): the native builtins already have the wanted behavior, so
# they are re-exported under the names the Python 3 branch defines.
import cStringIO

xrange = xrange
unicode = unicode
bytechr = chr
byterepr = repr
bytestr = str
iterbytestr = iter
maybebytestr = identity
sysbytes = identity
sysstr = identity
strurl = identity
bytesurl = identity
open = open
delattr = delattr
getattr = getattr
hasattr = hasattr
setattr = setattr

# this can't be parsed on Python 3
exec(b'def raisewithtb(exc, tb):\n    raise exc, None, tb\n')
423
425
def fsencode(filename):
    """
    Partial backport from os.py in Python 3, which only accepts bytes.
    In Python 2, our paths should only ever be bytes, a unicode path
    indicates a bug.

    Python 2 implementation (``else:`` branch of ``if ispy3:``).  Returns
    ``filename`` unchanged when it is a ``str`` and raises TypeError
    otherwise.
    """
    if isinstance(filename, str):
        return filename
    else:
        raise TypeError("expect str, not %s" % type(filename).__name__)
434
436
# Python 2 implementation (``else:`` branch of ``if ispy3:``).
# In Python 2, fsdecode() has a very high chance of receiving bytes. So
# it's better not to touch the Python 2 part as it's already working fine.
fsdecode = identity
438
440
def getdoc(obj):
    """Return ``obj.__doc__`` as-is, or None when the attribute is missing.

    Python 2 implementation (``else:`` branch of ``if ispy3:``).
    """
    return getattr(obj, '__doc__', None)
441
443
# Python 2 implementation (``else:`` branch of ``if ispy3:``).
# Unique sentinel: lets safehasattr() tell "attribute missing" apart from
# any real attribute value, including None.
_notset = object()


def safehasattr(thing, attr):
    """Return True if ``thing`` has an attribute named ``attr``.

    Implemented with three-argument getattr() and a sentinel default.
    """
    return getattr(thing, attr, _notset) is not _notset
446
448
def _getoptbwrapper(orig, args, shortlist, namelist):
    """Call the getopt function ``orig`` with the arguments unchanged.

    Python 2 implementation (``else:`` branch of ``if ispy3:``); no
    conversion is performed here.
    """
    return orig(args, shortlist, namelist)
449
451
# Python 2 implementation (``else:`` branch of ``if ispy3:``): the native
# os/sys values and builtins are exported unchanged under the same names
# the Python 3 branch defines.
strkwargs = identity
byteskwargs = identity

oscurdir = os.curdir
oslinesep = os.linesep
osname = os.name
ospathsep = os.pathsep
ospardir = os.pardir
ossep = os.sep
osaltsep = os.altsep
osdevnull = os.devnull
long = long
stdin = sys.stdin
stdout = sys.stdout
stderr = sys.stderr
if getattr(sys, 'argv', None) is not None:
    # sysargv is left undefined when sys.argv is missing or None
    sysargv = sys.argv
sysplatform = sys.platform
sysexecutable = sys.executable
shlexsplit = shlex.split
bytesio = cStringIO.StringIO
stringio = bytesio
maplist = map
rangelist = range
ziplist = zip
rawinput = raw_input
getargspec = inspect.getargspec
iteritems = lambda x: x.iteritems()
itervalues = lambda x: x.itervalues()
json_loads = json.loads
480
482
# Platform flags derived from the sysplatform / osname values defined
# earlier in this module; the comparisons use bytes literals.
isjython = sysplatform.startswith(b'java')

isdarwin = sysplatform.startswith(b'darwin')
islinux = sysplatform.startswith(b'linux')
isposix = osname == b'posix'
iswindows = osname == b'nt'
487
489
488
490
def getoptb(args, shortlist, namelist):
    """Run getopt.getopt() through _getoptbwrapper()."""
    return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
491
493
492
494
def gnugetoptb(args, shortlist, namelist):
    """Run getopt.gnu_getopt() through _getoptbwrapper()."""
    return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
495
497
496
498
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """Call tempfile.mkdtemp() with bytes defaults for suffix and prefix."""
    return tempfile.mkdtemp(suffix, prefix, dir)
499
501
500
502
# text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """Call tempfile.mkstemp() with bytes defaults for suffix and prefix."""
    return tempfile.mkstemp(suffix, prefix, dir)
504
506
505
507
# mode must include 'b'ytes as encoding= is not supported
def namedtempfile(
    mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
):
    """Call tempfile.NamedTemporaryFile() for a binary-mode file.

    ``mode`` is converted with sysstr() first and must contain 'b'; this is
    checked with an assert.
    """
    mode = sysstr(mode)
    assert 'b' in mode
    return tempfile.NamedTemporaryFile(
        mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
    )
General Comments 0
You need to be logged in to leave comments. Login now