##// END OF EJS Templates
pycompat: fix a bytes vs str issue in `unnamedtempfile()`...
Matt Harbison -
r47385:7a29d900 stable
parent child Browse files
Show More
@@ -1,529 +1,529
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import inspect
14 import inspect
15 import json
15 import json
16 import os
16 import os
17 import shlex
17 import shlex
18 import sys
18 import sys
19 import tempfile
19 import tempfile
20
20
21 ispy3 = sys.version_info[0] >= 3
21 ispy3 = sys.version_info[0] >= 3
22 ispypy = '__pypy__' in sys.builtin_module_names
22 ispypy = '__pypy__' in sys.builtin_module_names
23 TYPE_CHECKING = False
23 TYPE_CHECKING = False
24
24
25 if not globals(): # hide this from non-pytype users
25 if not globals(): # hide this from non-pytype users
26 import typing
26 import typing
27
27
28 TYPE_CHECKING = typing.TYPE_CHECKING
28 TYPE_CHECKING = typing.TYPE_CHECKING
29
29
30 if not ispy3:
30 if not ispy3:
31 import cookielib
31 import cookielib
32 import cPickle as pickle
32 import cPickle as pickle
33 import httplib
33 import httplib
34 import Queue as queue
34 import Queue as queue
35 import SocketServer as socketserver
35 import SocketServer as socketserver
36 import xmlrpclib
36 import xmlrpclib
37
37
38 from .thirdparty.concurrent import futures
38 from .thirdparty.concurrent import futures
39
39
def future_set_exception_info(f, exc_info):
    """Record a failure on future ``f`` (Python 2 / bundled ``futures``).

    ``exc_info`` is unpacked into positional arguments of
    ``f.set_exception_info()``.
    """
    # NOTE(review): presumably exc_info is an (exception, traceback) pair, as
    # that is what the backported futures API accepts -- confirm with callers.
    f.set_exception_info(*exc_info)
42
42
43
43
44 else:
44 else:
45 import concurrent.futures as futures
45 import concurrent.futures as futures
46 import http.cookiejar as cookielib
46 import http.cookiejar as cookielib
47 import http.client as httplib
47 import http.client as httplib
48 import pickle
48 import pickle
49 import queue as queue
49 import queue as queue
50 import socketserver
50 import socketserver
51 import xmlrpc.client as xmlrpclib
51 import xmlrpc.client as xmlrpclib
52
52
def future_set_exception_info(f, exc_info):
    """Record a failure on future ``f`` (Python 3 ``concurrent.futures``).

    Only the first element of ``exc_info`` is used; it is handed to
    ``f.set_exception()``.
    """
    # NOTE(review): presumably exc_info[0] is the exception *instance* (i.e.
    # callers pass an (exception, traceback) pair rather than the full
    # sys.exc_info() triple) -- confirm against callers.
    f.set_exception(exc_info[0])
55
55
56
56
def identity(a):
    """Return ``a`` unchanged (the very same object)."""
    return a
59
59
60
60
61 def _rapply(f, xs):
61 def _rapply(f, xs):
62 if xs is None:
62 if xs is None:
63 # assume None means non-value of optional data
63 # assume None means non-value of optional data
64 return xs
64 return xs
65 if isinstance(xs, (list, set, tuple)):
65 if isinstance(xs, (list, set, tuple)):
66 return type(xs)(_rapply(f, x) for x in xs)
66 return type(xs)(_rapply(f, x) for x in xs)
67 if isinstance(xs, dict):
67 if isinstance(xs, dict):
68 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
68 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
69 return f(xs)
69 return f(xs)
70
70
71
71
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    if f is identity:
        # fast path mainly for py2: nothing would change, so skip the walk
        # and hand back the original object instead of a rebuilt copy
        return xs
    return _rapply(f, xs)
94
94
95
95
96 if ispy3:
96 if ispy3:
97 import builtins
97 import builtins
98 import codecs
98 import codecs
99 import functools
99 import functools
100 import io
100 import io
101 import struct
101 import struct
102
102
103 if os.name == r'nt' and sys.version_info >= (3, 6):
103 if os.name == r'nt' and sys.version_info >= (3, 6):
104 # MBCS (or ANSI) filesystem encoding must be used as before.
104 # MBCS (or ANSI) filesystem encoding must be used as before.
105 # Otherwise non-ASCII filenames in existing repositories would be
105 # Otherwise non-ASCII filenames in existing repositories would be
106 # corrupted.
106 # corrupted.
107 # This must be set once prior to any fsencode/fsdecode calls.
107 # This must be set once prior to any fsencode/fsdecode calls.
108 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
108 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
109
109
110 fsencode = os.fsencode
110 fsencode = os.fsencode
111 fsdecode = os.fsdecode
111 fsdecode = os.fsdecode
112 oscurdir = os.curdir.encode('ascii')
112 oscurdir = os.curdir.encode('ascii')
113 oslinesep = os.linesep.encode('ascii')
113 oslinesep = os.linesep.encode('ascii')
114 osname = os.name.encode('ascii')
114 osname = os.name.encode('ascii')
115 ospathsep = os.pathsep.encode('ascii')
115 ospathsep = os.pathsep.encode('ascii')
116 ospardir = os.pardir.encode('ascii')
116 ospardir = os.pardir.encode('ascii')
117 ossep = os.sep.encode('ascii')
117 ossep = os.sep.encode('ascii')
118 osaltsep = os.altsep
118 osaltsep = os.altsep
119 if osaltsep:
119 if osaltsep:
120 osaltsep = osaltsep.encode('ascii')
120 osaltsep = osaltsep.encode('ascii')
121 osdevnull = os.devnull.encode('ascii')
121 osdevnull = os.devnull.encode('ascii')
122
122
123 sysplatform = sys.platform.encode('ascii')
123 sysplatform = sys.platform.encode('ascii')
124 sysexecutable = sys.executable
124 sysexecutable = sys.executable
125 if sysexecutable:
125 if sysexecutable:
126 sysexecutable = os.fsencode(sysexecutable)
126 sysexecutable = os.fsencode(sysexecutable)
127 bytesio = io.BytesIO
127 bytesio = io.BytesIO
128 # TODO deprecate stringio name, as it is a lie on Python 3.
128 # TODO deprecate stringio name, as it is a lie on Python 3.
129 stringio = bytesio
129 stringio = bytesio
130
130
def maplist(*args):
    """Call ``map(*args)`` and return the results as a list."""
    return list(map(*args))
133
133
def rangelist(*args):
    """Call ``range(*args)`` and return the values as a list."""
    return list(range(*args))
136
136
def ziplist(*args):
    """Call ``zip(*args)`` and return the tuples as a list."""
    return list(zip(*args))
139
139
140 rawinput = input
140 rawinput = input
141 getargspec = inspect.getfullargspec
141 getargspec = inspect.getfullargspec
142
142
143 long = int
143 long = int
144
144
145 if getattr(sys, 'argv', None) is not None:
145 if getattr(sys, 'argv', None) is not None:
146 # On POSIX, the char** argv array is converted to Python str using
146 # On POSIX, the char** argv array is converted to Python str using
147 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
147 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
148 # isn't directly callable from Python code. In practice, os.fsencode()
148 # isn't directly callable from Python code. In practice, os.fsencode()
149 # can be used instead (this is recommended by Python's documentation
149 # can be used instead (this is recommended by Python's documentation
150 # for sys.argv).
150 # for sys.argv).
151 #
151 #
152 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
152 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
153 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
153 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
154 # there's an additional wrinkle. What we really want to access is the
154 # there's an additional wrinkle. What we really want to access is the
155 # ANSI codepage representation of the arguments, as this is what
155 # ANSI codepage representation of the arguments, as this is what
156 # `int main()` would receive if Python 3 didn't define `int wmain()`
156 # `int main()` would receive if Python 3 didn't define `int wmain()`
157 # (this is how Python 2 worked). To get that, we encode with the mbcs
157 # (this is how Python 2 worked). To get that, we encode with the mbcs
158 # encoding, which will pass CP_ACP to the underlying Windows API to
158 # encoding, which will pass CP_ACP to the underlying Windows API to
159 # produce bytes.
159 # produce bytes.
160 if os.name == r'nt':
160 if os.name == r'nt':
161 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
161 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
162 else:
162 else:
163 sysargv = [fsencode(a) for a in sys.argv]
163 sysargv = [fsencode(a) for a in sys.argv]
164
164
165 bytechr = struct.Struct('>B').pack
165 bytechr = struct.Struct('>B').pack
166 byterepr = b'%r'.__mod__
166 byterepr = b'%r'.__mod__
167
167
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is returned as-is, so ``bytestr(s) is s``.
        if isinstance(s, bytestr):
            return s
        # Anything that is neither bytes-like nor convertible through
        # __bytes__ is stringified and must then be pure ASCII; a non-ASCII
        # str raises UnicodeEncodeError here.
        if not isinstance(
            s, (bytes, bytearray)
        ) and not hasattr(  # hasattr-py3-only
            s, u'__bytes__'
        ):
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        s = bytes.__getitem__(self, key)
        # Integer indexing yields an int on Python 3; turn it back into a
        # one-byte bytes object.  Slices are already bytes.
        if not isinstance(s, bytes):
            s = bytechr(s)
        return s

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))

    def __repr__(self):
        return bytes.__repr__(self)[1:]  # drop b''
243
243
def iterbytestr(s):
    """Iterate bytes as if it were a str object of Python 2

    Each item produced by iterating ``s`` is passed through ``bytechr``; the
    result is a lazy ``map`` iterator.
    """
    return map(bytechr, s)
247
247
def maybebytestr(s):
    """Promote bytes to bytestr

    Values that are not ``bytes`` instances are returned unchanged.
    """
    if isinstance(s, bytes):
        return bytestr(s)
    return s
253
253
def sysbytes(s):
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    ``bytes`` input is returned unchanged; anything else is encoded as
    UTF-8.  Only ASCII characters can be round-tripped by
    ``sysstr(sysbytes(s))``, because ``sysstr`` decodes as latin-1.
    """
    if isinstance(s, bytes):
        return s
    return s.encode('utf-8')
263
263
def sysstr(s):
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    This never raises UnicodeDecodeError. Non-ascii characters are
    considered invalid and mapped to arbitrary but unique code points
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    # ``builtins.str`` is spelled out so the check always targets the real
    # text type.
    if isinstance(s, builtins.str):
        return s
    # latin-1 maps every byte value to exactly one code point, so decoding
    # cannot fail and distinct inputs stay distinct.
    return s.decode('latin-1')
275
275
def strurl(url):
    """Converts a bytes url back to str

    ``bytes`` input is decoded as ASCII (non-ASCII bytes raise
    ``UnicodeDecodeError``); any other value is returned unchanged.
    """
    if isinstance(url, bytes):
        return url.decode('ascii')
    return url
281
281
def bytesurl(url):
    """Converts a str url to bytes by encoding in ascii

    ``str`` input is encoded as ASCII (non-ASCII characters raise
    ``UnicodeEncodeError``); any other value is returned unchanged.
    """
    if isinstance(url, str):
        return url.encode('ascii')
    return url
287
287
def raisewithtb(exc, tb):
    """Raise exception with the given traceback

    ``tb`` is attached to ``exc`` via ``with_traceback()`` before raising.
    """
    raise exc.with_traceback(tb)
291
291
def getdoc(obj):
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')"""
    doc = getattr(obj, '__doc__', None)
    if doc is None:
        # Keep "no docstring" distinct from an empty docstring.
        return doc
    return sysbytes(doc)
299
299
def _wrapattrfunc(f):
    """Wrap an attribute builtin so that it accepts bytes attribute names.

    The returned wrapper passes ``name`` through ``sysstr()`` and forwards
    the object and any extra positional arguments to ``f`` unchanged.
    ``functools.wraps`` copies ``f``'s metadata onto the wrapper.
    """

    @functools.wraps(f)
    def w(object, name, *args):
        return f(object, sysstr(name), *args)

    return w
306
306
307 # these wrappers are automagically imported by hgloader
307 # these wrappers are automagically imported by hgloader
308 delattr = _wrapattrfunc(builtins.delattr)
308 delattr = _wrapattrfunc(builtins.delattr)
309 getattr = _wrapattrfunc(builtins.getattr)
309 getattr = _wrapattrfunc(builtins.getattr)
310 hasattr = _wrapattrfunc(builtins.hasattr)
310 hasattr = _wrapattrfunc(builtins.hasattr)
311 setattr = _wrapattrfunc(builtins.setattr)
311 setattr = _wrapattrfunc(builtins.setattr)
312 xrange = builtins.range
312 xrange = builtins.range
313 unicode = str
313 unicode = str
314
314
def open(name, mode=b'r', buffering=-1, encoding=None):
    """``builtins.open()`` variant whose ``mode`` may be given as bytes.

    ``mode`` is converted with ``sysstr()``; ``name``, ``buffering`` and
    ``encoding`` are forwarded unchanged.  The name intentionally shadows the
    builtin so that code using bytes mode strings keeps working.
    """
    return builtins.open(name, sysstr(mode), buffering, encoding)
317
317
318 safehasattr = _wrapattrfunc(builtins.hasattr)
318 safehasattr = _wrapattrfunc(builtins.hasattr)
319
319
320 def _getoptbwrapper(orig, args, shortlist, namelist):
320 def _getoptbwrapper(orig, args, shortlist, namelist):
321 """
321 """
322 Takes bytes arguments, converts them to unicode, pass them to
322 Takes bytes arguments, converts them to unicode, pass them to
323 getopt.getopt(), convert the returned values back to bytes and then
323 getopt.getopt(), convert the returned values back to bytes and then
324 return them for Python 3 compatibility as getopt.getopt() don't accepts
324 return them for Python 3 compatibility as getopt.getopt() don't accepts
325 bytes on Python 3.
325 bytes on Python 3.
326 """
326 """
327 args = [a.decode('latin-1') for a in args]
327 args = [a.decode('latin-1') for a in args]
328 shortlist = shortlist.decode('latin-1')
328 shortlist = shortlist.decode('latin-1')
329 namelist = [a.decode('latin-1') for a in namelist]
329 namelist = [a.decode('latin-1') for a in namelist]
330 opts, args = orig(args, shortlist, namelist)
330 opts, args = orig(args, shortlist, namelist)
331 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
331 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
332 args = [a.encode('latin-1') for a in args]
332 args = [a.encode('latin-1') for a in args]
333 return opts, args
333 return opts, args
334
334
def strkwargs(dic):
    """
    Converts the keys of a python dictionary to str i.e. unicodes so that
    they can be passed as keyword arguments as dictionaries with bytes keys
    can't be passed as keyword arguments to functions on Python 3.

    A new dict is returned; keys are decoded as latin-1 and values are left
    untouched.
    """
    dic = {k.decode('latin-1'): v for k, v in dic.items()}
    return dic
343
343
def byteskwargs(dic):
    """
    Converts keys of python dictionaries to bytes as they were converted to
    str to pass that dictionary as a keyword argument on Python 3.

    A new dict is returned; keys are encoded as latin-1 and values are left
    untouched.
    """
    dic = {k.encode('latin-1'): v for k, v in dic.items()}
    return dic
351
351
352 # TODO: handle shlex.shlex().
352 # TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Takes bytes argument, converts it to str i.e. unicodes, passes that into
    shlex.split(), converts the returned value to bytes and returns that, for
    Python 3 compatibility, as shlex.split() doesn't accept bytes on
    Python 3.

    latin-1 is used in both directions so that every byte value survives the
    round trip.
    """
    ret = shlex.split(s.decode('latin-1'), comments, posix)
    return [a.encode('latin-1') for a in ret]
361
361
362 iteritems = lambda x: x.items()
362 iteritems = lambda x: x.items()
363 itervalues = lambda x: x.values()
363 itervalues = lambda x: x.values()
364
364
365 # Python 3.5's json.load and json.loads require str. We polyfill its
365 # Python 3.5's json.load and json.loads require str. We polyfill its
366 # code for detecting encoding from bytes.
366 # code for detecting encoding from bytes.
367 if sys.version_info[0:2] < (3, 6):
367 if sys.version_info[0:2] < (3, 6):
368
368
369 def _detect_encoding(b):
369 def _detect_encoding(b):
370 bstartswith = b.startswith
370 bstartswith = b.startswith
371 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
371 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
372 return 'utf-32'
372 return 'utf-32'
373 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
373 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
374 return 'utf-16'
374 return 'utf-16'
375 if bstartswith(codecs.BOM_UTF8):
375 if bstartswith(codecs.BOM_UTF8):
376 return 'utf-8-sig'
376 return 'utf-8-sig'
377
377
378 if len(b) >= 4:
378 if len(b) >= 4:
379 if not b[0]:
379 if not b[0]:
380 # 00 00 -- -- - utf-32-be
380 # 00 00 -- -- - utf-32-be
381 # 00 XX -- -- - utf-16-be
381 # 00 XX -- -- - utf-16-be
382 return 'utf-16-be' if b[1] else 'utf-32-be'
382 return 'utf-16-be' if b[1] else 'utf-32-be'
383 if not b[1]:
383 if not b[1]:
384 # XX 00 00 00 - utf-32-le
384 # XX 00 00 00 - utf-32-le
385 # XX 00 00 XX - utf-16-le
385 # XX 00 00 XX - utf-16-le
386 # XX 00 XX -- - utf-16-le
386 # XX 00 XX -- - utf-16-le
387 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
387 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
388 elif len(b) == 2:
388 elif len(b) == 2:
389 if not b[0]:
389 if not b[0]:
390 # 00 XX - utf-16-be
390 # 00 XX - utf-16-be
391 return 'utf-16-be'
391 return 'utf-16-be'
392 if not b[1]:
392 if not b[1]:
393 # XX 00 - utf-16-le
393 # XX 00 - utf-16-le
394 return 'utf-16-le'
394 return 'utf-16-le'
395 # default
395 # default
396 return 'utf-8'
396 return 'utf-8'
397
397
def json_loads(s, *args, **kwargs):
    """``json.loads()`` that also accepts ``bytes``/``bytearray`` input.

    Binary input is decoded using the encoding reported by
    ``_detect_encoding()``, with the 'surrogatepass' error handler; str input
    and all extra arguments are forwarded to ``json.loads()`` unchanged.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode(_detect_encoding(s), 'surrogatepass')

    return json.loads(s, *args, **kwargs)
403
403
404 else:
404 else:
405 json_loads = json.loads
405 json_loads = json.loads
406
406
407 else:
407 else:
408 import cStringIO
408 import cStringIO
409
409
410 xrange = xrange
410 xrange = xrange
411 unicode = unicode
411 unicode = unicode
412 bytechr = chr
412 bytechr = chr
413 byterepr = repr
413 byterepr = repr
414 bytestr = str
414 bytestr = str
415 iterbytestr = iter
415 iterbytestr = iter
416 maybebytestr = identity
416 maybebytestr = identity
417 sysbytes = identity
417 sysbytes = identity
418 sysstr = identity
418 sysstr = identity
419 strurl = identity
419 strurl = identity
420 bytesurl = identity
420 bytesurl = identity
421 open = open
421 open = open
422 delattr = delattr
422 delattr = delattr
423 getattr = getattr
423 getattr = getattr
424 hasattr = hasattr
424 hasattr = hasattr
425 setattr = setattr
425 setattr = setattr
426
426
427 # this can't be parsed on Python 3
427 # this can't be parsed on Python 3
428 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
428 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
429
429
def fsencode(filename):
    """
    Partial backport from os.py in Python 3, which only accepts bytes.
    In Python 2, our paths should only ever be bytes, a unicode path
    indicates a bug.

    Returns ``filename`` unchanged when it is a ``str`` (the bytes type on
    Python 2) and raises ``TypeError`` for anything else.
    """
    if isinstance(filename, str):
        return filename
    else:
        raise TypeError("expect str, not %s" % type(filename).__name__)
440
440
441 # In Python 2, fsdecode() has a very good chance of receiving bytes. So it's
441 # In Python 2, fsdecode() has a very good chance of receiving bytes. So it's
442 # better not to touch Python 2 part as it's already working fine.
442 # better not to touch Python 2 part as it's already working fine.
443 fsdecode = identity
443 fsdecode = identity
444
444
def getdoc(obj):
    """Return ``obj.__doc__``, or None when the attribute is missing.

    No conversion is applied in this (Python 2) variant.
    """
    return getattr(obj, '__doc__', None)
447
447
# Unique sentinel: no real attribute value can be identical to it.
_notset = object()


def safehasattr(thing, attr):
    """Return True if ``thing`` has an attribute named ``attr``.

    Implemented as ``getattr()`` with a sentinel default rather than
    ``hasattr()``.
    """
    return getattr(thing, attr, _notset) is not _notset
452
452
453 def _getoptbwrapper(orig, args, shortlist, namelist):
453 def _getoptbwrapper(orig, args, shortlist, namelist):
454 return orig(args, shortlist, namelist)
454 return orig(args, shortlist, namelist)
455
455
456 strkwargs = identity
456 strkwargs = identity
457 byteskwargs = identity
457 byteskwargs = identity
458
458
459 oscurdir = os.curdir
459 oscurdir = os.curdir
460 oslinesep = os.linesep
460 oslinesep = os.linesep
461 osname = os.name
461 osname = os.name
462 ospathsep = os.pathsep
462 ospathsep = os.pathsep
463 ospardir = os.pardir
463 ospardir = os.pardir
464 ossep = os.sep
464 ossep = os.sep
465 osaltsep = os.altsep
465 osaltsep = os.altsep
466 osdevnull = os.devnull
466 osdevnull = os.devnull
467 long = long
467 long = long
468 if getattr(sys, 'argv', None) is not None:
468 if getattr(sys, 'argv', None) is not None:
469 sysargv = sys.argv
469 sysargv = sys.argv
470 sysplatform = sys.platform
470 sysplatform = sys.platform
471 sysexecutable = sys.executable
471 sysexecutable = sys.executable
472 shlexsplit = shlex.split
472 shlexsplit = shlex.split
473 bytesio = cStringIO.StringIO
473 bytesio = cStringIO.StringIO
474 stringio = bytesio
474 stringio = bytesio
475 maplist = map
475 maplist = map
476 rangelist = range
476 rangelist = range
477 ziplist = zip
477 ziplist = zip
478 rawinput = raw_input
478 rawinput = raw_input
479 getargspec = inspect.getargspec
479 getargspec = inspect.getargspec
480 iteritems = lambda x: x.iteritems()
480 iteritems = lambda x: x.iteritems()
481 itervalues = lambda x: x.itervalues()
481 itervalues = lambda x: x.itervalues()
482 json_loads = json.loads
482 json_loads = json.loads
483
483
484 isjython = sysplatform.startswith(b'java')
484 isjython = sysplatform.startswith(b'java')
485
485
486 isdarwin = sysplatform.startswith(b'darwin')
486 isdarwin = sysplatform.startswith(b'darwin')
487 islinux = sysplatform.startswith(b'linux')
487 islinux = sysplatform.startswith(b'linux')
488 isposix = osname == b'posix'
488 isposix = osname == b'posix'
489 iswindows = osname == b'nt'
489 iswindows = osname == b'nt'
490
490
491
491
def getoptb(args, shortlist, namelist):
    """Run ``getopt.getopt()`` through ``_getoptbwrapper()``.

    Returns whatever the wrapper returns for ``args``, ``shortlist`` and
    ``namelist``.
    """
    return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
494
494
495
495
def gnugetoptb(args, shortlist, namelist):
    """Run ``getopt.gnu_getopt()`` through ``_getoptbwrapper()``.

    Returns whatever the wrapper returns for ``args``, ``shortlist`` and
    ``namelist``.
    """
    return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
498
498
499
499
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """``tempfile.mkdtemp()`` with bytes defaults for ``suffix``/``prefix``.

    The arguments are forwarded positionally; the new directory's path is
    returned.
    """
    return tempfile.mkdtemp(suffix, prefix, dir)
502
502
503
503
504 # text=True is not supported; use util.from/tonativeeol() instead
504 # text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """``tempfile.mkstemp()`` with bytes defaults for ``suffix``/``prefix``.

    The arguments are forwarded positionally and the ``(fd, path)`` pair from
    ``tempfile.mkstemp()`` is returned.  There is deliberately no ``text``
    parameter.
    """
    return tempfile.mkstemp(suffix, prefix, dir)
507
507
508
508
509 # TemporaryFile does not support an "encoding=" argument on python2.
509 # TemporaryFile does not support an "encoding=" argument on python2.
510 # Files returned by this wrapper are always opened in byte mode.
510 # Files returned by this wrapper are always opened in byte mode.
def unnamedtempfile(mode=None, *args, **kwargs):
    """Return a ``tempfile.TemporaryFile`` opened in binary mode.

    ``mode`` may be bytes or str and defaults to ``'w+b'``.  It must contain
    ``'b'``; text mode is rejected by the assertion below.  Remaining
    positional and keyword arguments are forwarded to
    ``tempfile.TemporaryFile()``.
    """
    if mode is None:
        # The default must be a native str: a bytes default would make the
        # ``'b' in mode`` check below raise TypeError on Python 3.
        mode = 'w+b'
    else:
        mode = sysstr(mode)
    assert 'b' in mode
    return tempfile.TemporaryFile(mode, *args, **kwargs)
518
518
519
519
520 # NamedTemporaryFile does not support an "encoding=" argument on python2.
520 # NamedTemporaryFile does not support an "encoding=" argument on python2.
521 # Files returned by this wrapper are always opened in byte mode.
521 # Files returned by this wrapper are always opened in byte mode.
def namedtempfile(
    mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
):
    """Return a ``tempfile.NamedTemporaryFile`` opened in binary mode.

    ``mode`` may be bytes or str; it is converted with ``sysstr()`` and must
    contain ``'b'``.  ``bufsize`` is passed as the second positional
    argument; the remaining parameters are forwarded by keyword.
    """
    mode = sysstr(mode)
    assert 'b' in mode
    return tempfile.NamedTemporaryFile(
        mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
    )
General Comments 0
You need to be logged in to leave comments. Login now