##// END OF EJS Templates
pycompat: prevent encoding or decoding values if not required...
Pulkit Goyal -
r36662:e2b87e19 default
parent child Browse files
Show More
@@ -1,353 +1,357 b''
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import inspect
14 import inspect
15 import os
15 import os
16 import shlex
16 import shlex
17 import sys
17 import sys
18
18
19 ispy3 = (sys.version_info[0] >= 3)
19 ispy3 = (sys.version_info[0] >= 3)
20 ispypy = (r'__pypy__' in sys.builtin_module_names)
20 ispypy = (r'__pypy__' in sys.builtin_module_names)
21
21
22 if not ispy3:
22 if not ispy3:
23 import cookielib
23 import cookielib
24 import cPickle as pickle
24 import cPickle as pickle
25 import httplib
25 import httplib
26 import Queue as _queue
26 import Queue as _queue
27 import SocketServer as socketserver
27 import SocketServer as socketserver
28 import xmlrpclib
28 import xmlrpclib
29 else:
29 else:
30 import http.cookiejar as cookielib
30 import http.cookiejar as cookielib
31 import http.client as httplib
31 import http.client as httplib
32 import pickle
32 import pickle
33 import queue as _queue
33 import queue as _queue
34 import socketserver
34 import socketserver
35 import xmlrpc.client as xmlrpclib
35 import xmlrpc.client as xmlrpclib
36
36
37 empty = _queue.Empty
37 empty = _queue.Empty
38 queue = _queue.Queue
38 queue = _queue.Queue
39
39
40 def identity(a):
40 def identity(a):
41 return a
41 return a
42
42
43 if ispy3:
43 if ispy3:
44 import builtins
44 import builtins
45 import functools
45 import functools
46 import io
46 import io
47 import struct
47 import struct
48
48
49 fsencode = os.fsencode
49 fsencode = os.fsencode
50 fsdecode = os.fsdecode
50 fsdecode = os.fsdecode
51 oslinesep = os.linesep.encode('ascii')
51 oslinesep = os.linesep.encode('ascii')
52 osname = os.name.encode('ascii')
52 osname = os.name.encode('ascii')
53 ospathsep = os.pathsep.encode('ascii')
53 ospathsep = os.pathsep.encode('ascii')
54 ossep = os.sep.encode('ascii')
54 ossep = os.sep.encode('ascii')
55 osaltsep = os.altsep
55 osaltsep = os.altsep
56 if osaltsep:
56 if osaltsep:
57 osaltsep = osaltsep.encode('ascii')
57 osaltsep = osaltsep.encode('ascii')
58 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
58 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
59 # returns bytes.
59 # returns bytes.
60 getcwd = os.getcwdb
60 getcwd = os.getcwdb
61 sysplatform = sys.platform.encode('ascii')
61 sysplatform = sys.platform.encode('ascii')
62 sysexecutable = sys.executable
62 sysexecutable = sys.executable
63 if sysexecutable:
63 if sysexecutable:
64 sysexecutable = os.fsencode(sysexecutable)
64 sysexecutable = os.fsencode(sysexecutable)
65 stringio = io.BytesIO
65 stringio = io.BytesIO
66 maplist = lambda *args: list(map(*args))
66 maplist = lambda *args: list(map(*args))
67 ziplist = lambda *args: list(zip(*args))
67 ziplist = lambda *args: list(zip(*args))
68 rawinput = input
68 rawinput = input
69 getargspec = inspect.getfullargspec
69 getargspec = inspect.getfullargspec
70
70
71 # TODO: .buffer might not exist if std streams were replaced; we'll need
71 # TODO: .buffer might not exist if std streams were replaced; we'll need
72 # a silly wrapper to make a bytes stream backed by a unicode one.
72 # a silly wrapper to make a bytes stream backed by a unicode one.
73 stdin = sys.stdin.buffer
73 stdin = sys.stdin.buffer
74 stdout = sys.stdout.buffer
74 stdout = sys.stdout.buffer
75 stderr = sys.stderr.buffer
75 stderr = sys.stderr.buffer
76
76
77 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
77 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
78 # we can use os.fsencode() to get back bytes argv.
78 # we can use os.fsencode() to get back bytes argv.
79 #
79 #
80 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
80 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
81 #
81 #
82 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
82 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
83 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
83 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
84 if getattr(sys, 'argv', None) is not None:
84 if getattr(sys, 'argv', None) is not None:
85 sysargv = list(map(os.fsencode, sys.argv))
85 sysargv = list(map(os.fsencode, sys.argv))
86
86
87 bytechr = struct.Struct('>B').pack
87 bytechr = struct.Struct('>B').pack
88 byterepr = b'%r'.__mod__
88 byterepr = b'%r'.__mod__
89
89
90 class bytestr(bytes):
90 class bytestr(bytes):
91 """A bytes which mostly acts as a Python 2 str
91 """A bytes which mostly acts as a Python 2 str
92
92
93 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
93 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
94 ('', 'foo', 'ascii', '1')
94 ('', 'foo', 'ascii', '1')
95 >>> s = bytestr(b'foo')
95 >>> s = bytestr(b'foo')
96 >>> assert s is bytestr(s)
96 >>> assert s is bytestr(s)
97
97
98 __bytes__() should be called if provided:
98 __bytes__() should be called if provided:
99
99
100 >>> class bytesable(object):
100 >>> class bytesable(object):
101 ... def __bytes__(self):
101 ... def __bytes__(self):
102 ... return b'bytes'
102 ... return b'bytes'
103 >>> bytestr(bytesable())
103 >>> bytestr(bytesable())
104 'bytes'
104 'bytes'
105
105
106 There's no implicit conversion from non-ascii str as its encoding is
106 There's no implicit conversion from non-ascii str as its encoding is
107 unknown:
107 unknown:
108
108
109 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
109 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
110 Traceback (most recent call last):
110 Traceback (most recent call last):
111 ...
111 ...
112 UnicodeEncodeError: ...
112 UnicodeEncodeError: ...
113
113
114 Comparison between bytestr and bytes should work:
114 Comparison between bytestr and bytes should work:
115
115
116 >>> assert bytestr(b'foo') == b'foo'
116 >>> assert bytestr(b'foo') == b'foo'
117 >>> assert b'foo' == bytestr(b'foo')
117 >>> assert b'foo' == bytestr(b'foo')
118 >>> assert b'f' in bytestr(b'foo')
118 >>> assert b'f' in bytestr(b'foo')
119 >>> assert bytestr(b'f') in b'foo'
119 >>> assert bytestr(b'f') in b'foo'
120
120
121 Sliced elements should be bytes, not integer:
121 Sliced elements should be bytes, not integer:
122
122
123 >>> s[1], s[:2]
123 >>> s[1], s[:2]
124 (b'o', b'fo')
124 (b'o', b'fo')
125 >>> list(s), list(reversed(s))
125 >>> list(s), list(reversed(s))
126 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
126 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
127
127
128 As bytestr type isn't propagated across operations, you need to cast
128 As bytestr type isn't propagated across operations, you need to cast
129 bytes to bytestr explicitly:
129 bytes to bytestr explicitly:
130
130
131 >>> s = bytestr(b'foo').upper()
131 >>> s = bytestr(b'foo').upper()
132 >>> t = bytestr(s)
132 >>> t = bytestr(s)
133 >>> s[0], t[0]
133 >>> s[0], t[0]
134 (70, b'F')
134 (70, b'F')
135
135
136 Be careful to not pass a bytestr object to a function which expects
136 Be careful to not pass a bytestr object to a function which expects
137 bytearray-like behavior.
137 bytearray-like behavior.
138
138
139 >>> t = bytes(t) # cast to bytes
139 >>> t = bytes(t) # cast to bytes
140 >>> assert type(t) is bytes
140 >>> assert type(t) is bytes
141 """
141 """
142
142
143 def __new__(cls, s=b''):
143 def __new__(cls, s=b''):
144 if isinstance(s, bytestr):
144 if isinstance(s, bytestr):
145 return s
145 return s
146 if (not isinstance(s, (bytes, bytearray))
146 if (not isinstance(s, (bytes, bytearray))
147 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
147 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
148 s = str(s).encode(u'ascii')
148 s = str(s).encode(u'ascii')
149 return bytes.__new__(cls, s)
149 return bytes.__new__(cls, s)
150
150
151 def __getitem__(self, key):
151 def __getitem__(self, key):
152 s = bytes.__getitem__(self, key)
152 s = bytes.__getitem__(self, key)
153 if not isinstance(s, bytes):
153 if not isinstance(s, bytes):
154 s = bytechr(s)
154 s = bytechr(s)
155 return s
155 return s
156
156
157 def __iter__(self):
157 def __iter__(self):
158 return iterbytestr(bytes.__iter__(self))
158 return iterbytestr(bytes.__iter__(self))
159
159
160 def __repr__(self):
160 def __repr__(self):
161 return bytes.__repr__(self)[1:] # drop b''
161 return bytes.__repr__(self)[1:] # drop b''
162
162
163 def iterbytestr(s):
163 def iterbytestr(s):
164 """Iterate bytes as if it were a str object of Python 2"""
164 """Iterate bytes as if it were a str object of Python 2"""
165 return map(bytechr, s)
165 return map(bytechr, s)
166
166
167 def maybebytestr(s):
167 def maybebytestr(s):
168 """Promote bytes to bytestr"""
168 """Promote bytes to bytestr"""
169 if isinstance(s, bytes):
169 if isinstance(s, bytes):
170 return bytestr(s)
170 return bytestr(s)
171 return s
171 return s
172
172
173 def sysbytes(s):
173 def sysbytes(s):
174 """Convert an internal str (e.g. keyword, __doc__) back to bytes
174 """Convert an internal str (e.g. keyword, __doc__) back to bytes
175
175
176 This never raises UnicodeEncodeError, but only ASCII characters
176 This never raises UnicodeEncodeError, but only ASCII characters
177 can be round-tripped by sysstr(sysbytes(s)).
177 can be round-tripped by sysstr(sysbytes(s)).
178 """
178 """
179 return s.encode(u'utf-8')
179 return s.encode(u'utf-8')
180
180
181 def sysstr(s):
181 def sysstr(s):
182 """Return a keyword str to be passed to Python functions such as
182 """Return a keyword str to be passed to Python functions such as
183 getattr() and str.encode()
183 getattr() and str.encode()
184
184
185 This never raises UnicodeDecodeError. Non-ascii characters are
185 This never raises UnicodeDecodeError. Non-ascii characters are
186 considered invalid and mapped to arbitrary but unique code points
186 considered invalid and mapped to arbitrary but unique code points
187 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
187 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
188 """
188 """
189 if isinstance(s, builtins.str):
189 if isinstance(s, builtins.str):
190 return s
190 return s
191 return s.decode(u'latin-1')
191 return s.decode(u'latin-1')
192
192
193 def strurl(url):
193 def strurl(url):
194 """Converts a bytes url back to str"""
194 """Converts a bytes url back to str"""
195 return url.decode(u'ascii')
195 if isinstance(url, bytes):
196 return url.decode(u'ascii')
197 return url
196
198
197 def bytesurl(url):
199 def bytesurl(url):
198 """Converts a str url to bytes by encoding in ascii"""
200 """Converts a str url to bytes by encoding in ascii"""
199 return url.encode(u'ascii')
201 if isinstance(url, str):
202 return url.encode(u'ascii')
203 return url
200
204
201 def raisewithtb(exc, tb):
205 def raisewithtb(exc, tb):
202 """Raise exception with the given traceback"""
206 """Raise exception with the given traceback"""
203 raise exc.with_traceback(tb)
207 raise exc.with_traceback(tb)
204
208
205 def getdoc(obj):
209 def getdoc(obj):
206 """Get docstring as bytes; may be None so gettext() won't confuse it
210 """Get docstring as bytes; may be None so gettext() won't confuse it
207 with _('')"""
211 with _('')"""
208 doc = getattr(obj, u'__doc__', None)
212 doc = getattr(obj, u'__doc__', None)
209 if doc is None:
213 if doc is None:
210 return doc
214 return doc
211 return sysbytes(doc)
215 return sysbytes(doc)
212
216
213 def _wrapattrfunc(f):
217 def _wrapattrfunc(f):
214 @functools.wraps(f)
218 @functools.wraps(f)
215 def w(object, name, *args):
219 def w(object, name, *args):
216 return f(object, sysstr(name), *args)
220 return f(object, sysstr(name), *args)
217 return w
221 return w
218
222
219 # these wrappers are automagically imported by hgloader
223 # these wrappers are automagically imported by hgloader
220 delattr = _wrapattrfunc(builtins.delattr)
224 delattr = _wrapattrfunc(builtins.delattr)
221 getattr = _wrapattrfunc(builtins.getattr)
225 getattr = _wrapattrfunc(builtins.getattr)
222 hasattr = _wrapattrfunc(builtins.hasattr)
226 hasattr = _wrapattrfunc(builtins.hasattr)
223 setattr = _wrapattrfunc(builtins.setattr)
227 setattr = _wrapattrfunc(builtins.setattr)
224 xrange = builtins.range
228 xrange = builtins.range
225 unicode = str
229 unicode = str
226
230
227 def open(name, mode='r', buffering=-1, encoding=None):
231 def open(name, mode='r', buffering=-1, encoding=None):
228 return builtins.open(name, sysstr(mode), buffering, encoding)
232 return builtins.open(name, sysstr(mode), buffering, encoding)
229
233
230 def _getoptbwrapper(orig, args, shortlist, namelist):
234 def _getoptbwrapper(orig, args, shortlist, namelist):
231 """
235 """
232 Takes bytes arguments, converts them to unicode, passes them to
236 Takes bytes arguments, converts them to unicode, passes them to
233 getopt.getopt(), converts the returned values back to bytes and then
237 getopt.getopt(), converts the returned values back to bytes and then
234 returns them for Python 3 compatibility, as getopt.getopt() doesn't accept
238 returns them for Python 3 compatibility, as getopt.getopt() doesn't accept
235 bytes on Python 3.
239 bytes on Python 3.
236 """
240 """
237 args = [a.decode('latin-1') for a in args]
241 args = [a.decode('latin-1') for a in args]
238 shortlist = shortlist.decode('latin-1')
242 shortlist = shortlist.decode('latin-1')
239 namelist = [a.decode('latin-1') for a in namelist]
243 namelist = [a.decode('latin-1') for a in namelist]
240 opts, args = orig(args, shortlist, namelist)
244 opts, args = orig(args, shortlist, namelist)
241 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
245 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
242 for a in opts]
246 for a in opts]
243 args = [a.encode('latin-1') for a in args]
247 args = [a.encode('latin-1') for a in args]
244 return opts, args
248 return opts, args
245
249
246 def strkwargs(dic):
250 def strkwargs(dic):
247 """
251 """
248 Converts the keys of a Python dictionary to str (i.e. unicode) so that
252 Converts the keys of a Python dictionary to str (i.e. unicode) so that
249 they can be passed as keyword arguments, as dictionaries with bytes keys
253 they can be passed as keyword arguments, as dictionaries with bytes keys
250 can't be passed as keyword arguments to functions on Python 3.
254 can't be passed as keyword arguments to functions on Python 3.
251 """
255 """
252 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
256 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
253 return dic
257 return dic
254
258
255 def byteskwargs(dic):
259 def byteskwargs(dic):
256 """
260 """
257 Converts keys of Python dictionaries to bytes, as they were converted to
261 Converts keys of Python dictionaries to bytes, as they were converted to
258 str to pass that dictionary as keyword arguments on Python 3.
262 str to pass that dictionary as keyword arguments on Python 3.
259 """
263 """
260 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
264 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
261 return dic
265 return dic
262
266
263 # TODO: handle shlex.shlex().
267 # TODO: handle shlex.shlex().
264 def shlexsplit(s, comments=False, posix=True):
268 def shlexsplit(s, comments=False, posix=True):
265 """
269 """
266 Takes a bytes argument, converts it to str (i.e. unicode), passes that into
270 Takes a bytes argument, converts it to str (i.e. unicode), passes that into
267 shlex.split(), converts the returned values to bytes and returns them for
271 shlex.split(), converts the returned values to bytes and returns them for
268 Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
272 Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
269 """
273 """
270 ret = shlex.split(s.decode('latin-1'), comments, posix)
274 ret = shlex.split(s.decode('latin-1'), comments, posix)
271 return [a.encode('latin-1') for a in ret]
275 return [a.encode('latin-1') for a in ret]
272
276
273 def emailparser(*args, **kwargs):
277 def emailparser(*args, **kwargs):
274 import email.parser
278 import email.parser
275 return email.parser.BytesParser(*args, **kwargs)
279 return email.parser.BytesParser(*args, **kwargs)
276
280
277 else:
281 else:
278 import cStringIO
282 import cStringIO
279
283
280 bytechr = chr
284 bytechr = chr
281 byterepr = repr
285 byterepr = repr
282 bytestr = str
286 bytestr = str
283 iterbytestr = iter
287 iterbytestr = iter
284 maybebytestr = identity
288 maybebytestr = identity
285 sysbytes = identity
289 sysbytes = identity
286 sysstr = identity
290 sysstr = identity
287 strurl = identity
291 strurl = identity
288 bytesurl = identity
292 bytesurl = identity
289
293
290 # this can't be parsed on Python 3
294 # this can't be parsed on Python 3
291 exec('def raisewithtb(exc, tb):\n'
295 exec('def raisewithtb(exc, tb):\n'
292 ' raise exc, None, tb\n')
296 ' raise exc, None, tb\n')
293
297
294 def fsencode(filename):
298 def fsencode(filename):
295 """
299 """
296 Partial backport from os.py in Python 3, which only accepts bytes.
300 Partial backport from os.py in Python 3, which only accepts bytes.
297 In Python 2, our paths should only ever be bytes, a unicode path
301 In Python 2, our paths should only ever be bytes, a unicode path
298 indicates a bug.
302 indicates a bug.
299 """
303 """
300 if isinstance(filename, str):
304 if isinstance(filename, str):
301 return filename
305 return filename
302 else:
306 else:
303 raise TypeError(
307 raise TypeError(
304 "expect str, not %s" % type(filename).__name__)
308 "expect str, not %s" % type(filename).__name__)
305
309
306 # In Python 2, fsdecode() is very likely to receive bytes, so it's
310 # In Python 2, fsdecode() is very likely to receive bytes, so it's
307 # better not to touch the Python 2 part as it's already working fine.
311 # better not to touch the Python 2 part as it's already working fine.
308 fsdecode = identity
312 fsdecode = identity
309
313
310 def getdoc(obj):
314 def getdoc(obj):
311 return getattr(obj, '__doc__', None)
315 return getattr(obj, '__doc__', None)
312
316
313 def _getoptbwrapper(orig, args, shortlist, namelist):
317 def _getoptbwrapper(orig, args, shortlist, namelist):
314 return orig(args, shortlist, namelist)
318 return orig(args, shortlist, namelist)
315
319
316 strkwargs = identity
320 strkwargs = identity
317 byteskwargs = identity
321 byteskwargs = identity
318
322
319 oslinesep = os.linesep
323 oslinesep = os.linesep
320 osname = os.name
324 osname = os.name
321 ospathsep = os.pathsep
325 ospathsep = os.pathsep
322 ossep = os.sep
326 ossep = os.sep
323 osaltsep = os.altsep
327 osaltsep = os.altsep
324 stdin = sys.stdin
328 stdin = sys.stdin
325 stdout = sys.stdout
329 stdout = sys.stdout
326 stderr = sys.stderr
330 stderr = sys.stderr
327 if getattr(sys, 'argv', None) is not None:
331 if getattr(sys, 'argv', None) is not None:
328 sysargv = sys.argv
332 sysargv = sys.argv
329 sysplatform = sys.platform
333 sysplatform = sys.platform
330 getcwd = os.getcwd
334 getcwd = os.getcwd
331 sysexecutable = sys.executable
335 sysexecutable = sys.executable
332 shlexsplit = shlex.split
336 shlexsplit = shlex.split
333 stringio = cStringIO.StringIO
337 stringio = cStringIO.StringIO
334 maplist = map
338 maplist = map
335 ziplist = zip
339 ziplist = zip
336 rawinput = raw_input
340 rawinput = raw_input
337 getargspec = inspect.getargspec
341 getargspec = inspect.getargspec
338
342
339 def emailparser(*args, **kwargs):
343 def emailparser(*args, **kwargs):
340 import email.parser
344 import email.parser
341 return email.parser.Parser(*args, **kwargs)
345 return email.parser.Parser(*args, **kwargs)
342
346
343 isjython = sysplatform.startswith('java')
347 isjython = sysplatform.startswith('java')
344
348
345 isdarwin = sysplatform == 'darwin'
349 isdarwin = sysplatform == 'darwin'
346 isposix = osname == 'posix'
350 isposix = osname == 'posix'
347 iswindows = osname == 'nt'
351 iswindows = osname == 'nt'
348
352
349 def getoptb(args, shortlist, namelist):
353 def getoptb(args, shortlist, namelist):
350 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
354 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
351
355
352 def gnugetoptb(args, shortlist, namelist):
356 def gnugetoptb(args, shortlist, namelist):
353 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
357 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
General Comments 0
You need to be logged in to leave comments. Login now