##// END OF EJS Templates
pycompat: add support for encoding argument to our wrapper...
Augie Fackler -
r36574:63fe5ca9 default
parent child Browse files
Show More
@@ -1,353 +1,353 b''
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import inspect
14 import inspect
15 import os
15 import os
16 import shlex
16 import shlex
17 import sys
17 import sys
18
18
19 ispy3 = (sys.version_info[0] >= 3)
19 ispy3 = (sys.version_info[0] >= 3)
20 ispypy = (r'__pypy__' in sys.builtin_module_names)
20 ispypy = (r'__pypy__' in sys.builtin_module_names)
21
21
22 if not ispy3:
22 if not ispy3:
23 import cookielib
23 import cookielib
24 import cPickle as pickle
24 import cPickle as pickle
25 import httplib
25 import httplib
26 import Queue as _queue
26 import Queue as _queue
27 import SocketServer as socketserver
27 import SocketServer as socketserver
28 import xmlrpclib
28 import xmlrpclib
29 else:
29 else:
30 import http.cookiejar as cookielib
30 import http.cookiejar as cookielib
31 import http.client as httplib
31 import http.client as httplib
32 import pickle
32 import pickle
33 import queue as _queue
33 import queue as _queue
34 import socketserver
34 import socketserver
35 import xmlrpc.client as xmlrpclib
35 import xmlrpc.client as xmlrpclib
36
36
37 empty = _queue.Empty
37 empty = _queue.Empty
38 queue = _queue.Queue
38 queue = _queue.Queue
39
39
40 def identity(a):
40 def identity(a):
41 return a
41 return a
42
42
43 if ispy3:
43 if ispy3:
44 import builtins
44 import builtins
45 import functools
45 import functools
46 import io
46 import io
47 import struct
47 import struct
48
48
49 fsencode = os.fsencode
49 fsencode = os.fsencode
50 fsdecode = os.fsdecode
50 fsdecode = os.fsdecode
51 oslinesep = os.linesep.encode('ascii')
51 oslinesep = os.linesep.encode('ascii')
52 osname = os.name.encode('ascii')
52 osname = os.name.encode('ascii')
53 ospathsep = os.pathsep.encode('ascii')
53 ospathsep = os.pathsep.encode('ascii')
54 ossep = os.sep.encode('ascii')
54 ossep = os.sep.encode('ascii')
55 osaltsep = os.altsep
55 osaltsep = os.altsep
56 if osaltsep:
56 if osaltsep:
57 osaltsep = osaltsep.encode('ascii')
57 osaltsep = osaltsep.encode('ascii')
58 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
58 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
59 # returns bytes.
59 # returns bytes.
60 getcwd = os.getcwdb
60 getcwd = os.getcwdb
61 sysplatform = sys.platform.encode('ascii')
61 sysplatform = sys.platform.encode('ascii')
62 sysexecutable = sys.executable
62 sysexecutable = sys.executable
63 if sysexecutable:
63 if sysexecutable:
64 sysexecutable = os.fsencode(sysexecutable)
64 sysexecutable = os.fsencode(sysexecutable)
65 stringio = io.BytesIO
65 stringio = io.BytesIO
66 maplist = lambda *args: list(map(*args))
66 maplist = lambda *args: list(map(*args))
67 ziplist = lambda *args: list(zip(*args))
67 ziplist = lambda *args: list(zip(*args))
68 rawinput = input
68 rawinput = input
69 getargspec = inspect.getfullargspec
69 getargspec = inspect.getfullargspec
70
70
71 # TODO: .buffer might not exist if std streams were replaced; we'll need
71 # TODO: .buffer might not exist if std streams were replaced; we'll need
72 # a silly wrapper to make a bytes stream backed by a unicode one.
72 # a silly wrapper to make a bytes stream backed by a unicode one.
73 stdin = sys.stdin.buffer
73 stdin = sys.stdin.buffer
74 stdout = sys.stdout.buffer
74 stdout = sys.stdout.buffer
75 stderr = sys.stderr.buffer
75 stderr = sys.stderr.buffer
76
76
77 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
77 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
78 # we can use os.fsencode() to get back bytes argv.
78 # we can use os.fsencode() to get back bytes argv.
79 #
79 #
80 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
80 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
81 #
81 #
82 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
82 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
83 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
83 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
84 if getattr(sys, 'argv', None) is not None:
84 if getattr(sys, 'argv', None) is not None:
85 sysargv = list(map(os.fsencode, sys.argv))
85 sysargv = list(map(os.fsencode, sys.argv))
86
86
87 bytechr = struct.Struct('>B').pack
87 bytechr = struct.Struct('>B').pack
88 byterepr = b'%r'.__mod__
88 byterepr = b'%r'.__mod__
89
89
90 class bytestr(bytes):
90 class bytestr(bytes):
91 """A bytes which mostly acts as a Python 2 str
91 """A bytes which mostly acts as a Python 2 str
92
92
93 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
93 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
94 ('', 'foo', 'ascii', '1')
94 ('', 'foo', 'ascii', '1')
95 >>> s = bytestr(b'foo')
95 >>> s = bytestr(b'foo')
96 >>> assert s is bytestr(s)
96 >>> assert s is bytestr(s)
97
97
98 __bytes__() should be called if provided:
98 __bytes__() should be called if provided:
99
99
100 >>> class bytesable(object):
100 >>> class bytesable(object):
101 ... def __bytes__(self):
101 ... def __bytes__(self):
102 ... return b'bytes'
102 ... return b'bytes'
103 >>> bytestr(bytesable())
103 >>> bytestr(bytesable())
104 'bytes'
104 'bytes'
105
105
106 There's no implicit conversion from non-ascii str as its encoding is
106 There's no implicit conversion from non-ascii str as its encoding is
107 unknown:
107 unknown:
108
108
109 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
109 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
110 Traceback (most recent call last):
110 Traceback (most recent call last):
111 ...
111 ...
112 UnicodeEncodeError: ...
112 UnicodeEncodeError: ...
113
113
114 Comparison between bytestr and bytes should work:
114 Comparison between bytestr and bytes should work:
115
115
116 >>> assert bytestr(b'foo') == b'foo'
116 >>> assert bytestr(b'foo') == b'foo'
117 >>> assert b'foo' == bytestr(b'foo')
117 >>> assert b'foo' == bytestr(b'foo')
118 >>> assert b'f' in bytestr(b'foo')
118 >>> assert b'f' in bytestr(b'foo')
119 >>> assert bytestr(b'f') in b'foo'
119 >>> assert bytestr(b'f') in b'foo'
120
120
121 Sliced elements should be bytes, not integer:
121 Sliced elements should be bytes, not integer:
122
122
123 >>> s[1], s[:2]
123 >>> s[1], s[:2]
124 (b'o', b'fo')
124 (b'o', b'fo')
125 >>> list(s), list(reversed(s))
125 >>> list(s), list(reversed(s))
126 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
126 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
127
127
128 As bytestr type isn't propagated across operations, you need to cast
128 As bytestr type isn't propagated across operations, you need to cast
129 bytes to bytestr explicitly:
129 bytes to bytestr explicitly:
130
130
131 >>> s = bytestr(b'foo').upper()
131 >>> s = bytestr(b'foo').upper()
132 >>> t = bytestr(s)
132 >>> t = bytestr(s)
133 >>> s[0], t[0]
133 >>> s[0], t[0]
134 (70, b'F')
134 (70, b'F')
135
135
136 Be careful to not pass a bytestr object to a function which expects
136 Be careful to not pass a bytestr object to a function which expects
137 bytearray-like behavior.
137 bytearray-like behavior.
138
138
139 >>> t = bytes(t) # cast to bytes
139 >>> t = bytes(t) # cast to bytes
140 >>> assert type(t) is bytes
140 >>> assert type(t) is bytes
141 """
141 """
142
142
143 def __new__(cls, s=b''):
143 def __new__(cls, s=b''):
144 if isinstance(s, bytestr):
144 if isinstance(s, bytestr):
145 return s
145 return s
146 if (not isinstance(s, (bytes, bytearray))
146 if (not isinstance(s, (bytes, bytearray))
147 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
147 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
148 s = str(s).encode(u'ascii')
148 s = str(s).encode(u'ascii')
149 return bytes.__new__(cls, s)
149 return bytes.__new__(cls, s)
150
150
151 def __getitem__(self, key):
151 def __getitem__(self, key):
152 s = bytes.__getitem__(self, key)
152 s = bytes.__getitem__(self, key)
153 if not isinstance(s, bytes):
153 if not isinstance(s, bytes):
154 s = bytechr(s)
154 s = bytechr(s)
155 return s
155 return s
156
156
157 def __iter__(self):
157 def __iter__(self):
158 return iterbytestr(bytes.__iter__(self))
158 return iterbytestr(bytes.__iter__(self))
159
159
160 def __repr__(self):
160 def __repr__(self):
161 return bytes.__repr__(self)[1:] # drop b''
161 return bytes.__repr__(self)[1:] # drop b''
162
162
163 def iterbytestr(s):
163 def iterbytestr(s):
164 """Iterate bytes as if it were a str object of Python 2"""
164 """Iterate bytes as if it were a str object of Python 2"""
165 return map(bytechr, s)
165 return map(bytechr, s)
166
166
167 def maybebytestr(s):
167 def maybebytestr(s):
168 """Promote bytes to bytestr"""
168 """Promote bytes to bytestr"""
169 if isinstance(s, bytes):
169 if isinstance(s, bytes):
170 return bytestr(s)
170 return bytestr(s)
171 return s
171 return s
172
172
173 def sysbytes(s):
173 def sysbytes(s):
174 """Convert an internal str (e.g. keyword, __doc__) back to bytes
174 """Convert an internal str (e.g. keyword, __doc__) back to bytes
175
175
176 This never raises UnicodeEncodeError, but only ASCII characters
176 This never raises UnicodeEncodeError, but only ASCII characters
177 can be round-tripped by sysstr(sysbytes(s)).
177 can be round-tripped by sysstr(sysbytes(s)).
178 """
178 """
179 return s.encode(u'utf-8')
179 return s.encode(u'utf-8')
180
180
181 def sysstr(s):
181 def sysstr(s):
182 """Return a keyword str to be passed to Python functions such as
182 """Return a keyword str to be passed to Python functions such as
183 getattr() and str.encode()
183 getattr() and str.encode()
184
184
185 This never raises UnicodeDecodeError. Non-ascii characters are
185 This never raises UnicodeDecodeError. Non-ascii characters are
186 considered invalid and mapped to arbitrary but unique code points
186 considered invalid and mapped to arbitrary but unique code points
187 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
187 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
188 """
188 """
189 if isinstance(s, builtins.str):
189 if isinstance(s, builtins.str):
190 return s
190 return s
191 return s.decode(u'latin-1')
191 return s.decode(u'latin-1')
192
192
193 def strurl(url):
193 def strurl(url):
194 """Converts a bytes url back to str"""
194 """Converts a bytes url back to str"""
195 return url.decode(u'ascii')
195 return url.decode(u'ascii')
196
196
197 def bytesurl(url):
197 def bytesurl(url):
198 """Converts a str url to bytes by encoding in ascii"""
198 """Converts a str url to bytes by encoding in ascii"""
199 return url.encode(u'ascii')
199 return url.encode(u'ascii')
200
200
201 def raisewithtb(exc, tb):
201 def raisewithtb(exc, tb):
202 """Raise exception with the given traceback"""
202 """Raise exception with the given traceback"""
203 raise exc.with_traceback(tb)
203 raise exc.with_traceback(tb)
204
204
205 def getdoc(obj):
205 def getdoc(obj):
206 """Get docstring as bytes; may be None so gettext() won't confuse it
206 """Get docstring as bytes; may be None so gettext() won't confuse it
207 with _('')"""
207 with _('')"""
208 doc = getattr(obj, u'__doc__', None)
208 doc = getattr(obj, u'__doc__', None)
209 if doc is None:
209 if doc is None:
210 return doc
210 return doc
211 return sysbytes(doc)
211 return sysbytes(doc)
212
212
213 def _wrapattrfunc(f):
213 def _wrapattrfunc(f):
214 @functools.wraps(f)
214 @functools.wraps(f)
215 def w(object, name, *args):
215 def w(object, name, *args):
216 return f(object, sysstr(name), *args)
216 return f(object, sysstr(name), *args)
217 return w
217 return w
218
218
219 # these wrappers are automagically imported by hgloader
219 # these wrappers are automagically imported by hgloader
220 delattr = _wrapattrfunc(builtins.delattr)
220 delattr = _wrapattrfunc(builtins.delattr)
221 getattr = _wrapattrfunc(builtins.getattr)
221 getattr = _wrapattrfunc(builtins.getattr)
222 hasattr = _wrapattrfunc(builtins.hasattr)
222 hasattr = _wrapattrfunc(builtins.hasattr)
223 setattr = _wrapattrfunc(builtins.setattr)
223 setattr = _wrapattrfunc(builtins.setattr)
224 xrange = builtins.range
224 xrange = builtins.range
225 unicode = str
225 unicode = str
226
226
227 def open(name, mode='r', buffering=-1):
227 def open(name, mode='r', buffering=-1, encoding=None):
228 return builtins.open(name, sysstr(mode), buffering)
228 return builtins.open(name, sysstr(mode), buffering, encoding)
229
229
230 def _getoptbwrapper(orig, args, shortlist, namelist):
230 def _getoptbwrapper(orig, args, shortlist, namelist):
231 """
231 """
232 Takes bytes arguments, converts them to unicode, passes them to
232 Takes bytes arguments, converts them to unicode, passes them to
233 getopt.getopt(), converts the returned values back to bytes and then
233 getopt.getopt(), converts the returned values back to bytes and then
234 returns them for Python 3 compatibility, as getopt.getopt() doesn't accept
234 returns them for Python 3 compatibility, as getopt.getopt() doesn't accept
235 bytes on Python 3.
235 bytes on Python 3.
236 """
236 """
237 args = [a.decode('latin-1') for a in args]
237 args = [a.decode('latin-1') for a in args]
238 shortlist = shortlist.decode('latin-1')
238 shortlist = shortlist.decode('latin-1')
239 namelist = [a.decode('latin-1') for a in namelist]
239 namelist = [a.decode('latin-1') for a in namelist]
240 opts, args = orig(args, shortlist, namelist)
240 opts, args = orig(args, shortlist, namelist)
241 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
241 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
242 for a in opts]
242 for a in opts]
243 args = [a.encode('latin-1') for a in args]
243 args = [a.encode('latin-1') for a in args]
244 return opts, args
244 return opts, args
245
245
246 def strkwargs(dic):
246 def strkwargs(dic):
247 """
247 """
248 Converts the keys of a Python dictionary to str (i.e. unicode) so that
248 Converts the keys of a Python dictionary to str (i.e. unicode) so that
249 they can be passed as keyword arguments, since dictionaries with bytes keys
249 they can be passed as keyword arguments, since dictionaries with bytes keys
250 can't be passed as keyword arguments to functions on Python 3.
250 can't be passed as keyword arguments to functions on Python 3.
251 """
251 """
252 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
252 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
253 return dic
253 return dic
254
254
255 def byteskwargs(dic):
255 def byteskwargs(dic):
256 """
256 """
257 Converts keys of Python dictionaries to bytes, as they were converted to
257 Converts keys of Python dictionaries to bytes, as they were converted to
258 str to pass that dictionary as a keyword argument on Python 3.
258 str to pass that dictionary as a keyword argument on Python 3.
259 """
259 """
260 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
260 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
261 return dic
261 return dic
262
262
263 # TODO: handle shlex.shlex().
263 # TODO: handle shlex.shlex().
264 def shlexsplit(s, comments=False, posix=True):
264 def shlexsplit(s, comments=False, posix=True):
265 """
265 """
266 Takes a bytes argument, converts it to str (i.e. unicode), passes that into
266 Takes a bytes argument, converts it to str (i.e. unicode), passes that into
267 shlex.split(), converts the returned value to bytes and returns that for
267 shlex.split(), converts the returned value to bytes and returns that for
268 Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
268 Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
269 """
269 """
270 ret = shlex.split(s.decode('latin-1'), comments, posix)
270 ret = shlex.split(s.decode('latin-1'), comments, posix)
271 return [a.encode('latin-1') for a in ret]
271 return [a.encode('latin-1') for a in ret]
272
272
273 def emailparser(*args, **kwargs):
273 def emailparser(*args, **kwargs):
274 import email.parser
274 import email.parser
275 return email.parser.BytesParser(*args, **kwargs)
275 return email.parser.BytesParser(*args, **kwargs)
276
276
277 else:
277 else:
278 import cStringIO
278 import cStringIO
279
279
280 bytechr = chr
280 bytechr = chr
281 byterepr = repr
281 byterepr = repr
282 bytestr = str
282 bytestr = str
283 iterbytestr = iter
283 iterbytestr = iter
284 maybebytestr = identity
284 maybebytestr = identity
285 sysbytes = identity
285 sysbytes = identity
286 sysstr = identity
286 sysstr = identity
287 strurl = identity
287 strurl = identity
288 bytesurl = identity
288 bytesurl = identity
289
289
290 # this can't be parsed on Python 3
290 # this can't be parsed on Python 3
291 exec('def raisewithtb(exc, tb):\n'
291 exec('def raisewithtb(exc, tb):\n'
292 ' raise exc, None, tb\n')
292 ' raise exc, None, tb\n')
293
293
294 def fsencode(filename):
294 def fsencode(filename):
295 """
295 """
296 Partial backport from os.py in Python 3, which only accepts bytes.
296 Partial backport from os.py in Python 3, which only accepts bytes.
297 In Python 2, our paths should only ever be bytes, a unicode path
297 In Python 2, our paths should only ever be bytes, a unicode path
298 indicates a bug.
298 indicates a bug.
299 """
299 """
300 if isinstance(filename, str):
300 if isinstance(filename, str):
301 return filename
301 return filename
302 else:
302 else:
303 raise TypeError(
303 raise TypeError(
304 "expect str, not %s" % type(filename).__name__)
304 "expect str, not %s" % type(filename).__name__)
305
305
306 # In Python 2, fsdecode() has a very high chance of receiving bytes. So it's
306 # In Python 2, fsdecode() has a very high chance of receiving bytes. So it's
307 # better not to touch the Python 2 part as it's already working fine.
307 # better not to touch the Python 2 part as it's already working fine.
308 fsdecode = identity
308 fsdecode = identity
309
309
310 def getdoc(obj):
310 def getdoc(obj):
311 return getattr(obj, '__doc__', None)
311 return getattr(obj, '__doc__', None)
312
312
313 def _getoptbwrapper(orig, args, shortlist, namelist):
313 def _getoptbwrapper(orig, args, shortlist, namelist):
314 return orig(args, shortlist, namelist)
314 return orig(args, shortlist, namelist)
315
315
316 strkwargs = identity
316 strkwargs = identity
317 byteskwargs = identity
317 byteskwargs = identity
318
318
319 oslinesep = os.linesep
319 oslinesep = os.linesep
320 osname = os.name
320 osname = os.name
321 ospathsep = os.pathsep
321 ospathsep = os.pathsep
322 ossep = os.sep
322 ossep = os.sep
323 osaltsep = os.altsep
323 osaltsep = os.altsep
324 stdin = sys.stdin
324 stdin = sys.stdin
325 stdout = sys.stdout
325 stdout = sys.stdout
326 stderr = sys.stderr
326 stderr = sys.stderr
327 if getattr(sys, 'argv', None) is not None:
327 if getattr(sys, 'argv', None) is not None:
328 sysargv = sys.argv
328 sysargv = sys.argv
329 sysplatform = sys.platform
329 sysplatform = sys.platform
330 getcwd = os.getcwd
330 getcwd = os.getcwd
331 sysexecutable = sys.executable
331 sysexecutable = sys.executable
332 shlexsplit = shlex.split
332 shlexsplit = shlex.split
333 stringio = cStringIO.StringIO
333 stringio = cStringIO.StringIO
334 maplist = map
334 maplist = map
335 ziplist = zip
335 ziplist = zip
336 rawinput = raw_input
336 rawinput = raw_input
337 getargspec = inspect.getargspec
337 getargspec = inspect.getargspec
338
338
339 def emailparser(*args, **kwargs):
339 def emailparser(*args, **kwargs):
340 import email.parser
340 import email.parser
341 return email.parser.Parser(*args, **kwargs)
341 return email.parser.Parser(*args, **kwargs)
342
342
343 isjython = sysplatform.startswith('java')
343 isjython = sysplatform.startswith('java')
344
344
345 isdarwin = sysplatform == 'darwin'
345 isdarwin = sysplatform == 'darwin'
346 isposix = osname == 'posix'
346 isposix = osname == 'posix'
347 iswindows = osname == 'nt'
347 iswindows = osname == 'nt'
348
348
349 def getoptb(args, shortlist, namelist):
349 def getoptb(args, shortlist, namelist):
350 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
350 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
351
351
352 def gnugetoptb(args, shortlist, namelist):
352 def gnugetoptb(args, shortlist, namelist):
353 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
353 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
General Comments 0
You need to be logged in to leave comments. Login now