##// END OF EJS Templates
py3: introduce pycompat.ziplist as zip is a generator on Python 3...
Pulkit Goyal -
r35406:e66d6e93 default
parent child Browse files
Show More
@@ -1,328 +1,330 b''
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import os
14 import os
15 import shlex
15 import shlex
16 import sys
16 import sys
17
17
18 ispy3 = (sys.version_info[0] >= 3)
18 ispy3 = (sys.version_info[0] >= 3)
19 ispypy = (r'__pypy__' in sys.builtin_module_names)
19 ispypy = (r'__pypy__' in sys.builtin_module_names)
20
20
21 if not ispy3:
21 if not ispy3:
22 import cookielib
22 import cookielib
23 import cPickle as pickle
23 import cPickle as pickle
24 import httplib
24 import httplib
25 import Queue as _queue
25 import Queue as _queue
26 import SocketServer as socketserver
26 import SocketServer as socketserver
27 import xmlrpclib
27 import xmlrpclib
28 else:
28 else:
29 import http.cookiejar as cookielib
29 import http.cookiejar as cookielib
30 import http.client as httplib
30 import http.client as httplib
31 import pickle
31 import pickle
32 import queue as _queue
32 import queue as _queue
33 import socketserver
33 import socketserver
34 import xmlrpc.client as xmlrpclib
34 import xmlrpc.client as xmlrpclib
35
35
36 empty = _queue.Empty
36 empty = _queue.Empty
37 queue = _queue.Queue
37 queue = _queue.Queue
38
38
def identity(a):
    """Return the argument unchanged.

    Serves as a no-op stand-in wherever a conversion callable is expected
    but no conversion is needed.
    """
    unchanged = a
    return unchanged
41
41
42 if ispy3:
42 if ispy3:
43 import builtins
43 import builtins
44 import functools
44 import functools
45 import io
45 import io
46 import struct
46 import struct
47
47
48 fsencode = os.fsencode
48 fsencode = os.fsencode
49 fsdecode = os.fsdecode
49 fsdecode = os.fsdecode
50 oslinesep = os.linesep.encode('ascii')
50 oslinesep = os.linesep.encode('ascii')
51 osname = os.name.encode('ascii')
51 osname = os.name.encode('ascii')
52 ospathsep = os.pathsep.encode('ascii')
52 ospathsep = os.pathsep.encode('ascii')
53 ossep = os.sep.encode('ascii')
53 ossep = os.sep.encode('ascii')
54 osaltsep = os.altsep
54 osaltsep = os.altsep
55 if osaltsep:
55 if osaltsep:
56 osaltsep = osaltsep.encode('ascii')
56 osaltsep = osaltsep.encode('ascii')
57 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
57 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
58 # returns bytes.
58 # returns bytes.
59 getcwd = os.getcwdb
59 getcwd = os.getcwdb
60 sysplatform = sys.platform.encode('ascii')
60 sysplatform = sys.platform.encode('ascii')
61 sysexecutable = sys.executable
61 sysexecutable = sys.executable
62 if sysexecutable:
62 if sysexecutable:
63 sysexecutable = os.fsencode(sysexecutable)
63 sysexecutable = os.fsencode(sysexecutable)
64 stringio = io.BytesIO
64 stringio = io.BytesIO
65 maplist = lambda *args: list(map(*args))
65 maplist = lambda *args: list(map(*args))
66 ziplist = lambda *args: list(zip(*args))
66 rawinput = input
67 rawinput = input
67
68
68 # TODO: .buffer might not exist if std streams were replaced; we'll need
69 # TODO: .buffer might not exist if std streams were replaced; we'll need
69 # a silly wrapper to make a bytes stream backed by a unicode one.
70 # a silly wrapper to make a bytes stream backed by a unicode one.
70 stdin = sys.stdin.buffer
71 stdin = sys.stdin.buffer
71 stdout = sys.stdout.buffer
72 stdout = sys.stdout.buffer
72 stderr = sys.stderr.buffer
73 stderr = sys.stderr.buffer
73
74
74 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
75 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
75 # we can use os.fsencode() to get back bytes argv.
76 # we can use os.fsencode() to get back bytes argv.
76 #
77 #
77 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
78 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
78 #
79 #
79 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
80 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
80 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
81 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
81 if getattr(sys, 'argv', None) is not None:
82 if getattr(sys, 'argv', None) is not None:
82 sysargv = list(map(os.fsencode, sys.argv))
83 sysargv = list(map(os.fsencode, sys.argv))
83
84
84 bytechr = struct.Struct('>B').pack
85 bytechr = struct.Struct('>B').pack
85
86
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    (b'', b'foo', b'ascii', b'1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    b'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is handed back as-is, so wrapping twice is
        # free and preserves identity.
        if isinstance(s, bytestr):
            return s
        byteslike = isinstance(s, (bytes, bytearray))
        if not byteslike and not hasattr(s, u'__bytes__'): # hasattr-py3-only
            # Anything else goes through its str() form, which must be
            # pure ASCII; otherwise UnicodeEncodeError propagates.
            s = str(s).encode(u'ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        if isinstance(item, bytes):
            # Slices already come back as bytes.
            return item
        # Integer indexing yields an int; turn it into a length-1 bytes.
        return bytechr(item)

    def __iter__(self):
        rawiter = bytes.__iter__(self)
        return iterbytestr(rawiter)
155
156
def iterbytestr(s):
    """Iterate over bytes the way Python 2 iterates over a str.

    Each item produced is the result of ``bytechr`` applied to one element
    of ``s`` rather than the bare element itself.
    """
    return (bytechr(ch) for ch in s)
159
160
def sysbytes(s):
    """Turn an internal str (e.g. keyword, __doc__) back into bytes.

    The text is encoded as UTF-8. Only ASCII characters survive a
    sysstr(sysbytes(s)) round trip.
    """
    encoded = s.encode(u'utf-8')
    return encoded
167
168
def sysstr(s):
    """Return a native str suitable for Python functions such as getattr()
    and str.encode().

    A str argument is returned untouched. Anything else is decoded as
    latin-1, which maps every byte value to the code point of the same
    number, so decoding cannot fail and distinct inputs stay distinct.
    """
    if not isinstance(s, builtins.str):
        return s.decode(u'latin-1')
    return s
179
180
def strurl(url):
    """Decode a bytes url into a str, treating it as ASCII."""
    decoded = url.decode(u'ascii')
    return decoded
183
184
def bytesurl(url):
    """Encode a str url into bytes using ASCII."""
    encoded = url.encode(u'ascii')
    return encoded
187
188
def raisewithtb(exc, tb):
    """Raise ``exc`` with ``tb`` attached as its traceback."""
    # with_traceback() sets the traceback on the exception and returns it.
    rebound = exc.with_traceback(tb)
    raise rebound
191
192
def getdoc(obj):
    """Fetch the docstring of ``obj`` as bytes.

    Returns None when there is no docstring, so gettext() won't confuse it
    with _('').
    """
    doc = getattr(obj, u'__doc__', None)
    if doc is not None:
        doc = sysbytes(doc)
    return doc
199
200
def _wrapattrfunc(f):
    """Wrap an attribute builtin so its name argument goes through sysstr().

    The returned callable forwards to ``f`` after converting ``name``, and
    carries ``f``'s metadata.
    """
    def w(object, name, *args):
        strname = sysstr(name)
        return f(object, strname, *args)
    return functools.update_wrapper(w, f)
205
206
206 # these wrappers are automagically imported by hgloader
207 # these wrappers are automagically imported by hgloader
207 delattr = _wrapattrfunc(builtins.delattr)
208 delattr = _wrapattrfunc(builtins.delattr)
208 getattr = _wrapattrfunc(builtins.getattr)
209 getattr = _wrapattrfunc(builtins.getattr)
209 hasattr = _wrapattrfunc(builtins.hasattr)
210 hasattr = _wrapattrfunc(builtins.hasattr)
210 setattr = _wrapattrfunc(builtins.setattr)
211 setattr = _wrapattrfunc(builtins.setattr)
211 xrange = builtins.range
212 xrange = builtins.range
212 unicode = str
213 unicode = str
213
214
def open(name, mode='r', buffering=-1):
    """Open ``name`` via builtins.open(), passing ``mode`` through sysstr()
    first so the builtin receives a native str mode."""
    strmode = sysstr(mode)
    return builtins.open(name, strmode, buffering)
216
217
217 def _getoptbwrapper(orig, args, shortlist, namelist):
218 def _getoptbwrapper(orig, args, shortlist, namelist):
218 """
219 """
219 Takes bytes arguments, converts them to unicode, pass them to
220 Takes bytes arguments, converts them to unicode, pass them to
220 getopt.getopt(), convert the returned values back to bytes and then
221 getopt.getopt(), convert the returned values back to bytes and then
221 return them for Python 3 compatibility as getopt.getopt() don't accepts
222 return them for Python 3 compatibility as getopt.getopt() don't accepts
222 bytes on Python 3.
223 bytes on Python 3.
223 """
224 """
224 args = [a.decode('latin-1') for a in args]
225 args = [a.decode('latin-1') for a in args]
225 shortlist = shortlist.decode('latin-1')
226 shortlist = shortlist.decode('latin-1')
226 namelist = [a.decode('latin-1') for a in namelist]
227 namelist = [a.decode('latin-1') for a in namelist]
227 opts, args = orig(args, shortlist, namelist)
228 opts, args = orig(args, shortlist, namelist)
228 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
229 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
229 for a in opts]
230 for a in opts]
230 args = [a.encode('latin-1') for a in args]
231 args = [a.encode('latin-1') for a in args]
231 return opts, args
232 return opts, args
232
233
def strkwargs(dic):
    """Return a copy of ``dic`` whose bytes keys are decoded to str.

    Dictionaries with bytes keys can't be passed as keyword arguments to
    functions on Python 3, so the keys are decoded as latin-1. Values are
    left untouched.
    """
    # dict.iteritems() does not exist on Python 3; items() is the portable
    # spelling and this function is only defined on the Python 3 branch.
    return dict((k.decode('latin-1'), v) for k, v in dic.items())
241
242
def byteskwargs(dic):
    """Return a copy of ``dic`` whose str keys are encoded to bytes.

    This reverses the conversion done to pass a dictionary as keyword
    arguments on Python 3. Keys are encoded as latin-1 and values are left
    untouched.
    """
    # dict.iteritems() does not exist on Python 3; items() is the portable
    # spelling and this function is only defined on the Python 3 branch.
    return dict((k.encode('latin-1'), v) for k, v in dic.items())
249
250
250 # TODO: handle shlex.shlex().
251 # TODO: handle shlex.shlex().
def shlexsplit(s):
    """Split a bytes command line with shlex.split() and return bytes.

    shlex.split() doesn't accept bytes on Python 3, so the input is decoded
    to str as latin-1, split, and every resulting token is encoded back to
    bytes.
    """
    tokens = shlex.split(s.decode('latin-1'))
    encoded = []
    for token in tokens:
        encoded.append(token.encode('latin-1'))
    return encoded
259
260
260 else:
261 else:
261 import cStringIO
262 import cStringIO
262
263
263 bytechr = chr
264 bytechr = chr
264 bytestr = str
265 bytestr = str
265 iterbytestr = iter
266 iterbytestr = iter
266 sysbytes = identity
267 sysbytes = identity
267 sysstr = identity
268 sysstr = identity
268 strurl = identity
269 strurl = identity
269 bytesurl = identity
270 bytesurl = identity
270
271
271 # this can't be parsed on Python 3
272 # this can't be parsed on Python 3
272 exec('def raisewithtb(exc, tb):\n'
273 exec('def raisewithtb(exc, tb):\n'
273 ' raise exc, None, tb\n')
274 ' raise exc, None, tb\n')
274
275
def fsencode(filename):
    """Partial backport of os.fsencode() from Python 3 that only accepts
    ``str``.

    In Python 2 our paths should only ever be bytes (i.e. ``str``); a
    unicode path indicates a bug, so anything that is not a ``str`` raises
    TypeError.
    """
    if not isinstance(filename, str):
        raise TypeError(
            "expect str, not %s" % type(filename).__name__)
    return filename
286
287
287 # In Python 2, fsdecode() is very likely to receive bytes. So it's
288 # In Python 2, fsdecode() is very likely to receive bytes. So it's
288 # better not to touch Python 2 part as it's already working fine.
289 # better not to touch Python 2 part as it's already working fine.
289 fsdecode = identity
290 fsdecode = identity
290
291
def getdoc(obj):
    """Return the ``__doc__`` attribute of ``obj``, or None if it has none."""
    doc = getattr(obj, '__doc__', None)
    return doc
293
294
294 def _getoptbwrapper(orig, args, shortlist, namelist):
295 def _getoptbwrapper(orig, args, shortlist, namelist):
295 return orig(args, shortlist, namelist)
296 return orig(args, shortlist, namelist)
296
297
297 strkwargs = identity
298 strkwargs = identity
298 byteskwargs = identity
299 byteskwargs = identity
299
300
300 oslinesep = os.linesep
301 oslinesep = os.linesep
301 osname = os.name
302 osname = os.name
302 ospathsep = os.pathsep
303 ospathsep = os.pathsep
303 ossep = os.sep
304 ossep = os.sep
304 osaltsep = os.altsep
305 osaltsep = os.altsep
305 stdin = sys.stdin
306 stdin = sys.stdin
306 stdout = sys.stdout
307 stdout = sys.stdout
307 stderr = sys.stderr
308 stderr = sys.stderr
308 if getattr(sys, 'argv', None) is not None:
309 if getattr(sys, 'argv', None) is not None:
309 sysargv = sys.argv
310 sysargv = sys.argv
310 sysplatform = sys.platform
311 sysplatform = sys.platform
311 getcwd = os.getcwd
312 getcwd = os.getcwd
312 sysexecutable = sys.executable
313 sysexecutable = sys.executable
313 shlexsplit = shlex.split
314 shlexsplit = shlex.split
314 stringio = cStringIO.StringIO
315 stringio = cStringIO.StringIO
315 maplist = map
316 maplist = map
317 ziplist = zip
316 rawinput = raw_input
318 rawinput = raw_input
317
319
318 isjython = sysplatform.startswith('java')
320 isjython = sysplatform.startswith('java')
319
321
320 isdarwin = sysplatform == 'darwin'
322 isdarwin = sysplatform == 'darwin'
321 isposix = osname == 'posix'
323 isposix = osname == 'posix'
322 iswindows = osname == 'nt'
324 iswindows = osname == 'nt'
323
325
def getoptb(args, shortlist, namelist):
    """Parse options with getopt.getopt() via _getoptbwrapper()."""
    parser = getopt.getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
326
328
def gnugetoptb(args, shortlist, namelist):
    """Parse options with getopt.gnu_getopt() via _getoptbwrapper()."""
    parser = getopt.gnu_getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
General Comments 0
You need to be logged in to leave comments. Login now