##// END OF EJS Templates
py3: introduce pycompat.ziplist as zip is a generator on Python 3...
Pulkit Goyal -
r35406:e66d6e93 default
parent child Browse files
Show More
@@ -1,328 +1,330 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import os
15 15 import shlex
16 16 import sys
17 17
18 18 ispy3 = (sys.version_info[0] >= 3)
19 19 ispypy = (r'__pypy__' in sys.builtin_module_names)
20 20
21 21 if not ispy3:
22 22 import cookielib
23 23 import cPickle as pickle
24 24 import httplib
25 25 import Queue as _queue
26 26 import SocketServer as socketserver
27 27 import xmlrpclib
28 28 else:
29 29 import http.cookiejar as cookielib
30 30 import http.client as httplib
31 31 import pickle
32 32 import queue as _queue
33 33 import socketserver
34 34 import xmlrpc.client as xmlrpclib
35 35
36 36 empty = _queue.Empty
37 37 queue = _queue.Queue
38 38
def identity(a):
    """Return ``a`` unchanged.

    Used as a no-op stand-in wherever one Python version needs a real
    conversion function and the other needs none.
    """
    return a
41 41
42 42 if ispy3:
43 43 import builtins
44 44 import functools
45 45 import io
46 46 import struct
47 47
48 48 fsencode = os.fsencode
49 49 fsdecode = os.fsdecode
50 50 oslinesep = os.linesep.encode('ascii')
51 51 osname = os.name.encode('ascii')
52 52 ospathsep = os.pathsep.encode('ascii')
53 53 ossep = os.sep.encode('ascii')
54 54 osaltsep = os.altsep
55 55 if osaltsep:
56 56 osaltsep = osaltsep.encode('ascii')
57 57 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
58 58 # returns bytes.
59 59 getcwd = os.getcwdb
60 60 sysplatform = sys.platform.encode('ascii')
61 61 sysexecutable = sys.executable
62 62 if sysexecutable:
63 63 sysexecutable = os.fsencode(sysexecutable)
64 64 stringio = io.BytesIO
65 65 maplist = lambda *args: list(map(*args))
66 ziplist = lambda *args: list(zip(*args))
66 67 rawinput = input
67 68
68 69 # TODO: .buffer might not exist if std streams were replaced; we'll need
69 70 # a silly wrapper to make a bytes stream backed by a unicode one.
70 71 stdin = sys.stdin.buffer
71 72 stdout = sys.stdout.buffer
72 73 stderr = sys.stderr.buffer
73 74
74 75 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
75 76 # we can use os.fsencode() to get back bytes argv.
76 77 #
77 78 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
78 79 #
79 80 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
80 81 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
81 82 if getattr(sys, 'argv', None) is not None:
82 83 sysargv = list(map(os.fsencode, sys.argv))
83 84
84 85 bytechr = struct.Struct('>B').pack
85 86
class bytestr(bytes):
    """A bytes subclass that mostly behaves like a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    (b'', b'foo', b'ascii', b'1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    b'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t) # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # an existing bytestr is handed back as-is (identity is preserved)
        if isinstance(s, bytestr):
            return s
        rawtypes = (bytes, bytearray)
        # anything that is neither bytes-like nor provides __bytes__() is
        # stringified and must then be pure ASCII
        if not (isinstance(s, rawtypes)
                or hasattr(s, u'__bytes__')): # hasattr-py3-only
            s = str(s).encode(u'ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        if isinstance(item, bytes):
            # slices already come back as bytes
            return item
        # an integer index yields an int; convert it to a length-1 bytes
        return bytechr(item)

    def __iter__(self):
        rawiter = bytes.__iter__(self)
        return iterbytestr(rawiter)
155 156
def iterbytestr(s):
    """Iterate over ``s`` yielding length-1 bytes, the way iterating a
    Python 2 str would"""
    tobyte = bytechr
    return map(tobyte, s)
159 160
def sysbytes(s):
    """Turn an internal str (e.g. keyword, __doc__) back into bytes

    The text is encoded as UTF-8. Only ASCII characters survive a
    sysstr(sysbytes(s)) round trip.
    """
    encoded = s.encode(u'utf-8')
    return encoded
167 168
def sysstr(s):
    """Return a native str usable as a keyword for Python functions such
    as getattr() and str.encode()

    A value that is already a str is returned untouched. Bytes are decoded
    as latin-1, so decoding never fails and distinct byte strings always
    map to distinct results; non-ascii bytes end up as arbitrary (but
    unique) code points and should be considered invalid.
    """
    if not isinstance(s, builtins.str):
        return s.decode(u'latin-1')
    return s
179 180
def strurl(url):
    """Decode a bytes url into a str, assuming it is pure ASCII"""
    decoded = url.decode(u'ascii')
    return decoded
183 184
def bytesurl(url):
    """Encode a str url into bytes using the ASCII codec"""
    encoded = url.encode(u'ascii')
    return encoded
187 188
def raisewithtb(exc, tb):
    """Raise ``exc`` after attaching the traceback ``tb`` to it"""
    # with_traceback() sets the traceback and hands back the same exception
    prepared = exc.with_traceback(tb)
    raise prepared
191 192
def getdoc(obj):
    """Return the docstring of ``obj`` as bytes

    None is passed through when there is no docstring, so that gettext()
    won't confuse it with _('').
    """
    doc = getattr(obj, u'__doc__', None)
    return doc if doc is None else sysbytes(doc)
199 200
200 201 def _wrapattrfunc(f):
201 202 @functools.wraps(f)
202 203 def w(object, name, *args):
203 204 return f(object, sysstr(name), *args)
204 205 return w
205 206
206 207 # these wrappers are automagically imported by hgloader
207 208 delattr = _wrapattrfunc(builtins.delattr)
208 209 getattr = _wrapattrfunc(builtins.getattr)
209 210 hasattr = _wrapattrfunc(builtins.hasattr)
210 211 setattr = _wrapattrfunc(builtins.setattr)
211 212 xrange = builtins.range
212 213 unicode = str
213 214
def open(name, mode='r', buffering=-1):
    """Open ``name`` with the builtin open(), first converting ``mode``
    with sysstr()"""
    nativemode = sysstr(mode)
    return builtins.open(name, nativemode, buffering)
216 217
217 218 def _getoptbwrapper(orig, args, shortlist, namelist):
218 219 """
219 220 Takes bytes arguments, converts them to unicode, pass them to
220 221 getopt.getopt(), convert the returned values back to bytes and then
221 222 return them for Python 3 compatibility as getopt.getopt() don't accepts
222 223 bytes on Python 3.
223 224 """
224 225 args = [a.decode('latin-1') for a in args]
225 226 shortlist = shortlist.decode('latin-1')
226 227 namelist = [a.decode('latin-1') for a in namelist]
227 228 opts, args = orig(args, shortlist, namelist)
228 229 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
229 230 for a in opts]
230 231 args = [a.encode('latin-1') for a in args]
231 232 return opts, args
232 233
def strkwargs(dic):
    """
    Converts the keys of a python dictionary to str i.e. unicodes so that
    they can be passed as keyword arguments, as dictionaries with bytes keys
    can't be passed as keyword arguments to functions on Python 3.

    Keys are decoded as latin-1; values are left untouched. A new dict is
    returned and ``dic`` itself is not modified.
    """
    # This definition is only used on Python 3, where dicts have no
    # iteritems() method; items() is the portable spelling.
    dic = dict((k.decode('latin-1'), v) for k, v in dic.items())
    return dic
241 242
def byteskwargs(dic):
    """
    Converts the keys of a python dictionary back to bytes, as they were
    converted to str in order to pass that dictionary as keyword arguments
    on Python 3.

    Keys are encoded as latin-1; values are left untouched. A new dict is
    returned and ``dic`` itself is not modified.
    """
    # This definition is only used on Python 3, where dicts have no
    # iteritems() method; items() is the portable spelling.
    dic = dict((k.encode('latin-1'), v) for k, v in dic.items())
    return dic
249 250
# TODO: handle shlex.shlex().
def shlexsplit(s):
    """
    Split the bytes string ``s`` with shlex.split() and return a list of
    bytes.

    shlex.split() doesn't accept bytes on Python 3, so the input is decoded
    to str (latin-1) first and every resulting token is encoded back.
    """
    text = s.decode('latin-1')
    tokens = shlex.split(text)
    return [tok.encode('latin-1') for tok in tokens]
259 260
260 261 else:
261 262 import cStringIO
262 263
263 264 bytechr = chr
264 265 bytestr = str
265 266 iterbytestr = iter
266 267 sysbytes = identity
267 268 sysstr = identity
268 269 strurl = identity
269 270 bytesurl = identity
270 271
271 272 # this can't be parsed on Python 3
272 273 exec('def raisewithtb(exc, tb):\n'
273 274 ' raise exc, None, tb\n')
274 275
def fsencode(filename):
    """
    Partial backport of os.fsencode() from Python 3 that accepts only str
    (i.e. bytes on Python 2).

    On Python 2 our paths should always be bytes already, so a str is
    returned unchanged and anything else (e.g. a unicode path) indicates a
    bug and raises TypeError.
    """
    if not isinstance(filename, str):
        raise TypeError(
            "expect str, not %s" % type(filename).__name__)
    return filename
286 287
287 288 # In Python 2, fsdecode() is very likely to receive bytes. So it's
288 289 # better not to touch the Python 2 part as it's already working fine.
289 290 fsdecode = identity
290 291
def getdoc(obj):
    """Return the docstring of ``obj``, or None when it has none"""
    doc = getattr(obj, '__doc__', None)
    return doc
293 294
294 295 def _getoptbwrapper(orig, args, shortlist, namelist):
295 296 return orig(args, shortlist, namelist)
296 297
297 298 strkwargs = identity
298 299 byteskwargs = identity
299 300
300 301 oslinesep = os.linesep
301 302 osname = os.name
302 303 ospathsep = os.pathsep
303 304 ossep = os.sep
304 305 osaltsep = os.altsep
305 306 stdin = sys.stdin
306 307 stdout = sys.stdout
307 308 stderr = sys.stderr
308 309 if getattr(sys, 'argv', None) is not None:
309 310 sysargv = sys.argv
310 311 sysplatform = sys.platform
311 312 getcwd = os.getcwd
312 313 sysexecutable = sys.executable
313 314 shlexsplit = shlex.split
314 315 stringio = cStringIO.StringIO
315 316 maplist = map
317 ziplist = zip
316 318 rawinput = raw_input
317 319
318 320 isjython = sysplatform.startswith('java')
319 321
320 322 isdarwin = sysplatform == 'darwin'
321 323 isposix = osname == 'posix'
322 324 iswindows = osname == 'nt'
323 325
def getoptb(args, shortlist, namelist):
    """Parse ``args`` with getopt.getopt(), routed through
    _getoptbwrapper()"""
    parser = getopt.getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
326 328
def gnugetoptb(args, shortlist, namelist):
    """Parse ``args`` with getopt.gnu_getopt(), routed through
    _getoptbwrapper()"""
    parser = getopt.gnu_getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
General Comments 0
You need to be logged in to leave comments. Login now