##// END OF EJS Templates
pycompat: remove large Python 2 block...
Gregory Szorc -
r49726:79009cca default
parent child Browse files
Show More
@@ -1,527 +1,471 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import builtins
14 import codecs
14 15 import concurrent.futures as futures
16 import functools
15 17 import getopt
16 18 import http.client as httplib
17 19 import http.cookiejar as cookielib
18 20 import inspect
21 import io
19 22 import json
20 23 import os
21 24 import pickle
22 25 import queue
23 26 import shlex
24 27 import socketserver
28 import struct
25 29 import sys
26 30 import tempfile
27 31 import xmlrpc.client as xmlrpclib
28 32
33
29 34 ispy3 = sys.version_info[0] >= 3
30 35 ispypy = '__pypy__' in sys.builtin_module_names
31 36 TYPE_CHECKING = False
32 37
33 38 if not globals(): # hide this from non-pytype users
34 39 import typing
35 40
36 41 TYPE_CHECKING = typing.TYPE_CHECKING
37 42
38 43
39 44 def future_set_exception_info(f, exc_info):
40 45 f.set_exception(exc_info[0])
41 46
42 47
43 48 FileNotFoundError = builtins.FileNotFoundError
44 49
45 50
46 51 def identity(a):
47 52 return a
48 53
49 54
50 55 def _rapply(f, xs):
51 56 if xs is None:
52 57 # assume None means non-value of optional data
53 58 return xs
54 59 if isinstance(xs, (list, set, tuple)):
55 60 return type(xs)(_rapply(f, x) for x in xs)
56 61 if isinstance(xs, dict):
57 62 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
58 63 return f(xs)
59 64
60 65
61 66 def rapply(f, xs):
62 67 """Apply function recursively to every item preserving the data structure
63 68
64 69 >>> def f(x):
65 70 ... return 'f(%s)' % x
66 71 >>> rapply(f, None) is None
67 72 True
68 73 >>> rapply(f, 'a')
69 74 'f(a)'
70 75 >>> rapply(f, {'a'}) == {'f(a)'}
71 76 True
72 77 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
73 78 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
74 79
75 80 >>> xs = [object()]
76 81 >>> rapply(identity, xs) is xs
77 82 True
78 83 """
79 84 if f is identity:
80 85 # fast path mainly for py2
81 86 return xs
82 87 return _rapply(f, xs)
83 88
84 89
85 if ispy3:
86 import builtins
87 import codecs
88 import functools
89 import io
90 import struct
91
92 90 if os.name == r'nt' and sys.version_info >= (3, 6):
93 91 # MBCS (or ANSI) filesystem encoding must be used as before.
94 92 # Otherwise non-ASCII filenames in existing repositories would be
95 93 # corrupted.
96 94 # This must be set once prior to any fsencode/fsdecode calls.
97 95 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
98 96
99 97 fsencode = os.fsencode
100 98 fsdecode = os.fsdecode
101 99 oscurdir = os.curdir.encode('ascii')
102 100 oslinesep = os.linesep.encode('ascii')
103 101 osname = os.name.encode('ascii')
104 102 ospathsep = os.pathsep.encode('ascii')
105 103 ospardir = os.pardir.encode('ascii')
106 104 ossep = os.sep.encode('ascii')
107 105 osaltsep = os.altsep
108 106 if osaltsep:
109 107 osaltsep = osaltsep.encode('ascii')
110 108 osdevnull = os.devnull.encode('ascii')
111 109
112 110 sysplatform = sys.platform.encode('ascii')
113 111 sysexecutable = sys.executable
114 112 if sysexecutable:
115 113 sysexecutable = os.fsencode(sysexecutable)
116 114 bytesio = io.BytesIO
117 115 # TODO deprecate stringio name, as it is a lie on Python 3.
118 116 stringio = bytesio
119 117
118
120 119 def maplist(*args):
121 120 return list(map(*args))
122 121
122
123 123 def rangelist(*args):
124 124 return list(range(*args))
125 125
126
126 127 def ziplist(*args):
127 128 return list(zip(*args))
128 129
130
129 131 rawinput = input
130 132 getargspec = inspect.getfullargspec
131 133
132 134 long = int
133 135
134 136 if getattr(sys, 'argv', None) is not None:
135 137 # On POSIX, the char** argv array is converted to Python str using
136 138 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
137 139 # isn't directly callable from Python code. In practice, os.fsencode()
138 140 # can be used instead (this is recommended by Python's documentation
139 141 # for sys.argv).
140 142 #
141 143 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
142 144 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
143 145 # there's an additional wrinkle. What we really want to access is the
144 146 # ANSI codepage representation of the arguments, as this is what
145 147 # `int main()` would receive if Python 3 didn't define `int wmain()`
146 148 # (this is how Python 2 worked). To get that, we encode with the mbcs
147 149 # encoding, which will pass CP_ACP to the underlying Windows API to
148 150 # produce bytes.
149 151 if os.name == r'nt':
150 152 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
151 153 else:
152 154 sysargv = [fsencode(a) for a in sys.argv]
153 155
154 156 bytechr = struct.Struct('>B').pack
155 157 byterepr = b'%r'.__mod__
156 158
159
157 160 class bytestr(bytes):
158 161 """A bytes which mostly acts as a Python 2 str
159 162
160 163 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
161 164 ('', 'foo', 'ascii', '1')
162 165 >>> s = bytestr(b'foo')
163 166 >>> assert s is bytestr(s)
164 167
165 168 __bytes__() should be called if provided:
166 169
167 170 >>> class bytesable(object):
168 171 ... def __bytes__(self):
169 172 ... return b'bytes'
170 173 >>> bytestr(bytesable())
171 174 'bytes'
172 175
173 176 There's no implicit conversion from non-ascii str as its encoding is
174 177 unknown:
175 178
176 179 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
177 180 Traceback (most recent call last):
178 181 ...
179 182 UnicodeEncodeError: ...
180 183
181 184 Comparison between bytestr and bytes should work:
182 185
183 186 >>> assert bytestr(b'foo') == b'foo'
184 187 >>> assert b'foo' == bytestr(b'foo')
185 188 >>> assert b'f' in bytestr(b'foo')
186 189 >>> assert bytestr(b'f') in b'foo'
187 190
188 191 Sliced elements should be bytes, not integer:
189 192
190 193 >>> s[1], s[:2]
191 194 (b'o', b'fo')
192 195 >>> list(s), list(reversed(s))
193 196 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
194 197
195 198 As bytestr type isn't propagated across operations, you need to cast
196 199 bytes to bytestr explicitly:
197 200
198 201 >>> s = bytestr(b'foo').upper()
199 202 >>> t = bytestr(s)
200 203 >>> s[0], t[0]
201 204 (70, b'F')
202 205
203 206 Be careful to not pass a bytestr object to a function which expects
204 207 bytearray-like behavior.
205 208
206 209 >>> t = bytes(t) # cast to bytes
207 210 >>> assert type(t) is bytes
208 211 """
209 212
210 213 # Trick pytype into not demanding Iterable[int] be passed to __new__(),
211 214 # since the appropriate bytes format is done internally.
212 215 #
213 216 # https://github.com/google/pytype/issues/500
214 217 if TYPE_CHECKING:
215 218
216 219 def __init__(self, s=b''):
217 220 pass
218 221
219 222 def __new__(cls, s=b''):
220 223 if isinstance(s, bytestr):
221 224 return s
222 225 if not isinstance(
223 226 s, (bytes, bytearray)
224 227 ) and not hasattr( # hasattr-py3-only
225 228 s, u'__bytes__'
226 229 ):
227 230 s = str(s).encode('ascii')
228 231 return bytes.__new__(cls, s)
229 232
230 233 def __getitem__(self, key):
231 234 s = bytes.__getitem__(self, key)
232 235 if not isinstance(s, bytes):
233 236 s = bytechr(s)
234 237 return s
235 238
236 239 def __iter__(self):
237 240 return iterbytestr(bytes.__iter__(self))
238 241
239 242 def __repr__(self):
240 243 return bytes.__repr__(self)[1:] # drop b''
241 244
245
242 246 def iterbytestr(s):
243 247 """Iterate bytes as if it were a str object of Python 2"""
244 248 return map(bytechr, s)
245 249
250
246 251 def maybebytestr(s):
247 252 """Promote bytes to bytestr"""
248 253 if isinstance(s, bytes):
249 254 return bytestr(s)
250 255 return s
251 256
257
252 258 def sysbytes(s):
253 259 """Convert an internal str (e.g. keyword, __doc__) back to bytes
254 260
255 261 This never raises UnicodeEncodeError, but only ASCII characters
256 262 can be round-trip by sysstr(sysbytes(s)).
257 263 """
258 264 if isinstance(s, bytes):
259 265 return s
260 266 return s.encode('utf-8')
261 267
268
262 269 def sysstr(s):
263 270 """Return a keyword str to be passed to Python functions such as
264 271 getattr() and str.encode()
265 272
266 273 This never raises UnicodeDecodeError. Non-ascii characters are
267 274 considered invalid and mapped to arbitrary but unique code points
268 275 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
269 276 """
270 277 if isinstance(s, builtins.str):
271 278 return s
272 279 return s.decode('latin-1')
273 280
281
274 282 def strurl(url):
275 283 """Converts a bytes url back to str"""
276 284 if isinstance(url, bytes):
277 285 return url.decode('ascii')
278 286 return url
279 287
288
280 289 def bytesurl(url):
281 290 """Converts a str url to bytes by encoding in ascii"""
282 291 if isinstance(url, str):
283 292 return url.encode('ascii')
284 293 return url
285 294
295
286 296 def raisewithtb(exc, tb):
287 297 """Raise exception with the given traceback"""
288 298 raise exc.with_traceback(tb)
289 299
300
290 301 def getdoc(obj):
291 302 """Get docstring as bytes; may be None so gettext() won't confuse it
292 303 with _('')"""
293 304 doc = getattr(obj, '__doc__', None)
294 305 if doc is None:
295 306 return doc
296 307 return sysbytes(doc)
297 308
309
298 310 def _wrapattrfunc(f):
299 311 @functools.wraps(f)
300 312 def w(object, name, *args):
301 313 return f(object, sysstr(name), *args)
302 314
303 315 return w
304 316
317
305 318 # these wrappers are automagically imported by hgloader
306 319 delattr = _wrapattrfunc(builtins.delattr)
307 320 getattr = _wrapattrfunc(builtins.getattr)
308 321 hasattr = _wrapattrfunc(builtins.hasattr)
309 322 setattr = _wrapattrfunc(builtins.setattr)
310 323 xrange = builtins.range
311 324 unicode = str
312 325
326
313 327 def open(name, mode=b'r', buffering=-1, encoding=None):
314 328 return builtins.open(name, sysstr(mode), buffering, encoding)
315 329
330
316 331 safehasattr = _wrapattrfunc(builtins.hasattr)
317 332
333
318 334 def _getoptbwrapper(orig, args, shortlist, namelist):
319 335 """
320 336 Takes bytes arguments, converts them to unicode, pass them to
321 337 getopt.getopt(), convert the returned values back to bytes and then
322 338 return them for Python 3 compatibility as getopt.getopt() don't accepts
323 339 bytes on Python 3.
324 340 """
325 341 args = [a.decode('latin-1') for a in args]
326 342 shortlist = shortlist.decode('latin-1')
327 343 namelist = [a.decode('latin-1') for a in namelist]
328 344 opts, args = orig(args, shortlist, namelist)
329 345 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
330 346 args = [a.encode('latin-1') for a in args]
331 347 return opts, args
332 348
349
333 350 def strkwargs(dic):
334 351 """
335 352 Converts the keys of a python dictonary to str i.e. unicodes so that
336 353 they can be passed as keyword arguments as dictionaries with bytes keys
337 354 can't be passed as keyword arguments to functions on Python 3.
338 355 """
339 356 dic = {k.decode('latin-1'): v for k, v in dic.items()}
340 357 return dic
341 358
359
342 360 def byteskwargs(dic):
343 361 """
344 362 Converts keys of python dictionaries to bytes as they were converted to
345 363 str to pass that dictonary as a keyword argument on Python 3.
346 364 """
347 365 dic = {k.encode('latin-1'): v for k, v in dic.items()}
348 366 return dic
349 367
368
350 369 # TODO: handle shlex.shlex().
351 370 def shlexsplit(s, comments=False, posix=True):
352 371 """
353 372 Takes bytes argument, convert it to str i.e. unicodes, pass that into
354 373 shlex.split(), convert the returned value to bytes and return that for
355 374 Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
356 375 """
357 376 ret = shlex.split(s.decode('latin-1'), comments, posix)
358 377 return [a.encode('latin-1') for a in ret]
359 378
379
360 380 iteritems = lambda x: x.items()
361 381 itervalues = lambda x: x.values()
362 382
363 383 # Python 3.5's json.load and json.loads require str. We polyfill its
364 384 # code for detecting encoding from bytes.
365 385 if sys.version_info[0:2] < (3, 6):
366 386
367 387 def _detect_encoding(b):
368 388 bstartswith = b.startswith
369 389 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
370 390 return 'utf-32'
371 391 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
372 392 return 'utf-16'
373 393 if bstartswith(codecs.BOM_UTF8):
374 394 return 'utf-8-sig'
375 395
376 396 if len(b) >= 4:
377 397 if not b[0]:
378 398 # 00 00 -- -- - utf-32-be
379 399 # 00 XX -- -- - utf-16-be
380 400 return 'utf-16-be' if b[1] else 'utf-32-be'
381 401 if not b[1]:
382 402 # XX 00 00 00 - utf-32-le
383 403 # XX 00 00 XX - utf-16-le
384 404 # XX 00 XX -- - utf-16-le
385 405 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
386 406 elif len(b) == 2:
387 407 if not b[0]:
388 408 # 00 XX - utf-16-be
389 409 return 'utf-16-be'
390 410 if not b[1]:
391 411 # XX 00 - utf-16-le
392 412 return 'utf-16-le'
393 413 # default
394 414 return 'utf-8'
395 415
396 416 def json_loads(s, *args, **kwargs):
397 417 if isinstance(s, (bytes, bytearray)):
398 418 s = s.decode(_detect_encoding(s), 'surrogatepass')
399 419
400 420 return json.loads(s, *args, **kwargs)
401 421
402 else:
403 json_loads = json.loads
404 422
405 423 else:
406 import cStringIO
407
408 xrange = xrange
409 unicode = unicode
410 bytechr = chr
411 byterepr = repr
412 bytestr = str
413 iterbytestr = iter
414 maybebytestr = identity
415 sysbytes = identity
416 sysstr = identity
417 strurl = identity
418 bytesurl = identity
419 open = open
420 delattr = delattr
421 getattr = getattr
422 hasattr = hasattr
423 setattr = setattr
424
425 # this can't be parsed on Python 3
426 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
427
428 def fsencode(filename):
429 """
430 Partial backport from os.py in Python 3, which only accepts bytes.
431 In Python 2, our paths should only ever be bytes, a unicode path
432 indicates a bug.
433 """
434 if isinstance(filename, str):
435 return filename
436 else:
437 raise TypeError("expect str, not %s" % type(filename).__name__)
438
439 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
440 # better not to touch Python 2 part as it's already working fine.
441 fsdecode = identity
442
443 def getdoc(obj):
444 return getattr(obj, '__doc__', None)
445
446 _notset = object()
447
448 def safehasattr(thing, attr):
449 return getattr(thing, attr, _notset) is not _notset
450
451 def _getoptbwrapper(orig, args, shortlist, namelist):
452 return orig(args, shortlist, namelist)
453
454 strkwargs = identity
455 byteskwargs = identity
456
457 oscurdir = os.curdir
458 oslinesep = os.linesep
459 osname = os.name
460 ospathsep = os.pathsep
461 ospardir = os.pardir
462 ossep = os.sep
463 osaltsep = os.altsep
464 osdevnull = os.devnull
465 long = long
466 if getattr(sys, 'argv', None) is not None:
467 sysargv = sys.argv
468 sysplatform = sys.platform
469 sysexecutable = sys.executable
470 shlexsplit = shlex.split
471 bytesio = cStringIO.StringIO
472 stringio = bytesio
473 maplist = map
474 rangelist = range
475 ziplist = zip
476 rawinput = raw_input
477 getargspec = inspect.getargspec
478 iteritems = lambda x: x.iteritems()
479 itervalues = lambda x: x.itervalues()
480 424 json_loads = json.loads
481 425
482 426 isjython = sysplatform.startswith(b'java')
483 427
484 428 isdarwin = sysplatform.startswith(b'darwin')
485 429 islinux = sysplatform.startswith(b'linux')
486 430 isposix = osname == b'posix'
487 431 iswindows = osname == b'nt'
488 432
489 433
490 434 def getoptb(args, shortlist, namelist):
491 435 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
492 436
493 437
494 438 def gnugetoptb(args, shortlist, namelist):
495 439 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
496 440
497 441
498 442 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
499 443 return tempfile.mkdtemp(suffix, prefix, dir)
500 444
501 445
502 446 # text=True is not supported; use util.from/tonativeeol() instead
503 447 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
504 448 return tempfile.mkstemp(suffix, prefix, dir)
505 449
506 450
507 451 # TemporaryFile does not support an "encoding=" argument on python2.
508 452 # This wrapper file are always open in byte mode.
509 453 def unnamedtempfile(mode=None, *args, **kwargs):
510 454 if mode is None:
511 455 mode = 'w+b'
512 456 else:
513 457 mode = sysstr(mode)
514 458 assert 'b' in mode
515 459 return tempfile.TemporaryFile(mode, *args, **kwargs)
516 460
517 461
518 462 # NamedTemporaryFile does not support an "encoding=" argument on python2.
519 463 # This wrapper file are always open in byte mode.
520 464 def namedtempfile(
521 465 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
522 466 ):
523 467 mode = sysstr(mode)
524 468 assert 'b' in mode
525 469 return tempfile.NamedTemporaryFile(
526 470 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
527 471 )
General Comments 0
You need to be logged in to leave comments. Login now