##// END OF EJS Templates
pycompat: remove first not ispy3 block...
Gregory Szorc -
r49724:57b58413 default
parent child Browse files
Show More
@@ -1,540 +1,527 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 import builtins
14 import concurrent.futures as futures
13 15 import getopt
16 import http.client as httplib
17 import http.cookiejar as cookielib
14 18 import inspect
15 19 import json
16 20 import os
21 import pickle
22 import queue
17 23 import shlex
24 import socketserver
18 25 import sys
19 26 import tempfile
27 import xmlrpc.client as xmlrpclib
20 28
21 29 ispy3 = sys.version_info[0] >= 3
22 30 ispypy = '__pypy__' in sys.builtin_module_names
23 31 TYPE_CHECKING = False
24 32
25 33 if not globals(): # hide this from non-pytype users
26 34 import typing
27 35
28 36 TYPE_CHECKING = typing.TYPE_CHECKING
29 37
30 if not ispy3:
31 import cookielib
32 import cPickle as pickle
33 import httplib
34 import Queue as queue
35 import SocketServer as socketserver
36 import xmlrpclib
37
38 def future_set_exception_info(f, exc_info):
39 f.set_exception_info(*exc_info)
40
41 # this is close enough for our usage
42 FileNotFoundError = OSError
43 38
44 else:
45 import builtins
46 import http.cookiejar as cookielib
47 import http.client as httplib
48 import pickle
49 import queue as queue
50 import socketserver
51 import xmlrpc.client as xmlrpclib
39 def future_set_exception_info(f, exc_info):
40 f.set_exception(exc_info[0])
52 41
53 def future_set_exception_info(f, exc_info):
54 f.set_exception(exc_info[0])
55 42
56 FileNotFoundError = builtins.FileNotFoundError
43 FileNotFoundError = builtins.FileNotFoundError
57 44
58 45
59 46 def identity(a):
60 47 return a
61 48
62 49
63 50 def _rapply(f, xs):
64 51 if xs is None:
65 52 # assume None means non-value of optional data
66 53 return xs
67 54 if isinstance(xs, (list, set, tuple)):
68 55 return type(xs)(_rapply(f, x) for x in xs)
69 56 if isinstance(xs, dict):
70 57 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
71 58 return f(xs)
72 59
73 60
74 61 def rapply(f, xs):
75 62 """Apply function recursively to every item preserving the data structure
76 63
77 64 >>> def f(x):
78 65 ... return 'f(%s)' % x
79 66 >>> rapply(f, None) is None
80 67 True
81 68 >>> rapply(f, 'a')
82 69 'f(a)'
83 70 >>> rapply(f, {'a'}) == {'f(a)'}
84 71 True
85 72 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
86 73 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
87 74
88 75 >>> xs = [object()]
89 76 >>> rapply(identity, xs) is xs
90 77 True
91 78 """
92 79 if f is identity:
93 80 # fast path mainly for py2
94 81 return xs
95 82 return _rapply(f, xs)
96 83
97 84
98 85 if ispy3:
99 86 import builtins
100 87 import codecs
101 88 import functools
102 89 import io
103 90 import struct
104 91
105 92 if os.name == r'nt' and sys.version_info >= (3, 6):
106 93 # MBCS (or ANSI) filesystem encoding must be used as before.
107 94 # Otherwise non-ASCII filenames in existing repositories would be
108 95 # corrupted.
109 96 # This must be set once prior to any fsencode/fsdecode calls.
110 97 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
111 98
112 99 fsencode = os.fsencode
113 100 fsdecode = os.fsdecode
114 101 oscurdir = os.curdir.encode('ascii')
115 102 oslinesep = os.linesep.encode('ascii')
116 103 osname = os.name.encode('ascii')
117 104 ospathsep = os.pathsep.encode('ascii')
118 105 ospardir = os.pardir.encode('ascii')
119 106 ossep = os.sep.encode('ascii')
120 107 osaltsep = os.altsep
121 108 if osaltsep:
122 109 osaltsep = osaltsep.encode('ascii')
123 110 osdevnull = os.devnull.encode('ascii')
124 111
125 112 sysplatform = sys.platform.encode('ascii')
126 113 sysexecutable = sys.executable
127 114 if sysexecutable:
128 115 sysexecutable = os.fsencode(sysexecutable)
129 116 bytesio = io.BytesIO
130 117 # TODO deprecate stringio name, as it is a lie on Python 3.
131 118 stringio = bytesio
132 119
133 120 def maplist(*args):
134 121 return list(map(*args))
135 122
136 123 def rangelist(*args):
137 124 return list(range(*args))
138 125
139 126 def ziplist(*args):
140 127 return list(zip(*args))
141 128
142 129 rawinput = input
143 130 getargspec = inspect.getfullargspec
144 131
145 132 long = int
146 133
147 134 if getattr(sys, 'argv', None) is not None:
148 135 # On POSIX, the char** argv array is converted to Python str using
149 136 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
150 137 # isn't directly callable from Python code. In practice, os.fsencode()
151 138 # can be used instead (this is recommended by Python's documentation
152 139 # for sys.argv).
153 140 #
154 141 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
155 142 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
156 143 # there's an additional wrinkle. What we really want to access is the
157 144 # ANSI codepage representation of the arguments, as this is what
158 145 # `int main()` would receive if Python 3 didn't define `int wmain()`
159 146 # (this is how Python 2 worked). To get that, we encode with the mbcs
160 147 # encoding, which will pass CP_ACP to the underlying Windows API to
161 148 # produce bytes.
162 149 if os.name == r'nt':
163 150 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
164 151 else:
165 152 sysargv = [fsencode(a) for a in sys.argv]
166 153
167 154 bytechr = struct.Struct('>B').pack
168 155 byterepr = b'%r'.__mod__
169 156
170 157 class bytestr(bytes):
171 158 """A bytes which mostly acts as a Python 2 str
172 159
173 160 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
174 161 ('', 'foo', 'ascii', '1')
175 162 >>> s = bytestr(b'foo')
176 163 >>> assert s is bytestr(s)
177 164
178 165 __bytes__() should be called if provided:
179 166
180 167 >>> class bytesable(object):
181 168 ... def __bytes__(self):
182 169 ... return b'bytes'
183 170 >>> bytestr(bytesable())
184 171 'bytes'
185 172
186 173 There's no implicit conversion from non-ascii str as its encoding is
187 174 unknown:
188 175
189 176 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
190 177 Traceback (most recent call last):
191 178 ...
192 179 UnicodeEncodeError: ...
193 180
194 181 Comparison between bytestr and bytes should work:
195 182
196 183 >>> assert bytestr(b'foo') == b'foo'
197 184 >>> assert b'foo' == bytestr(b'foo')
198 185 >>> assert b'f' in bytestr(b'foo')
199 186 >>> assert bytestr(b'f') in b'foo'
200 187
201 188 Sliced elements should be bytes, not integer:
202 189
203 190 >>> s[1], s[:2]
204 191 (b'o', b'fo')
205 192 >>> list(s), list(reversed(s))
206 193 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
207 194
208 195 As bytestr type isn't propagated across operations, you need to cast
209 196 bytes to bytestr explicitly:
210 197
211 198 >>> s = bytestr(b'foo').upper()
212 199 >>> t = bytestr(s)
213 200 >>> s[0], t[0]
214 201 (70, b'F')
215 202
216 203 Be careful to not pass a bytestr object to a function which expects
217 204 bytearray-like behavior.
218 205
219 206 >>> t = bytes(t) # cast to bytes
220 207 >>> assert type(t) is bytes
221 208 """
222 209
223 210 # Trick pytype into not demanding Iterable[int] be passed to __new__(),
224 211 # since the appropriate bytes format is done internally.
225 212 #
226 213 # https://github.com/google/pytype/issues/500
227 214 if TYPE_CHECKING:
228 215
229 216 def __init__(self, s=b''):
230 217 pass
231 218
232 219 def __new__(cls, s=b''):
233 220 if isinstance(s, bytestr):
234 221 return s
235 222 if not isinstance(
236 223 s, (bytes, bytearray)
237 224 ) and not hasattr( # hasattr-py3-only
238 225 s, u'__bytes__'
239 226 ):
240 227 s = str(s).encode('ascii')
241 228 return bytes.__new__(cls, s)
242 229
243 230 def __getitem__(self, key):
244 231 s = bytes.__getitem__(self, key)
245 232 if not isinstance(s, bytes):
246 233 s = bytechr(s)
247 234 return s
248 235
249 236 def __iter__(self):
250 237 return iterbytestr(bytes.__iter__(self))
251 238
252 239 def __repr__(self):
253 240 return bytes.__repr__(self)[1:] # drop b''
254 241
255 242 def iterbytestr(s):
256 243 """Iterate bytes as if it were a str object of Python 2"""
257 244 return map(bytechr, s)
258 245
259 246 def maybebytestr(s):
260 247 """Promote bytes to bytestr"""
261 248 if isinstance(s, bytes):
262 249 return bytestr(s)
263 250 return s
264 251
265 252 def sysbytes(s):
266 253 """Convert an internal str (e.g. keyword, __doc__) back to bytes
267 254
268 255 This never raises UnicodeEncodeError, but only ASCII characters
269 256 can be round-tripped by sysstr(sysbytes(s)).
270 257 """
271 258 if isinstance(s, bytes):
272 259 return s
273 260 return s.encode('utf-8')
274 261
275 262 def sysstr(s):
276 263 """Return a keyword str to be passed to Python functions such as
277 264 getattr() and str.encode()
278 265
279 266 This never raises UnicodeDecodeError. Non-ascii characters are
280 267 considered invalid and mapped to arbitrary but unique code points
281 268 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
282 269 """
283 270 if isinstance(s, builtins.str):
284 271 return s
285 272 return s.decode('latin-1')
286 273
287 274 def strurl(url):
288 275 """Converts a bytes url back to str"""
289 276 if isinstance(url, bytes):
290 277 return url.decode('ascii')
291 278 return url
292 279
293 280 def bytesurl(url):
294 281 """Converts a str url to bytes by encoding in ascii"""
295 282 if isinstance(url, str):
296 283 return url.encode('ascii')
297 284 return url
298 285
299 286 def raisewithtb(exc, tb):
300 287 """Raise exception with the given traceback"""
301 288 raise exc.with_traceback(tb)
302 289
303 290 def getdoc(obj):
304 291 """Get docstring as bytes; may be None so gettext() won't confuse it
305 292 with _('')"""
306 293 doc = getattr(obj, '__doc__', None)
307 294 if doc is None:
308 295 return doc
309 296 return sysbytes(doc)
310 297
311 298 def _wrapattrfunc(f):
312 299 @functools.wraps(f)
313 300 def w(object, name, *args):
314 301 return f(object, sysstr(name), *args)
315 302
316 303 return w
317 304
318 305 # these wrappers are automagically imported by hgloader
319 306 delattr = _wrapattrfunc(builtins.delattr)
320 307 getattr = _wrapattrfunc(builtins.getattr)
321 308 hasattr = _wrapattrfunc(builtins.hasattr)
322 309 setattr = _wrapattrfunc(builtins.setattr)
323 310 xrange = builtins.range
324 311 unicode = str
325 312
326 313 def open(name, mode=b'r', buffering=-1, encoding=None):
327 314 return builtins.open(name, sysstr(mode), buffering, encoding)
328 315
329 316 safehasattr = _wrapattrfunc(builtins.hasattr)
330 317
331 318 def _getoptbwrapper(orig, args, shortlist, namelist):
332 319 """
333 320 Takes bytes arguments, converts them to unicode, passes them to
334 321 getopt.getopt(), converts the returned values back to bytes and then
335 322 returns them for Python 3 compatibility, as getopt.getopt() doesn't accept
336 323 bytes on Python 3.
337 324 """
338 325 args = [a.decode('latin-1') for a in args]
339 326 shortlist = shortlist.decode('latin-1')
340 327 namelist = [a.decode('latin-1') for a in namelist]
341 328 opts, args = orig(args, shortlist, namelist)
342 329 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
343 330 args = [a.encode('latin-1') for a in args]
344 331 return opts, args
345 332
346 333 def strkwargs(dic):
347 334 """
348 335 Converts the keys of a Python dictionary to str (i.e. unicode) so that
349 336 they can be passed as keyword arguments, since dictionaries with bytes keys
350 337 can't be passed as keyword arguments to functions on Python 3.
351 338 """
352 339 dic = {k.decode('latin-1'): v for k, v in dic.items()}
353 340 return dic
354 341
355 342 def byteskwargs(dic):
356 343 """
357 344 Converts keys of Python dictionaries to bytes, as they were converted to
358 345 str to pass that dictionary as keyword arguments on Python 3.
359 346 """
360 347 dic = {k.encode('latin-1'): v for k, v in dic.items()}
361 348 return dic
362 349
363 350 # TODO: handle shlex.shlex().
364 351 def shlexsplit(s, comments=False, posix=True):
365 352 """
366 353 Takes a bytes argument, converts it to str (i.e. unicode), passes that into
367 354 shlex.split(), converts the returned values to bytes and returns them for
368 355 Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
369 356 """
370 357 ret = shlex.split(s.decode('latin-1'), comments, posix)
371 358 return [a.encode('latin-1') for a in ret]
372 359
373 360 iteritems = lambda x: x.items()
374 361 itervalues = lambda x: x.values()
375 362
376 363 # Python 3.5's json.load and json.loads require str. We polyfill its
377 364 # code for detecting encoding from bytes.
378 365 if sys.version_info[0:2] < (3, 6):
379 366
380 367 def _detect_encoding(b):
381 368 bstartswith = b.startswith
382 369 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
383 370 return 'utf-32'
384 371 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
385 372 return 'utf-16'
386 373 if bstartswith(codecs.BOM_UTF8):
387 374 return 'utf-8-sig'
388 375
389 376 if len(b) >= 4:
390 377 if not b[0]:
391 378 # 00 00 -- -- - utf-32-be
392 379 # 00 XX -- -- - utf-16-be
393 380 return 'utf-16-be' if b[1] else 'utf-32-be'
394 381 if not b[1]:
395 382 # XX 00 00 00 - utf-32-le
396 383 # XX 00 00 XX - utf-16-le
397 384 # XX 00 XX -- - utf-16-le
398 385 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
399 386 elif len(b) == 2:
400 387 if not b[0]:
401 388 # 00 XX - utf-16-be
402 389 return 'utf-16-be'
403 390 if not b[1]:
404 391 # XX 00 - utf-16-le
405 392 return 'utf-16-le'
406 393 # default
407 394 return 'utf-8'
408 395
409 396 def json_loads(s, *args, **kwargs):
410 397 if isinstance(s, (bytes, bytearray)):
411 398 s = s.decode(_detect_encoding(s), 'surrogatepass')
412 399
413 400 return json.loads(s, *args, **kwargs)
414 401
415 402 else:
416 403 json_loads = json.loads
417 404
418 405 else:
419 406 import cStringIO
420 407
421 408 xrange = xrange
422 409 unicode = unicode
423 410 bytechr = chr
424 411 byterepr = repr
425 412 bytestr = str
426 413 iterbytestr = iter
427 414 maybebytestr = identity
428 415 sysbytes = identity
429 416 sysstr = identity
430 417 strurl = identity
431 418 bytesurl = identity
432 419 open = open
433 420 delattr = delattr
434 421 getattr = getattr
435 422 hasattr = hasattr
436 423 setattr = setattr
437 424
438 425 # this can't be parsed on Python 3
439 426 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
440 427
441 428 def fsencode(filename):
442 429 """
443 430 Partial backport from os.py in Python 3, which only accepts bytes.
444 431 In Python 2, our paths should only ever be bytes, a unicode path
445 432 indicates a bug.
446 433 """
447 434 if isinstance(filename, str):
448 435 return filename
449 436 else:
450 437 raise TypeError("expect str, not %s" % type(filename).__name__)
451 438
452 439 # In Python 2, fsdecode() has a very high chance of receiving bytes. So it's
453 440 # better not to touch the Python 2 part as it's already working fine.
454 441 fsdecode = identity
455 442
456 443 def getdoc(obj):
457 444 return getattr(obj, '__doc__', None)
458 445
459 446 _notset = object()
460 447
461 448 def safehasattr(thing, attr):
462 449 return getattr(thing, attr, _notset) is not _notset
463 450
464 451 def _getoptbwrapper(orig, args, shortlist, namelist):
465 452 return orig(args, shortlist, namelist)
466 453
467 454 strkwargs = identity
468 455 byteskwargs = identity
469 456
470 457 oscurdir = os.curdir
471 458 oslinesep = os.linesep
472 459 osname = os.name
473 460 ospathsep = os.pathsep
474 461 ospardir = os.pardir
475 462 ossep = os.sep
476 463 osaltsep = os.altsep
477 464 osdevnull = os.devnull
478 465 long = long
479 466 if getattr(sys, 'argv', None) is not None:
480 467 sysargv = sys.argv
481 468 sysplatform = sys.platform
482 469 sysexecutable = sys.executable
483 470 shlexsplit = shlex.split
484 471 bytesio = cStringIO.StringIO
485 472 stringio = bytesio
486 473 maplist = map
487 474 rangelist = range
488 475 ziplist = zip
489 476 rawinput = raw_input
490 477 getargspec = inspect.getargspec
491 478 iteritems = lambda x: x.iteritems()
492 479 itervalues = lambda x: x.itervalues()
493 480 json_loads = json.loads
494 481
495 482 isjython = sysplatform.startswith(b'java')
496 483
497 484 isdarwin = sysplatform.startswith(b'darwin')
498 485 islinux = sysplatform.startswith(b'linux')
499 486 isposix = osname == b'posix'
500 487 iswindows = osname == b'nt'
501 488
502 489
503 490 def getoptb(args, shortlist, namelist):
504 491 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
505 492
506 493
507 494 def gnugetoptb(args, shortlist, namelist):
508 495 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
509 496
510 497
511 498 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
512 499 return tempfile.mkdtemp(suffix, prefix, dir)
513 500
514 501
515 502 # text=True is not supported; use util.from/tonativeeol() instead
516 503 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
517 504 return tempfile.mkstemp(suffix, prefix, dir)
518 505
519 506
520 507 # TemporaryFile does not support an "encoding=" argument on python2.
521 508 # Files from this wrapper are always opened in byte mode.
522 509 def unnamedtempfile(mode=None, *args, **kwargs):
523 510 if mode is None:
524 511 mode = 'w+b'
525 512 else:
526 513 mode = sysstr(mode)
527 514 assert 'b' in mode
528 515 return tempfile.TemporaryFile(mode, *args, **kwargs)
529 516
530 517
531 518 # NamedTemporaryFile does not support an "encoding=" argument on python2.
532 519 # Files from this wrapper are always opened in byte mode.
533 520 def namedtempfile(
534 521 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
535 522 ):
536 523 mode = sysstr(mode)
537 524 assert 'b' in mode
538 525 return tempfile.NamedTemporaryFile(
539 526 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
540 527 )
General Comments 0
You need to be logged in to leave comments. Login now