##// END OF EJS Templates
pycompat: remove check for Python >= 3.6...
Gregory Szorc -
r49814:9ac1a450 default
parent child Browse files
Show More
@@ -1,467 +1,466 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11
12 12 import builtins
13 13 import codecs
14 14 import concurrent.futures as futures
15 15 import functools
16 16 import getopt
17 17 import http.client as httplib
18 18 import http.cookiejar as cookielib
19 19 import inspect
20 20 import io
21 21 import json
22 22 import os
23 23 import pickle
24 24 import queue
25 25 import shlex
26 26 import socketserver
27 27 import struct
28 28 import sys
29 29 import tempfile
30 30 import xmlrpc.client as xmlrpclib
31 31
32 32
33 33 ispy3 = sys.version_info[0] >= 3
34 34 ispypy = '__pypy__' in sys.builtin_module_names
35 35 TYPE_CHECKING = False
36 36
37 37 if not globals(): # hide this from non-pytype users
38 38 import typing
39 39
40 40 TYPE_CHECKING = typing.TYPE_CHECKING
41 41
42 42
43 43 def future_set_exception_info(f, exc_info):
44 44 f.set_exception(exc_info[0])
45 45
46 46
47 47 FileNotFoundError = builtins.FileNotFoundError
48 48
49 49
50 50 def identity(a):
51 51 return a
52 52
53 53
54 54 def _rapply(f, xs):
55 55 if xs is None:
56 56 # assume None means non-value of optional data
57 57 return xs
58 58 if isinstance(xs, (list, set, tuple)):
59 59 return type(xs)(_rapply(f, x) for x in xs)
60 60 if isinstance(xs, dict):
61 61 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
62 62 return f(xs)
63 63
64 64
65 65 def rapply(f, xs):
66 66 """Apply function recursively to every item preserving the data structure
67 67
68 68 >>> def f(x):
69 69 ... return 'f(%s)' % x
70 70 >>> rapply(f, None) is None
71 71 True
72 72 >>> rapply(f, 'a')
73 73 'f(a)'
74 74 >>> rapply(f, {'a'}) == {'f(a)'}
75 75 True
76 76 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
77 77 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
78 78
79 79 >>> xs = [object()]
80 80 >>> rapply(identity, xs) is xs
81 81 True
82 82 """
83 83 if f is identity:
84 84 # fast path mainly for py2
85 85 return xs
86 86 return _rapply(f, xs)
87 87
88 88
89 if os.name == r'nt' and sys.version_info >= (3, 6):
89 if os.name == r'nt':
90 90 # MBCS (or ANSI) filesystem encoding must be used as before.
91 91 # Otherwise non-ASCII filenames in existing repositories would be
92 92 # corrupted.
93 93 # This must be set once prior to any fsencode/fsdecode calls.
94 94 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
95 95
96 96 fsencode = os.fsencode
97 97 fsdecode = os.fsdecode
98 98 oscurdir = os.curdir.encode('ascii')
99 99 oslinesep = os.linesep.encode('ascii')
100 100 osname = os.name.encode('ascii')
101 101 ospathsep = os.pathsep.encode('ascii')
102 102 ospardir = os.pardir.encode('ascii')
103 103 ossep = os.sep.encode('ascii')
104 104 osaltsep = os.altsep
105 105 if osaltsep:
106 106 osaltsep = osaltsep.encode('ascii')
107 107 osdevnull = os.devnull.encode('ascii')
108 108
109 109 sysplatform = sys.platform.encode('ascii')
110 110 sysexecutable = sys.executable
111 111 if sysexecutable:
112 112 sysexecutable = os.fsencode(sysexecutable)
113 113
114 114
115 115 def maplist(*args):
116 116 return list(map(*args))
117 117
118 118
119 119 def rangelist(*args):
120 120 return list(range(*args))
121 121
122 122
123 123 def ziplist(*args):
124 124 return list(zip(*args))
125 125
126 126
127 127 rawinput = input
128 128 getargspec = inspect.getfullargspec
129 129
130 130 long = int
131 131
132 132 if getattr(sys, 'argv', None) is not None:
133 133 # On POSIX, the char** argv array is converted to Python str using
134 134 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
135 135 # isn't directly callable from Python code. In practice, os.fsencode()
136 136 # can be used instead (this is recommended by Python's documentation
137 137 # for sys.argv).
138 138 #
139 139 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
140 140 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
141 141 # there's an additional wrinkle. What we really want to access is the
142 142 # ANSI codepage representation of the arguments, as this is what
143 143 # `int main()` would receive if Python 3 didn't define `int wmain()`
144 144 # (this is how Python 2 worked). To get that, we encode with the mbcs
145 145 # encoding, which will pass CP_ACP to the underlying Windows API to
146 146 # produce bytes.
147 147 if os.name == r'nt':
148 148 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
149 149 else:
150 150 sysargv = [fsencode(a) for a in sys.argv]
151 151
152 152 bytechr = struct.Struct('>B').pack
153 153 byterepr = b'%r'.__mod__
154 154
155 155
156 156 class bytestr(bytes):
157 157 """A bytes which mostly acts as a Python 2 str
158 158
159 159 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
160 160 ('', 'foo', 'ascii', '1')
161 161 >>> s = bytestr(b'foo')
162 162 >>> assert s is bytestr(s)
163 163
164 164 __bytes__() should be called if provided:
165 165
166 166 >>> class bytesable:
167 167 ... def __bytes__(self):
168 168 ... return b'bytes'
169 169 >>> bytestr(bytesable())
170 170 'bytes'
171 171
172 172 There's no implicit conversion from non-ascii str as its encoding is
173 173 unknown:
174 174
175 175 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
176 176 Traceback (most recent call last):
177 177 ...
178 178 UnicodeEncodeError: ...
179 179
180 180 Comparison between bytestr and bytes should work:
181 181
182 182 >>> assert bytestr(b'foo') == b'foo'
183 183 >>> assert b'foo' == bytestr(b'foo')
184 184 >>> assert b'f' in bytestr(b'foo')
185 185 >>> assert bytestr(b'f') in b'foo'
186 186
187 187 Sliced elements should be bytes, not integer:
188 188
189 189 >>> s[1], s[:2]
190 190 (b'o', b'fo')
191 191 >>> list(s), list(reversed(s))
192 192 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
193 193
194 194 As bytestr type isn't propagated across operations, you need to cast
195 195 bytes to bytestr explicitly:
196 196
197 197 >>> s = bytestr(b'foo').upper()
198 198 >>> t = bytestr(s)
199 199 >>> s[0], t[0]
200 200 (70, b'F')
201 201
202 202 Be careful to not pass a bytestr object to a function which expects
203 203 bytearray-like behavior.
204 204
205 205 >>> t = bytes(t) # cast to bytes
206 206 >>> assert type(t) is bytes
207 207 """
208 208
209 209 # Trick pytype into not demanding Iterable[int] be passed to __new__(),
210 210 # since the appropriate bytes format is done internally.
211 211 #
212 212 # https://github.com/google/pytype/issues/500
213 213 if TYPE_CHECKING:
214 214
215 215 def __init__(self, s=b''):
216 216 pass
217 217
218 218 def __new__(cls, s=b''):
219 219 if isinstance(s, bytestr):
220 220 return s
221 221 if not isinstance(
222 222 s, (bytes, bytearray)
223 223 ) and not hasattr( # hasattr-py3-only
224 224 s, u'__bytes__'
225 225 ):
226 226 s = str(s).encode('ascii')
227 227 return bytes.__new__(cls, s)
228 228
229 229 def __getitem__(self, key):
230 230 s = bytes.__getitem__(self, key)
231 231 if not isinstance(s, bytes):
232 232 s = bytechr(s)
233 233 return s
234 234
235 235 def __iter__(self):
236 236 return iterbytestr(bytes.__iter__(self))
237 237
238 238 def __repr__(self):
239 239 return bytes.__repr__(self)[1:] # drop b''
240 240
241 241
242 242 def iterbytestr(s):
243 243 """Iterate bytes as if it were a str object of Python 2"""
244 244 return map(bytechr, s)
245 245
246 246
247 247 def maybebytestr(s):
248 248 """Promote bytes to bytestr"""
249 249 if isinstance(s, bytes):
250 250 return bytestr(s)
251 251 return s
252 252
253 253
254 254 def sysbytes(s):
255 255 """Convert an internal str (e.g. keyword, __doc__) back to bytes
256 256
257 257 This never raises UnicodeEncodeError, but only ASCII characters
258 258 can be round-tripped by sysstr(sysbytes(s)).
259 259 """
260 260 if isinstance(s, bytes):
261 261 return s
262 262 return s.encode('utf-8')
263 263
264 264
265 265 def sysstr(s):
266 266 """Return a keyword str to be passed to Python functions such as
267 267 getattr() and str.encode()
268 268
269 269 This never raises UnicodeDecodeError. Non-ascii characters are
270 270 considered invalid and mapped to arbitrary but unique code points
271 271 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
272 272 """
273 273 if isinstance(s, builtins.str):
274 274 return s
275 275 return s.decode('latin-1')
276 276
277 277
278 278 def strurl(url):
279 279 """Converts a bytes url back to str"""
280 280 if isinstance(url, bytes):
281 281 return url.decode('ascii')
282 282 return url
283 283
284 284
285 285 def bytesurl(url):
286 286 """Converts a str url to bytes by encoding in ascii"""
287 287 if isinstance(url, str):
288 288 return url.encode('ascii')
289 289 return url
290 290
291 291
292 292 def raisewithtb(exc, tb):
293 293 """Raise exception with the given traceback"""
294 294 raise exc.with_traceback(tb)
295 295
296 296
297 297 def getdoc(obj):
298 298 """Get docstring as bytes; may be None so gettext() won't confuse it
299 299 with _('')"""
300 300 doc = getattr(obj, '__doc__', None)
301 301 if doc is None:
302 302 return doc
303 303 return sysbytes(doc)
304 304
305 305
306 306 def _wrapattrfunc(f):
307 307 @functools.wraps(f)
308 308 def w(object, name, *args):
309 309 return f(object, sysstr(name), *args)
310 310
311 311 return w
312 312
313 313
314 314 # these wrappers are automagically imported by hgloader
315 315 delattr = _wrapattrfunc(builtins.delattr)
316 316 getattr = _wrapattrfunc(builtins.getattr)
317 317 hasattr = _wrapattrfunc(builtins.hasattr)
318 318 setattr = _wrapattrfunc(builtins.setattr)
319 319 xrange = builtins.range
320 320 unicode = str
321 321
322 322
323 323 def open(name, mode=b'r', buffering=-1, encoding=None):
324 324 return builtins.open(name, sysstr(mode), buffering, encoding)
325 325
326 326
327 327 safehasattr = _wrapattrfunc(builtins.hasattr)
328 328
329 329
330 330 def _getoptbwrapper(orig, args, shortlist, namelist):
331 331 """
332 332 Takes bytes arguments, converts them to unicode, passes them to
333 333 getopt.getopt(), converts the returned values back to bytes and then
334 334 returns them for Python 3 compatibility, as getopt.getopt() doesn't accept
335 335 bytes on Python 3.
336 336 """
337 337 args = [a.decode('latin-1') for a in args]
338 338 shortlist = shortlist.decode('latin-1')
339 339 namelist = [a.decode('latin-1') for a in namelist]
340 340 opts, args = orig(args, shortlist, namelist)
341 341 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
342 342 args = [a.encode('latin-1') for a in args]
343 343 return opts, args
344 344
345 345
346 346 def strkwargs(dic):
347 347 """
348 348 Converts the keys of a python dictionary to str i.e. unicodes so that
349 349 they can be passed as keyword arguments as dictionaries with bytes keys
350 350 can't be passed as keyword arguments to functions on Python 3.
351 351 """
352 352 dic = {k.decode('latin-1'): v for k, v in dic.items()}
353 353 return dic
354 354
355 355
356 356 def byteskwargs(dic):
357 357 """
358 358 Converts keys of python dictionaries to bytes as they were converted to
359 359 str to pass that dictionary as a keyword argument on Python 3.
360 360 """
361 361 dic = {k.encode('latin-1'): v for k, v in dic.items()}
362 362 return dic
363 363
364 364
365 365 # TODO: handle shlex.shlex().
366 366 def shlexsplit(s, comments=False, posix=True):
367 367 """
368 368 Takes a bytes argument, converts it to str i.e. unicode, passes that into
369 369 shlex.split(), converts the returned values to bytes and returns them for
370 370 Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
371 371 """
372 372 ret = shlex.split(s.decode('latin-1'), comments, posix)
373 373 return [a.encode('latin-1') for a in ret]
374 374
375 375
376 376 iteritems = lambda x: x.items()
377 377 itervalues = lambda x: x.values()
378 378
379 379 # Python 3.5's json.load and json.loads require str. We polyfill its
380 380 # code for detecting encoding from bytes.
381 381 if sys.version_info[0:2] < (3, 6):
382 382
383 383 def _detect_encoding(b):
384 384 bstartswith = b.startswith
385 385 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
386 386 return 'utf-32'
387 387 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
388 388 return 'utf-16'
389 389 if bstartswith(codecs.BOM_UTF8):
390 390 return 'utf-8-sig'
391 391
392 392 if len(b) >= 4:
393 393 if not b[0]:
394 394 # 00 00 -- -- - utf-32-be
395 395 # 00 XX -- -- - utf-16-be
396 396 return 'utf-16-be' if b[1] else 'utf-32-be'
397 397 if not b[1]:
398 398 # XX 00 00 00 - utf-32-le
399 399 # XX 00 00 XX - utf-16-le
400 400 # XX 00 XX -- - utf-16-le
401 401 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
402 402 elif len(b) == 2:
403 403 if not b[0]:
404 404 # 00 XX - utf-16-be
405 405 return 'utf-16-be'
406 406 if not b[1]:
407 407 # XX 00 - utf-16-le
408 408 return 'utf-16-le'
409 409 # default
410 410 return 'utf-8'
411 411
412 412 def json_loads(s, *args, **kwargs):
413 413 if isinstance(s, (bytes, bytearray)):
414 414 s = s.decode(_detect_encoding(s), 'surrogatepass')
415 415
416 416 return json.loads(s, *args, **kwargs)
417 417
418
419 418 else:
420 419 json_loads = json.loads
421 420
422 421 isjython = sysplatform.startswith(b'java')
423 422
424 423 isdarwin = sysplatform.startswith(b'darwin')
425 424 islinux = sysplatform.startswith(b'linux')
426 425 isposix = osname == b'posix'
427 426 iswindows = osname == b'nt'
428 427
429 428
430 429 def getoptb(args, shortlist, namelist):
431 430 return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
432 431
433 432
434 433 def gnugetoptb(args, shortlist, namelist):
435 434 return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
436 435
437 436
438 437 def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
439 438 return tempfile.mkdtemp(suffix, prefix, dir)
440 439
441 440
442 441 # text=True is not supported; use util.from/tonativeeol() instead
443 442 def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
444 443 return tempfile.mkstemp(suffix, prefix, dir)
445 444
446 445
447 446 # TemporaryFile does not support an "encoding=" argument on python2.
448 447 # Files returned by this wrapper are always opened in byte mode.
449 448 def unnamedtempfile(mode=None, *args, **kwargs):
450 449 if mode is None:
451 450 mode = 'w+b'
452 451 else:
453 452 mode = sysstr(mode)
454 453 assert 'b' in mode
455 454 return tempfile.TemporaryFile(mode, *args, **kwargs)
456 455
457 456
458 457 # NamedTemporaryFile does not support an "encoding=" argument on python2.
459 458 # Files returned by this wrapper are always opened in byte mode.
460 459 def namedtempfile(
461 460 mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
462 461 ):
463 462 mode = sysstr(mode)
464 463 assert 'b' in mode
465 464 return tempfile.NamedTemporaryFile(
466 465 mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
467 466 )
General Comments 0
You need to be logged in to leave comments. Login now