##// END OF EJS Templates
pycompat: deprecate using bytes...
marmoute -
r51817:9bffc6c4 default
parent child Browse files
Show More
@@ -1,498 +1,505 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11
12 12 import builtins
13 13 import codecs
14 14 import concurrent.futures as futures
15 15 import functools
16 16 import getopt
17 17 import http.client as httplib
18 18 import http.cookiejar as cookielib
19 19 import inspect
20 20 import io
21 21 import json
22 22 import os
23 23 import queue
24 24 import shlex
25 25 import socketserver
26 26 import struct
27 27 import sys
28 28 import tempfile
29 29 import xmlrpc.client as xmlrpclib
30 30
31 31 from typing import (
32 32 Any,
33 33 AnyStr,
34 34 BinaryIO,
35 35 Callable,
36 36 Dict,
37 37 Iterable,
38 38 Iterator,
39 39 List,
40 40 Mapping,
41 41 NoReturn,
42 42 Optional,
43 43 Sequence,
44 44 Tuple,
45 45 Type,
46 46 TypeVar,
47 47 cast,
48 48 overload,
49 49 )
50 50
# Interpreter feature flags.
ispy3 = sys.version_info[0] >= 3
ispypy = '__pypy__' in sys.builtin_module_names
TYPE_CHECKING = False

# globals() is never empty when the module really executes, so this branch
# is dead at runtime; it is only meaningful to a static analyser.
if not globals():  # hide this from non-pytype users
    import typing

    TYPE_CHECKING = typing.TYPE_CHECKING

# Shape returned by the getopt wrappers: (parsed options, leftover args).
_GetOptResult = Tuple[List[Tuple[bytes, bytes]], List[bytes]]
_T0 = TypeVar('_T0')
_T1 = TypeVar('_T1')
_S = TypeVar('_S')
_Tbytestr = TypeVar('_Tbytestr', bound='bytestr')
65 65
66 66
def future_set_exception_info(f, exc_info):
    """Mark future ``f`` as failed.

    Only the first item of ``exc_info`` is used; it is passed unchanged to
    ``f.set_exception()``.
    """
    # NOTE(review): presumably callers pass a sequence whose first item is
    # the exception instance (not the ``sys.exc_info()`` type slot) --
    # confirm against the call sites.
    f.set_exception(exc_info[0])
69 69
70 70
# Re-export the builtin exception under this module's namespace.
FileNotFoundError = builtins.FileNotFoundError
72 72
73 73
def identity(a: _T0) -> _T0:
    """Return ``a`` unchanged."""
    return a
76 76
77 77
def _rapply(f, xs):
    """Recursive worker for rapply().

    ``None`` is returned untouched, lists/sets/tuples and dicts are rebuilt
    with their own type from the recursively converted items (dict keys
    included), and anything else is passed to ``f``.
    """
    if xs is None:
        # assume None means non-value of optional data
        return xs
    if isinstance(xs, (list, set, tuple)):
        return type(xs)(_rapply(f, x) for x in xs)
    if isinstance(xs, dict):
        return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
    return f(xs)
87 87
88 88
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    if f is identity:
        # fast path mainly for py2
        # (the very same object is returned, no copy is made)
        return xs
    return _rapply(f, xs)
111 111
112 112
if os.name == r'nt':
    # MBCS (or ANSI) filesystem encoding must be used as before.
    # Otherwise non-ASCII filenames in existing repositories would be
    # corrupted.
    # This must be set once prior to any fsencode/fsdecode calls.
    sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr

fsencode = os.fsencode
fsdecode = os.fsdecode

# Bytes counterparts of the str constants exposed by the ``os`` module.
oscurdir: bytes = os.curdir.encode('ascii')
oslinesep: bytes = os.linesep.encode('ascii')
osname: bytes = os.name.encode('ascii')
ospathsep: bytes = os.pathsep.encode('ascii')
ospardir: bytes = os.pardir.encode('ascii')
ossep: bytes = os.sep.encode('ascii')
# os.altsep may be None, in which case there is nothing to encode.
osaltsep: Optional[bytes] = os.altsep.encode('ascii') if os.altsep else None
osdevnull: bytes = os.devnull.encode('ascii')

sysplatform: bytes = sys.platform.encode('ascii')
# An empty/None sys.executable is mapped to b''.
sysexecutable: bytes = os.fsencode(sys.executable) if sys.executable else b''
133 133
134 134
if TYPE_CHECKING:
    # Typed signatures for the one- and two-iterable call shapes; only
    # evaluated when TYPE_CHECKING is true.

    @overload
    def maplist(f: Callable[[_T0], _S], arg: Iterable[_T0]) -> List[_S]:
        ...

    @overload
    def maplist(
        f: Callable[[_T0, _T1], _S], arg1: Iterable[_T0], arg2: Iterable[_T1]
    ) -> List[_S]:
        ...


def maplist(f, *args):
    """Return ``map(f, *args)`` materialized as a list."""
    return list(map(f, *args))
150 150
151 151
def rangelist(*args) -> List[int]:
    """Return ``range(*args)`` materialized as a list."""
    return list(range(*args))
154 154
155 155
def ziplist(*args):
    """Return ``zip(*args)`` materialized as a list of tuples."""
    return list(zip(*args))
158 158
159 159
# Aliases for builtins / stdlib functions.
rawinput = input
getargspec = inspect.getfullargspec

long = int
164 164
# sysargv is only defined when the interpreter provides a non-None sys.argv.
if builtins.getattr(sys, 'argv', None) is not None:
    # On POSIX, the char** argv array is converted to Python str using
    # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
    # isn't directly callable from Python code. In practice, os.fsencode()
    # can be used instead (this is recommended by Python's documentation
    # for sys.argv).
    #
    # On Windows, the wchar_t **argv is passed into the interpreter as-is.
    # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
    # there's an additional wrinkle. What we really want to access is the
    # ANSI codepage representation of the arguments, as this is what
    # `int main()` would receive if Python 3 didn't define `int wmain()`
    # (this is how Python 2 worked). To get that, we encode with the mbcs
    # encoding, which will pass CP_ACP to the underlying Windows API to
    # produce bytes.
    sysargv: List[bytes] = []
    if os.name == r'nt':
        sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
    else:
        sysargv = [fsencode(a) for a in sys.argv]
185 185
# bytechr(n): pack one unsigned byte value into a length-1 bytes object.
bytechr = struct.Struct('>B').pack
# byterepr(x): the result of b'%r' % x.
byterepr = b'%r'.__mod__
188 188
189 189
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable:
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    # Trick pytype into not demanding Iterable[int] be passed to __new__(),
    # since the appropriate bytes format is done internally.
    #
    # https://github.com/google/pytype/issues/500
    if TYPE_CHECKING:

        def __init__(self, s: object = b'') -> None:
            pass

    def __new__(cls: Type[_Tbytestr], s: object = b'') -> _Tbytestr:
        # An existing bytestr is returned as-is (no copy).
        if isinstance(s, bytestr):
            return s
        # Anything that is neither bytes-like nor provides __bytes__ goes
        # through str() and must then be pure ASCII.
        if not isinstance(
            s, (bytes, bytearray)
        ) and not builtins.hasattr(  # hasattr-py3-only
            s, u'__bytes__'
        ):
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    # The base class uses `int` return in py3, but the point of this class is to
    # behave like py2.
    def __getitem__(self, key) -> bytes:  # pytype: disable=signature-mismatch
        s = bytes.__getitem__(self, key)
        if not isinstance(s, bytes):
            # integer index: turn the int back into a length-1 bytes
            s = bytechr(s)
        return s

    # The base class expects `Iterator[int]` return in py3, but the point of
    # this class is to behave like py2.
    def __iter__(self) -> Iterator[bytes]:  # pytype: disable=signature-mismatch
        return iterbytestr(bytes.__iter__(self))

    def __repr__(self) -> str:
        return bytes.__repr__(self)[1:]  # drop b''
278 278
279 279
def iterbytestr(s: Iterable[int]) -> Iterator[bytes]:
    """Iterate bytes as if it were a str object of Python 2"""
    # Lazily maps every integer item to a length-1 bytes via bytechr().
    return map(bytechr, s)
283 283
284 284
if TYPE_CHECKING:
    # Typed signatures: bytes input yields bytestr, any other type is
    # returned with its own type.

    @overload
    def maybebytestr(s: bytes) -> bytestr:
        ...

    @overload
    def maybebytestr(s: _T0) -> _T0:
        ...


def maybebytestr(s):
    """Promote bytes to bytestr"""
    if isinstance(s, bytes):
        return bytestr(s)
    # non-bytes values pass through untouched
    return s
301 301
302 302
def sysbytes(s: AnyStr) -> bytes:
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    This never raises UnicodeEncodeError, but only ASCII characters
    can be round-trip by sysstr(sysbytes(s)).

    A value that is already bytes is returned unchanged; str is encoded as
    UTF-8.
    """
    if isinstance(s, bytes):
        return s
    return s.encode('utf-8')
312 312
313 313
def sysstr(s: AnyStr) -> str:
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    This never raises UnicodeDecodeError. Non-ascii characters are
    considered invalid and mapped to arbitrary but unique code points
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.

    A value that is already str is returned unchanged; bytes are decoded
    as latin-1.
    """
    if isinstance(s, builtins.str):
        return s
    return s.decode('latin-1')
325 325
326 326
def strurl(url: AnyStr) -> str:
    """Converts a bytes url back to str

    bytes are decoded as ASCII (non-ASCII input raises UnicodeDecodeError);
    a str is returned unchanged.
    """
    if isinstance(url, bytes):
        return url.decode('ascii')
    return url
332 332
333 333
def bytesurl(url: AnyStr) -> bytes:
    """Converts a str url to bytes by encoding in ascii

    Non-ASCII str input raises UnicodeEncodeError; bytes are returned
    unchanged.
    """
    if isinstance(url, str):
        return url.encode('ascii')
    return url
339 339
340 340
def raisewithtb(exc: BaseException, tb) -> NoReturn:
    """Raise exception with the given traceback"""
    raise exc.with_traceback(tb)
344 344
345 345
def getdoc(obj: object) -> Optional[bytes]:
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')"""
    # A missing __doc__ attribute is treated the same as a None docstring.
    doc = builtins.getattr(obj, '__doc__', None)
    if doc is None:
        return doc
    return sysbytes(doc)
353 353
354 354
def _wrapattrfunc(f):
    """Wrap attribute builtin ``f`` so its ``name`` argument may be bytes.

    The name is converted with sysstr() before ``f`` is called. Passing
    bytes is deprecated: it additionally triggers
    ``util.nouideprecwarn()``.
    """

    @functools.wraps(f)
    def w(object, name, *args):
        if isinstance(name, bytes):
            # util is imported only when the deprecated path is hit
            from . import util

            msg = b'function "%s" take `str` as argument, not `bytes`'
            fname = f.__name__.encode('ascii')
            msg %= fname
            util.nouideprecwarn(msg, b"6.6", stacklevel=2)
        return f(object, sysstr(name), *args)

    return w
361 368
362 369
# these wrappers are automagically imported by hgloader
delattr = _wrapattrfunc(builtins.delattr)
getattr = _wrapattrfunc(builtins.getattr)
hasattr = _wrapattrfunc(builtins.hasattr)
setattr = _wrapattrfunc(builtins.setattr)
# plain aliases for builtins
xrange = builtins.range
unicode = str
370 377
371 378
def open(
    name,
    mode: AnyStr = b'r',
    buffering: int = -1,
    encoding: Optional[str] = None,
) -> Any:
    """Call builtins.open() with ``mode`` converted to str via sysstr().

    ``name``, ``buffering`` and ``encoding`` are forwarded unchanged.
    """
    # TODO: assert binary mode, and cast result to BinaryIO?
    return builtins.open(name, sysstr(mode), buffering, encoding)
380 387
381 388
# Built exactly like the hasattr wrapper: builtins.hasattr behind
# _wrapattrfunc().
safehasattr = _wrapattrfunc(builtins.hasattr)
383 390
384 391
def _getoptbwrapper(
    orig, args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """
    Takes bytes arguments, converts them to unicode, passes them to
    getopt.getopt(), converts the returned values back to bytes and then
    returns them for Python 3 compatibility, as getopt.getopt() doesn't
    accept bytes on Python 3.

    ``orig`` is the getopt-style function to call; latin-1 is used for both
    directions of the conversion.
    """
    args = [a.decode('latin-1') for a in args]
    shortlist = shortlist.decode('latin-1')
    namelist = [a.decode('latin-1') for a in namelist]
    opts, args = orig(args, shortlist, namelist)
    opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
    args = [a.encode('latin-1') for a in args]
    return opts, args
401 408
402 409
def strkwargs(dic: Mapping[bytes, _T0]) -> Dict[str, _T0]:
    """
    Converts the keys of a python dictionary to str i.e. unicodes so that
    they can be passed as keyword arguments as dictionaries with bytes keys
    can't be passed as keyword arguments to functions on Python 3.

    Keys are decoded as latin-1; a new dict is returned and the input
    mapping is not modified.
    """
    dic = {k.decode('latin-1'): v for k, v in dic.items()}
    return dic
411 418
412 419
def byteskwargs(dic: Mapping[str, _T0]) -> Dict[bytes, _T0]:
    """
    Converts keys of python dictionaries to bytes as they were converted to
    str to pass that dictionary as a keyword argument on Python 3.

    Keys are encoded as latin-1; a new dict is returned and the input
    mapping is not modified.
    """
    dic = {k.encode('latin-1'): v for k, v in dic.items()}
    return dic
420 427
421 428
# TODO: handle shlex.shlex().
def shlexsplit(
    s: bytes, comments: bool = False, posix: bool = True
) -> List[bytes]:
    """
    Takes bytes argument, converts it to str i.e. unicodes, passes that into
    shlex.split(), converts the returned value to bytes and returns that for
    Python 3 compatibility, as shlex.split() doesn't accept bytes on
    Python 3.

    latin-1 is used for both directions of the conversion.
    """
    ret = shlex.split(s.decode('latin-1'), comments, posix)
    return [a.encode('latin-1') for a in ret]
433 440
434 441
# Thin aliases around the mapping methods.
iteritems = lambda x: x.items()
itervalues = lambda x: x.values()

json_loads = json.loads
439 446
# Platform flags derived from the bytes constants sysplatform and osname.
isjython: bool = sysplatform.startswith(b'java')

isdarwin: bool = sysplatform.startswith(b'darwin')
islinux: bool = sysplatform.startswith(b'linux')
isposix: bool = osname == b'posix'
iswindows: bool = osname == b'nt'
446 453
447 454
def getoptb(
    args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """Bytes-in/bytes-out front end for getopt.getopt()."""
    return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
452 459
453 460
def gnugetoptb(
    args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """Bytes-in/bytes-out front end for getopt.gnu_getopt()."""
    return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
458 465
459 466
def mkdtemp(
    suffix: bytes = b'', prefix: bytes = b'tmp', dir: Optional[bytes] = None
) -> bytes:
    """Call tempfile.mkdtemp() with bytes ``suffix``/``prefix``/``dir``."""
    return tempfile.mkdtemp(suffix, prefix, dir)
464 471
465 472
# text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(
    suffix: bytes = b'', prefix: bytes = b'tmp', dir: Optional[bytes] = None
) -> Tuple[int, bytes]:
    """Call tempfile.mkstemp() with bytes ``suffix``/``prefix``/``dir``."""
    return tempfile.mkstemp(suffix, prefix, dir)
471 478
472 479
# TemporaryFile does not support an "encoding=" argument on python2.
# Files returned by this wrapper are always opened in byte mode.
def unnamedtempfile(mode: Optional[bytes] = None, *args, **kwargs) -> BinaryIO:
    """Call tempfile.TemporaryFile() with a binary ``mode``.

    ``mode`` defaults to 'w+b'; otherwise it is converted with sysstr().
    An assertion rejects any mode that lacks 'b'. Remaining arguments are
    forwarded unchanged.
    """
    if mode is None:
        mode = 'w+b'
    else:
        mode = sysstr(mode)
    assert 'b' in mode
    return cast(BinaryIO, tempfile.TemporaryFile(mode, *args, **kwargs))
482 489
483 490
# NamedTemporaryFile does not support an "encoding=" argument on python2.
# Files returned by this wrapper are always opened in byte mode.
def namedtempfile(
    mode: bytes = b'w+b',
    bufsize: int = -1,
    suffix: bytes = b'',
    prefix: bytes = b'tmp',
    dir: Optional[bytes] = None,
    delete: bool = True,
):
    """Call tempfile.NamedTemporaryFile() with a binary ``mode``.

    ``mode`` is converted with sysstr() and an assertion rejects any mode
    that lacks 'b'. ``bufsize`` is passed as the second positional
    argument; the other parameters are forwarded by keyword.
    """
    mode = sysstr(mode)
    assert 'b' in mode
    return tempfile.NamedTemporaryFile(
        mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
    )
General Comments 0
You need to be logged in to leave comments. Login now