##// END OF EJS Templates
cleanup: drop the `bytes` compatibility for attribute related function...
marmoute -
r52034:c845479f default
parent child Browse files
Show More
@@ -1,505 +1,489 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11
12 12 import builtins
13 13 import codecs
14 14 import concurrent.futures as futures
15 import functools
16 15 import getopt
17 16 import http.client as httplib
18 17 import http.cookiejar as cookielib
19 18 import inspect
20 19 import io
21 20 import json
22 21 import os
23 22 import queue
24 23 import shlex
25 24 import socketserver
26 25 import struct
27 26 import sys
28 27 import tempfile
29 28 import xmlrpc.client as xmlrpclib
30 29
31 30 from typing import (
32 31 Any,
33 32 AnyStr,
34 33 BinaryIO,
35 34 Callable,
36 35 Dict,
37 36 Iterable,
38 37 Iterator,
39 38 List,
40 39 Mapping,
41 40 NoReturn,
42 41 Optional,
43 42 Sequence,
44 43 Tuple,
45 44 Type,
46 45 TypeVar,
47 46 cast,
48 47 overload,
49 48 )
50 49
# Interpreter flags computed once at import time.
ispy3 = sys.version_info[0] >= 3
ispypy = '__pypy__' in sys.builtin_module_names

# Always False at runtime: ``globals()`` is never empty at module level, so
# the branch below does not execute. It is presumably kept so that pytype
# still sees the real ``typing.TYPE_CHECKING`` value.
TYPE_CHECKING = False

if not globals():  # hide this from non-pytype users
    import typing

    TYPE_CHECKING = typing.TYPE_CHECKING

# Result shape of the getopt wrappers: (list of (option, value), leftovers),
# all expressed as bytes.
_GetOptResult = Tuple[List[Tuple[bytes, bytes]], List[bytes]]
# Generic type variables used by the annotations in this module.
_T0 = TypeVar('_T0')
_T1 = TypeVar('_T1')
_S = TypeVar('_S')
_Tbytestr = TypeVar('_Tbytestr', bound='bytestr')
65 64
66 65
def future_set_exception_info(f, exc_info):
    """Fail the future ``f`` with the first item of ``exc_info``.

    Only ``exc_info[0]`` is consulted; any remaining items are ignored.

    NOTE(review): callers presumably pass ``sys.exc_info()[1:]`` so that the
    first item is the exception instance rather than its class -- confirm.
    """
    failure = exc_info[0]
    f.set_exception(failure)
69 68
70 69
71 70 FileNotFoundError = builtins.FileNotFoundError
72 71
73 72
def identity(a: _T0) -> _T0:
    """Return ``a`` itself, unchanged (the very same object)."""
    return a
76 75
77 76
def _rapply(f, xs):
    """Recursive worker behind ``rapply()``.

    ``None`` is passed through untouched, lists/sets/tuples and dicts are
    rebuilt with the same container type after transforming every element
    (dict keys and values alike), and anything else is handed to ``f``.
    """
    if xs is None:
        # assume None means non-value of optional data
        return xs
    if isinstance(xs, (list, set, tuple)):
        container = type(xs)
        return container(_rapply(f, item) for item in xs)
    if isinstance(xs, dict):
        mapping = type(xs)
        return mapping(
            (_rapply(f, key), _rapply(f, value)) for key, value in xs.items()
        )
    return f(xs)
87 86
88 87
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    # Mapping with ``identity`` cannot change anything, so hand back the very
    # same object instead of rebuilding every container.
    return xs if f is identity else _rapply(f, xs)
111 110
112 111
if os.name == r'nt':
    # MBCS (or ANSI) filesystem encoding must be used as before.
    # Otherwise non-ASCII filenames in existing repositories would be
    # corrupted.
    # This must be set once prior to any fsencode/fsdecode calls.
    sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr

# Direct aliases of the standard filesystem encode/decode helpers.
fsencode = os.fsencode
fsdecode = os.fsdecode

# Bytes versions of the ``os`` string constants, ASCII-encoded once at import.
oscurdir: bytes = os.curdir.encode('ascii')
oslinesep: bytes = os.linesep.encode('ascii')
osname: bytes = os.name.encode('ascii')
ospathsep: bytes = os.pathsep.encode('ascii')
ospardir: bytes = os.pardir.encode('ascii')
ossep: bytes = os.sep.encode('ascii')
# ``os.altsep`` may be None; that is preserved rather than encoded.
osaltsep: Optional[bytes] = os.altsep.encode('ascii') if os.altsep else None
osdevnull: bytes = os.devnull.encode('ascii')

sysplatform: bytes = sys.platform.encode('ascii')
# Falls back to an empty bytes value when ``sys.executable`` is empty/None.
sysexecutable: bytes = os.fsencode(sys.executable) if sys.executable else b''
133 132
134 133
135 134 if TYPE_CHECKING:
136 135
137 136 @overload
138 137 def maplist(f: Callable[[_T0], _S], arg: Iterable[_T0]) -> List[_S]:
139 138 ...
140 139
141 140 @overload
142 141 def maplist(
143 142 f: Callable[[_T0, _T1], _S], arg1: Iterable[_T0], arg2: Iterable[_T1]
144 143 ) -> List[_S]:
145 144 ...
146 145
147 146
def maplist(f, *args):
    """Eager ``map()``: apply ``f`` across ``args`` and return a list."""
    return [*map(f, *args)]
150 149
151 150
def rangelist(*args) -> List[int]:
    """Eager ``range()``: accept the same arguments and return a list."""
    return [*range(*args)]
154 153
155 154
def ziplist(*args):
    """Eager ``zip()``: pair up the given iterables and return a list."""
    return [*zip(*args)]
158 157
159 158
# Aliases kept under the names used elsewhere in the code base.
rawinput = input
getargspec = inspect.getfullargspec

long = int

# ``sysargv`` is only defined when the interpreter exposes ``sys.argv``.
if builtins.getattr(sys, 'argv', None) is not None:
    # On POSIX, the char** argv array is converted to Python str using
    # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
    # isn't directly callable from Python code. In practice, os.fsencode()
    # can be used instead (this is recommended by Python's documentation
    # for sys.argv).
    #
    # On Windows, the wchar_t **argv is passed into the interpreter as-is.
    # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
    # there's an additional wrinkle. What we really want to access is the
    # ANSI codepage representation of the arguments, as this is what
    # `int main()` would receive if Python 3 didn't define `int wmain()`
    # (this is how Python 2 worked). To get that, we encode with the mbcs
    # encoding, which will pass CP_ACP to the underlying Windows API to
    # produce bytes.
    sysargv: List[bytes] = []
    if os.name == r'nt':
        sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
    else:
        sysargv = [fsencode(a) for a in sys.argv]

# bytechr(n) packs one unsigned byte: an int in 0..255 -> bytes of length 1.
bytechr = struct.Struct('>B').pack
# byterepr(x) formats ``x`` with ``b'%r'``, yielding its repr as bytes.
byterepr = b'%r'.__mod__
188 187
189 188
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable:
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    # Trick pytype into not demanding Iterable[int] be passed to __new__(),
    # since the appropriate bytes format is done internally.
    #
    # https://github.com/google/pytype/issues/500
    if TYPE_CHECKING:

        def __init__(self, s: object = b'') -> None:
            pass

    def __new__(cls: Type[_Tbytestr], s: object = b'') -> _Tbytestr:
        # An existing bytestr is returned as-is rather than copied.
        if isinstance(s, bytestr):
            return s
        # Bytes-like values and objects providing __bytes__() are handed to
        # bytes.__new__() directly; everything else goes through str() and
        # must be pure ASCII.
        byteslike = isinstance(s, (bytes, bytearray))
        if not byteslike and not builtins.hasattr(  # hasattr-py3-only
            s, u'__bytes__'
        ):
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    # The base class uses `int` return in py3, but the point of this class is to
    # behave like py2.
    def __getitem__(self, key) -> bytes:  # pytype: disable=signature-mismatch
        item = bytes.__getitem__(self, key)
        # Slices already come back as bytes; a single index yields an int
        # that has to be turned into a one-byte bytes value.
        return item if isinstance(item, bytes) else bytechr(item)

    # The base class expects `Iterator[int]` return in py3, but the point of
    # this class is to behave like py2.
    def __iter__(self) -> Iterator[bytes]:  # pytype: disable=signature-mismatch
        ints = bytes.__iter__(self)
        return iterbytestr(ints)

    def __repr__(self) -> str:
        plain = bytes.__repr__(self)
        return plain[1:]  # drop b''
278 277
279 278
def iterbytestr(s: Iterable[int]) -> Iterator[bytes]:
    """Yield each integer of ``s`` as a one-byte ``bytes`` value.

    This mirrors iterating over a Python 2 ``str``. The result is a lazy
    ``map`` iterator, not a list.
    """
    as_single_bytes = map(bytechr, s)
    return as_single_bytes
283 282
284 283
285 284 if TYPE_CHECKING:
286 285
287 286 @overload
288 287 def maybebytestr(s: bytes) -> bytestr:
289 288 ...
290 289
291 290 @overload
292 291 def maybebytestr(s: _T0) -> _T0:
293 292 ...
294 293
295 294
def maybebytestr(s):
    """Return ``s`` wrapped in ``bytestr`` when it is bytes, else ``s`` itself."""
    return bytestr(s) if isinstance(s, bytes) else s
301 300
302 301
def sysbytes(s: AnyStr) -> bytes:
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    Bytes input is returned unchanged; str input is encoded as UTF-8.
    Only ASCII characters round-trip through sysstr(sysbytes(s)).
    """
    return s if isinstance(s, bytes) else s.encode('utf-8')
312 311
313 312
def sysstr(s: AnyStr) -> str:
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    A str is returned unchanged. Bytes are decoded as latin-1, which never
    raises UnicodeDecodeError: every byte maps to its own code point, so
    'sysstr(a) != sysstr(b)' for all bytes 'a != b'.
    """
    if not isinstance(s, builtins.str):
        return s.decode('latin-1')
    return s
325 324
326 325
def strurl(url: AnyStr) -> str:
    """Return ``url`` as str, decoding bytes input as ASCII.

    A str is returned unchanged. Non-ASCII bytes raise UnicodeDecodeError.
    """
    if not isinstance(url, bytes):
        return url
    return url.decode('ascii')
332 331
333 332
def bytesurl(url: AnyStr) -> bytes:
    """Return ``url`` as bytes, encoding str input as ASCII.

    Bytes are returned unchanged. Non-ASCII text raises UnicodeEncodeError.
    """
    if not isinstance(url, str):
        return url
    return url.encode('ascii')
339 338
340 339
def raisewithtb(exc: BaseException, tb) -> NoReturn:
    """Raise ``exc`` after attaching traceback ``tb`` to it; never returns."""
    failure = exc.with_traceback(tb)
    raise failure
344 343
345 344
def getdoc(obj: object) -> Optional[bytes]:
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')"""
    text = builtins.getattr(obj, '__doc__', None)
    # A missing docstring stays None instead of becoming b''.
    return None if text is None else sysbytes(text)
353 352
354 353
355 def _wrapattrfunc(f):
356 @functools.wraps(f)
357 def w(object, name, *args):
358 if isinstance(name, bytes):
359 from . import util
360
361 msg = b'function "%s" take `str` as argument, not `bytes`'
362 fname = f.__name__.encode('ascii')
363 msg %= fname
364 util.nouideprecwarn(msg, b"6.6", stacklevel=2)
365 return f(object, sysstr(name), *args)
366
367 return w
368
369
370 354 # these wrappers are automagically imported by hgloader
371 delattr = _wrapattrfunc(builtins.delattr)
372 getattr = _wrapattrfunc(builtins.getattr)
373 hasattr = _wrapattrfunc(builtins.hasattr)
374 setattr = _wrapattrfunc(builtins.setattr)
355 delattr = builtins.delattr
356 getattr = builtins.getattr
357 hasattr = builtins.hasattr
358 setattr = builtins.setattr
375 359 xrange = builtins.range
376 360 unicode = str
377 361
378 362
def open(
    name,
    mode: AnyStr = b'r',
    buffering: int = -1,
    encoding: Optional[str] = None,
) -> Any:
    """Open ``name`` via ``builtins.open()``, accepting a bytes or str mode.

    ``mode`` is converted with ``sysstr()`` first; ``buffering`` and
    ``encoding`` are forwarded unchanged.
    """
    # TODO: assert binary mode, and cast result to BinaryIO?
    native_mode = sysstr(mode)
    return builtins.open(name, native_mode, buffering, encoding)
387 371
388 372
389 safehasattr = _wrapattrfunc(builtins.hasattr)
373 safehasattr = builtins.hasattr
390 374
391 375
def _getoptbwrapper(
    orig, args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """
    Run a getopt-style parser ``orig`` on bytes input.

    The arguments, the short option string and the long option names are
    decoded as latin-1, passed to ``orig`` (which only accepts str), and the
    parsed options and leftover arguments are encoded back to bytes.
    """
    codec = 'latin-1'
    parsed, leftover = orig(
        [arg.decode(codec) for arg in args],
        shortlist.decode(codec),
        [name.decode(codec) for name in namelist],
    )
    byteopts = [(pair[0].encode(codec), pair[1].encode(codec)) for pair in parsed]
    byteargs = [arg.encode(codec) for arg in leftover]
    return byteopts, byteargs
408 392
409 393
def strkwargs(dic: Mapping[bytes, _T0]) -> Dict[str, _T0]:
    """
    Return a copy of ``dic`` whose bytes keys are decoded to str (latin-1).

    Dictionaries with bytes keys can't be passed as keyword arguments to
    functions on Python 3, so the keys have to be converted first.
    """
    return {key.decode('latin-1'): value for key, value in dic.items()}
418 402
419 403
def byteskwargs(dic: Mapping[str, _T0]) -> Dict[bytes, _T0]:
    """
    Return a copy of ``dic`` whose str keys are encoded to bytes (latin-1).

    This reverses the str conversion that was needed to pass the dictionary
    as keyword arguments on Python 3.
    """
    return {key.encode('latin-1'): value for key, value in dic.items()}
427 411
428 412
429 413 # TODO: handle shlex.shlex().
def shlexsplit(
    s: bytes, comments: bool = False, posix: bool = True
) -> List[bytes]:
    """
    Split the bytes string ``s`` with shlex.split() and return bytes tokens.

    shlex.split() doesn't accept bytes on Python 3, so the input is decoded
    as latin-1 and every resulting token is encoded back the same way.
    """
    text = s.decode('latin-1')
    return [token.encode('latin-1') for token in shlex.split(text, comments, posix)]
440 424
441 425
# Thin helpers returning the items / values view of a mapping.
iteritems = lambda x: x.items()
itervalues = lambda x: x.values()

json_loads = json.loads

# Platform flags, derived from the bytes versions of sys.platform and os.name.
isjython: bool = sysplatform.startswith(b'java')

isdarwin: bool = sysplatform.startswith(b'darwin')
islinux: bool = sysplatform.startswith(b'linux')
isposix: bool = osname == b'posix'
iswindows: bool = osname == b'nt'
453 437
454 438
def getoptb(
    args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """Bytes-in, bytes-out front end for ``getopt.getopt()``."""
    parser = getopt.getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
459 443
460 444
def gnugetoptb(
    args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """Bytes-in, bytes-out front end for ``getopt.gnu_getopt()``."""
    parser = getopt.gnu_getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
465 449
466 450
def mkdtemp(
    suffix: bytes = b'', prefix: bytes = b'tmp', dir: Optional[bytes] = None
) -> bytes:
    """Create a temporary directory via ``tempfile.mkdtemp()``.

    The defaults are bytes, so the returned path is bytes as well.
    """
    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
471 455
472 456
# text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(
    suffix: bytes = b'', prefix: bytes = b'tmp', dir: Optional[bytes] = None
) -> Tuple[int, bytes]:
    """Create a temporary file via ``tempfile.mkstemp()``.

    Returns the ``(file descriptor, path)`` pair from ``tempfile.mkstemp()``;
    with the bytes defaults the path is bytes.
    """
    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
478 462
479 463
# TemporaryFile does not support an "encoding=" argument on python2.
# Files created by this wrapper are always opened in binary mode.
def unnamedtempfile(mode: Optional[bytes] = None, *args, **kwargs) -> BinaryIO:
    """Create an anonymous temporary file opened in binary mode.

    ``mode`` defaults to ``'w+b'``; a given mode is converted with
    ``sysstr()`` and must contain ``'b'``. Remaining arguments are forwarded
    to ``tempfile.TemporaryFile()``.
    """
    native_mode = 'w+b' if mode is None else sysstr(mode)
    assert 'b' in native_mode
    handle = tempfile.TemporaryFile(native_mode, *args, **kwargs)
    return cast(BinaryIO, handle)
489 473
490 474
# NamedTemporaryFile does not support an "encoding=" argument on python2.
# Files created by this wrapper are always opened in binary mode.
def namedtempfile(
    mode: bytes = b'w+b',
    bufsize: int = -1,
    suffix: bytes = b'',
    prefix: bytes = b'tmp',
    dir: Optional[bytes] = None,
    delete: bool = True,
):
    """Create a named temporary file opened in binary mode.

    ``mode`` is converted with ``sysstr()`` and must contain ``'b'``; the
    other arguments are forwarded to ``tempfile.NamedTemporaryFile()``.
    """
    native_mode = sysstr(mode)
    assert 'b' in native_mode
    options = dict(suffix=suffix, prefix=prefix, dir=dir, delete=delete)
    return tempfile.NamedTemporaryFile(native_mode, bufsize, **options)
General Comments 0
You need to be logged in to leave comments. Login now