##// END OF EJS Templates
typing: disable `signature-mismatch` warnings on a few bytestr functions...
Matt Harbison -
r50996:b900f40c default
parent child Browse files
Show More
@@ -1,478 +1,482 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11
12 12 import builtins
13 13 import codecs
14 14 import concurrent.futures as futures
15 15 import functools
16 16 import getopt
17 17 import http.client as httplib
18 18 import http.cookiejar as cookielib
19 19 import inspect
20 20 import io
21 21 import json
22 22 import os
23 23 import queue
24 24 import shlex
25 25 import socketserver
26 26 import struct
27 27 import sys
28 28 import tempfile
29 29 import xmlrpc.client as xmlrpclib
30 30
31 31 from typing import (
32 32 Any,
33 33 AnyStr,
34 34 BinaryIO,
35 35 Dict,
36 36 Iterable,
37 37 Iterator,
38 38 List,
39 39 Mapping,
40 40 NoReturn,
41 41 Optional,
42 42 Sequence,
43 43 Tuple,
44 44 Type,
45 45 TypeVar,
46 46 cast,
47 47 overload,
48 48 )
49 49
# True when the running interpreter's major version is 3 or later.
ispy3 = sys.version_info[0] >= 3
# True when '__pypy__' is among the interpreter's built-in modules.
ispypy = '__pypy__' in sys.builtin_module_names
# Runtime default; guards annotation-only definitions further down the file.
TYPE_CHECKING = False

# NOTE(review): globals() is not empty while a module body executes, so this
# branch presumably never runs at import time and exists only so a static
# analyzer (pytype, per the inline comment) sees the real value -- confirm.
if not globals():  # hide this from non-pytype users
    import typing

    TYPE_CHECKING = typing.TYPE_CHECKING

# Shape returned by the getopt wrappers: (list of (option, value) pairs,
# list of remaining arguments), everything as bytes.
_GetOptResult = Tuple[List[Tuple[bytes, bytes]], List[bytes]]
# Generic placeholder used to tie a return type to an argument type.
_T0 = TypeVar('_T0')
# Type variable bounded by bytestr (forward reference, hence the string).
_Tbytestr = TypeVar('_Tbytestr', bound='bytestr')
62 62
63 63
def future_set_exception_info(f, exc_info):
    """Record the first element of ``exc_info`` as the exception of ``f``.

    Only ``exc_info[0]`` is used; any further elements are ignored.
    """
    # NOTE(review): presumably callers pass ``(exception, traceback)`` so that
    # element 0 is the exception instance -- confirm against the call sites.
    exception = exc_info[0]
    f.set_exception(exception)
66 66
67 67
# Module-level alias of the builtin exception, so it can be reached as an
# attribute of this module.
FileNotFoundError = builtins.FileNotFoundError
69 69
70 70
def identity(a: _T0) -> _T0:
    """Return the argument itself, unchanged."""
    return a
73 73
74 74
def _rapply(f, xs):
    """Recursive worker for rapply().

    ``None`` is returned untouched, lists/sets/tuples and dicts are rebuilt
    with the same type from their converted members (dict keys and values
    are both converted), and anything else is passed to ``f``.
    """
    if xs is None:
        # assume None means non-value of optional data
        return xs
    if isinstance(xs, dict):
        converted_pairs = (
            (_rapply(f, key), _rapply(f, value)) for key, value in xs.items()
        )
        return type(xs)(converted_pairs)
    if isinstance(xs, (list, set, tuple)):
        return type(xs)(_rapply(f, item) for item in xs)
    return f(xs)
84 84
85 85
def rapply(f, xs):
    """Apply ``f`` to every leaf of ``xs`` while keeping its structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    # identity changes nothing, so hand back the very same object instead of
    # rebuilding it (fast path mainly for py2)
    return xs if f is identity else _rapply(f, xs)
108 108
109 109
if os.name == r'nt':
    # MBCS (or ANSI) filesystem encoding must be used as before.
    # Otherwise non-ASCII filenames in existing repositories would be
    # corrupted.
    # This must be set once prior to any fsencode/fsdecode calls.
    sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr

# Direct aliases of the os-level filename encode/decode helpers.
fsencode = os.fsencode
fsdecode = os.fsdecode

# Bytes counterparts of the corresponding ``os`` string constants, each
# encoded as ASCII.
oscurdir: bytes = os.curdir.encode('ascii')
oslinesep: bytes = os.linesep.encode('ascii')
osname: bytes = os.name.encode('ascii')
ospathsep: bytes = os.pathsep.encode('ascii')
ospardir: bytes = os.pardir.encode('ascii')
ossep: bytes = os.sep.encode('ascii')
# None when the platform defines no alternative separator.
osaltsep: Optional[bytes] = os.altsep.encode('ascii') if os.altsep else None
osdevnull: bytes = os.devnull.encode('ascii')

sysplatform: bytes = sys.platform.encode('ascii')
# Empty bytes when sys.executable is empty or None.
sysexecutable: bytes = os.fsencode(sys.executable) if sys.executable else b''
130 130
131 131
def maplist(*args):
    """Call ``map(*args)`` and return the results as a list."""
    return [*map(*args)]
134 134
135 135
def rangelist(*args):
    """Call ``range(*args)`` and return the values as a list."""
    return [*range(*args)]
138 138
139 139
def ziplist(*args):
    """Call ``zip(*args)`` and return the tuples as a list."""
    return [*zip(*args)]
142 142
143 143
# Aliases for the builtin input() and for inspect.getfullargspec.
rawinput = input
getargspec = inspect.getfullargspec

# ``long`` is simply ``int`` here.
long = int

# sysargv is only defined when sys.argv exists and is not None.
if builtins.getattr(sys, 'argv', None) is not None:
    # On POSIX, the char** argv array is converted to Python str using
    # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
    # isn't directly callable from Python code. In practice, os.fsencode()
    # can be used instead (this is recommended by Python's documentation
    # for sys.argv).
    #
    # On Windows, the wchar_t **argv is passed into the interpreter as-is.
    # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
    # there's an additional wrinkle. What we really want to access is the
    # ANSI codepage representation of the arguments, as this is what
    # `int main()` would receive if Python 3 didn't define `int wmain()`
    # (this is how Python 2 worked). To get that, we encode with the mbcs
    # encoding, which will pass CP_ACP to the underlying Windows API to
    # produce bytes.
    sysargv: List[bytes] = []
    if os.name == r'nt':
        # "ignore" drops characters the mbcs codec cannot encode.
        sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
    else:
        sysargv = [fsencode(a) for a in sys.argv]

# Packs one integer as a single unsigned byte ('>B'), giving a length-1 bytes.
bytechr = struct.Struct('>B').pack
# Formats its argument with the bytes '%r' conversion.
byterepr = b'%r'.__mod__
172 172
173 173
class bytestr(bytes):
    """A bytes subclass that mostly behaves like a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    If the argument provides __bytes__(), it is used:

    >>> class bytesable:
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    A non-ascii str is never converted implicitly, because its encoding is
    unknown:

    >>> bytestr(chr(0x80))  # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    bytestr and bytes compare with each other:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Indexing and iteration yield bytes rather than integers:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    The bytestr type does not survive operations, so results must be cast
    back to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Do not hand a bytestr object to a function that expects bytearray-like
    behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    # Trick pytype into not demanding Iterable[int] be passed to __new__(),
    # since the appropriate bytes format is done internally.
    #
    # https://github.com/google/pytype/issues/500
    if TYPE_CHECKING:

        def __init__(self, s: object = b'') -> None:
            pass

    def __new__(cls: Type[_Tbytestr], s: object = b'') -> _Tbytestr:
        # An existing bytestr is handed back as-is (no copy).
        if isinstance(s, bytestr):
            return s
        byteslike = isinstance(s, (bytes, bytearray))
        # Anything that is neither bytes-like nor convertible through
        # __bytes__ goes through its str() form, which must be pure ASCII.
        if not (
            byteslike
            or builtins.hasattr(s, u'__bytes__')  # hasattr-py3-only
        ):
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    # The base class uses `int` return in py3, but the point of this class is
    # to behave like py2.
    def __getitem__(self, key) -> bytes:  # pytype: disable=signature-mismatch
        item = bytes.__getitem__(self, key)
        # Slices already come back as bytes; a single index yields an int
        # that has to be packed into a length-1 bytes.
        return item if isinstance(item, bytes) else bytechr(item)

    # The base class expects `Iterator[int]` return in py3, but the point of
    # this class is to behave like py2.
    def __iter__(self) -> Iterator[bytes]:  # pytype: disable=signature-mismatch
        raw_ints = bytes.__iter__(self)
        return iterbytestr(raw_ints)

    def __repr__(self) -> str:
        plain = bytes.__repr__(self)
        return plain[1:]  # drop b''
258 262
259 263
def iterbytestr(s: Iterable[int]) -> Iterator[bytes]:
    """Iterate over ``s`` the way a Python 2 str would be iterated.

    Each integer produced by ``s`` is converted with ``bytechr``; the
    conversion happens lazily as the returned iterator is consumed.
    """
    to_byte = bytechr
    return map(to_byte, s)
263 267
264 268
# Signatures for the type checker only: bytes becomes bytestr, every other
# type is returned as the type it came in with.
if TYPE_CHECKING:

    @overload
    def maybebytestr(s: bytes) -> bytestr:
        ...

    @overload
    def maybebytestr(s: _T0) -> _T0:
        ...


def maybebytestr(s):
    """Wrap ``s`` in bytestr when it is bytes; otherwise return it as-is"""
    return bytestr(s) if isinstance(s, bytes) else s
281 285
282 286
def sysbytes(s: AnyStr) -> bytes:
    """Turn an internal str (e.g. keyword, __doc__) back into bytes

    bytes input is returned unchanged; str input is encoded as UTF-8.
    Only ASCII characters can be round-tripped via sysstr(sysbytes(s)).
    """
    return s if isinstance(s, bytes) else s.encode('utf-8')
292 296
293 297
def sysstr(s: AnyStr) -> str:
    """Produce a keyword str suitable for Python functions such as
    getattr() and str.encode()

    str input is returned unchanged. bytes input is decoded as latin-1, so
    every byte maps to exactly one code point and decoding cannot fail;
    non-ascii bytes therefore end up as arbitrary but distinct characters,
    keeping 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if not isinstance(s, builtins.str):
        return s.decode('latin-1')
    return s
305 309
306 310
def strurl(url: AnyStr) -> str:
    """Return ``url`` as str, decoding bytes input as ASCII"""
    return url.decode('ascii') if isinstance(url, bytes) else url
312 316
313 317
def bytesurl(url: AnyStr) -> bytes:
    """Return ``url`` as bytes, encoding str input as ASCII"""
    return url.encode('ascii') if isinstance(url, str) else url
319 323
320 324
def raisewithtb(exc: BaseException, tb) -> NoReturn:
    """Attach traceback ``tb`` to ``exc`` and raise it"""
    exc_with_tb = exc.with_traceback(tb)
    raise exc_with_tb
324 328
325 329
def getdoc(obj: object) -> Optional[bytes]:
    """Return the docstring of ``obj`` as bytes; a missing docstring stays
    None so gettext() won't confuse it with _('')"""
    docstring = builtins.getattr(obj, '__doc__', None)
    return None if docstring is None else sysbytes(docstring)
333 337
334 338
def _wrapattrfunc(f):
    """Wrap an attribute function so its name argument goes through sysstr().

    The returned callable forwards ``(object, sysstr(name), *args)`` to ``f``
    and carries ``f``'s metadata via functools.wraps.
    """

    def w(object, name, *args):
        return f(object, sysstr(name), *args)

    return functools.wraps(f)(w)
341 345
342 346
# these wrappers are automagically imported by hgloader
# Each one passes the attribute name through sysstr() (see _wrapattrfunc)
# before calling the corresponding builtin.
delattr = _wrapattrfunc(builtins.delattr)
getattr = _wrapattrfunc(builtins.getattr)
hasattr = _wrapattrfunc(builtins.hasattr)
setattr = _wrapattrfunc(builtins.setattr)
# Plain aliases for the builtin range and str types.
xrange = builtins.range
unicode = str
350 354
351 355
def open(
    name,
    mode: AnyStr = b'r',
    buffering: int = -1,
    encoding: Optional[str] = None,
) -> Any:
    """Call builtins.open() with ``mode`` converted through sysstr().

    ``name``, ``buffering`` and ``encoding`` are forwarded unchanged.
    """
    # TODO: assert binary mode, and cast result to BinaryIO?
    native_mode = sysstr(mode)
    return builtins.open(name, native_mode, buffering, encoding)
360 364
361 365
# builtins.hasattr wrapped so the attribute name is passed through sysstr().
safehasattr = _wrapattrfunc(builtins.hasattr)
363 367
364 368
def _getoptbwrapper(
    orig, args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """
    Run the getopt-style parser ``orig`` on bytes input.

    The arguments, the short option string and the long option names are
    decoded as latin-1 before being passed to ``orig``, since the getopt
    functions do not accept bytes on Python 3. The parsed (option, value)
    pairs and the leftover arguments are encoded back to bytes with latin-1.
    """
    parsed, leftover = orig(
        [arg.decode('latin-1') for arg in args],
        shortlist.decode('latin-1'),
        [name.decode('latin-1') for name in namelist],
    )
    opts = [
        (pair[0].encode('latin-1'), pair[1].encode('latin-1'))
        for pair in parsed
    ]
    return opts, [arg.encode('latin-1') for arg in leftover]
381 385
382 386
def strkwargs(dic: Mapping[bytes, _T0]) -> Dict[str, _T0]:
    """
    Return a new dict whose bytes keys are decoded to str with latin-1.

    Dictionaries with bytes keys can't be expanded into keyword arguments
    on Python 3, so the keys have to become str first. Values are kept
    as they are.
    """
    return {key.decode('latin-1'): value for key, value in dic.items()}
391 395
392 396
def byteskwargs(dic: Mapping[str, _T0]) -> Dict[bytes, _T0]:
    """
    Return a new dict whose str keys are encoded to bytes with latin-1.

    This undoes the key conversion that was needed to pass the dictionary
    as keyword arguments on Python 3. Values are kept as they are.
    """
    return {key.encode('latin-1'): value for key, value in dic.items()}
400 404
401 405
# TODO: handle shlex.shlex().
def shlexsplit(
    s: bytes, comments: bool = False, posix: bool = True
) -> List[bytes]:
    """
    Split the bytes string ``s`` with shlex.split() and return bytes tokens.

    shlex.split() doesn't accept bytes on Python 3, so ``s`` is decoded as
    latin-1 first and each resulting token is encoded back with latin-1.
    ``comments`` and ``posix`` are forwarded to shlex.split().
    """
    text = s.decode('latin-1')
    return [
        token.encode('latin-1')
        for token in shlex.split(text, comments, posix)
    ]
413 417
414 418
# Thin helpers returning the items()/values() view of a mapping.
iteritems = lambda x: x.items()
itervalues = lambda x: x.values()

# Alias for json.loads.
json_loads = json.loads

# Platform flags derived from the bytes forms of sys.platform and os.name.
isjython: bool = sysplatform.startswith(b'java')

isdarwin: bool = sysplatform.startswith(b'darwin')
islinux: bool = sysplatform.startswith(b'linux')
isposix: bool = osname == b'posix'
iswindows: bool = osname == b'nt'
426 430
427 431
def getoptb(
    args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """Parse bytes arguments with getopt.getopt() via _getoptbwrapper()."""
    parser = getopt.getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
432 436
433 437
def gnugetoptb(
    args: Sequence[bytes], shortlist: bytes, namelist: Sequence[bytes]
) -> _GetOptResult:
    """Parse bytes arguments with getopt.gnu_getopt() via _getoptbwrapper()."""
    parser = getopt.gnu_getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
438 442
439 443
def mkdtemp(
    suffix: bytes = b'', prefix: bytes = b'tmp', dir: Optional[bytes] = None
) -> bytes:
    """Create a temporary directory through tempfile.mkdtemp().

    The defaults for ``suffix`` and ``prefix`` are bytes; all three
    arguments are forwarded unchanged.
    """
    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
444 448
445 449
# text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(
    suffix: bytes = b'', prefix: bytes = b'tmp', dir: Optional[bytes] = None
) -> Tuple[int, bytes]:
    """Create a temporary file through tempfile.mkstemp().

    The defaults for ``suffix`` and ``prefix`` are bytes; all three
    arguments are forwarded unchanged and the result of tempfile.mkstemp()
    is returned as-is.
    """
    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
451 455
452 456
# TemporaryFile does not support an "encoding=" argument on python2.
# Files returned by this wrapper are always opened in byte mode.
def unnamedtempfile(mode: Optional[bytes] = None, *args, **kwargs) -> BinaryIO:
    """Open a tempfile.TemporaryFile() in a binary mode.

    ``mode`` defaults to 'w+b' when omitted; otherwise it is converted with
    sysstr() and must contain 'b'. Remaining arguments are forwarded to
    tempfile.TemporaryFile().
    """
    mode = 'w+b' if mode is None else sysstr(mode)
    assert 'b' in mode
    tmp = tempfile.TemporaryFile(mode, *args, **kwargs)
    return cast(BinaryIO, tmp)
462 466
463 467
# NamedTemporaryFile does not support an "encoding=" argument on python2.
# Files returned by this wrapper are always opened in byte mode.
def namedtempfile(
    mode: bytes = b'w+b',
    bufsize: int = -1,
    suffix: bytes = b'',
    prefix: bytes = b'tmp',
    dir: Optional[bytes] = None,
    delete: bool = True,
):
    """Open a tempfile.NamedTemporaryFile() in a binary mode.

    ``mode`` is converted with sysstr() and must contain 'b'. ``bufsize`` is
    passed as the second positional argument; ``suffix``, ``prefix``, ``dir``
    and ``delete`` are forwarded as keyword arguments.
    """
    native_mode = sysstr(mode)
    assert 'b' in native_mode
    options = dict(suffix=suffix, prefix=prefix, dir=dir, delete=delete)
    return tempfile.NamedTemporaryFile(native_mode, bufsize, **options)
General Comments 0
You need to be logged in to leave comments. Login now