##// END OF EJS Templates
pycompat: allow pycompat.sysbytes() even if input already is bytes...
Martin von Zweigbergk -
r44322:66af68d4 default
parent child Browse files
Show More
@@ -1,514 +1,516
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import json
16 16 import os
17 17 import shlex
18 18 import sys
19 19 import tempfile
20 20
21 21 ispy3 = sys.version_info[0] >= 3
22 22 ispypy = '__pypy__' in sys.builtin_module_names
23 23 TYPE_CHECKING = False
24 24
25 25 if not globals(): # hide this from non-pytype users
26 26 import typing
27 27
28 28 TYPE_CHECKING = typing.TYPE_CHECKING
29 29
30 30 if not ispy3:
31 31 import cookielib
32 32 import cPickle as pickle
33 33 import httplib
34 34 import Queue as queue
35 35 import SocketServer as socketserver
36 36 import xmlrpclib
37 37
38 38 from .thirdparty.concurrent import futures
39 39
def future_set_exception_info(f, exc_info):
    """Record a failure on future ``f``.

    ``exc_info`` is unpacked into positional arguments for
    ``f.set_exception_info()``.
    """
    f.set_exception_info(*exc_info)
42 42
43 43
44 44 else:
45 45 import concurrent.futures as futures
46 46 import http.cookiejar as cookielib
47 47 import http.client as httplib
48 48 import pickle
49 49 import queue as queue
50 50 import socketserver
51 51 import xmlrpc.client as xmlrpclib
52 52
def future_set_exception_info(f, exc_info):
    """Record a failure on future ``f``.

    Only the first item of ``exc_info`` is used; it is handed to
    ``f.set_exception()``.
    """
    exception = exc_info[0]
    f.set_exception(exception)
55 55
56 56
def identity(a):
    """Return the argument unchanged."""
    return a
59 59
60 60
61 61 def _rapply(f, xs):
62 62 if xs is None:
63 63 # assume None means non-value of optional data
64 64 return xs
65 65 if isinstance(xs, (list, set, tuple)):
66 66 return type(xs)(_rapply(f, x) for x in xs)
67 67 if isinstance(xs, dict):
68 68 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
69 69 return f(xs)
70 70
71 71
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    When ``f`` is the identity function the input object itself is returned:

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    if f is not identity:
        return _rapply(f, xs)
    # fast path mainly for py2: nothing to convert, so skip the traversal
    return xs
94 94
95 95
96 96 if ispy3:
97 97 import builtins
98 98 import codecs
99 99 import functools
100 100 import io
101 101 import struct
102 102
103 103 if os.name == r'nt' and sys.version_info >= (3, 6):
104 104 # MBCS (or ANSI) filesystem encoding must be used as before.
105 105 # Otherwise non-ASCII filenames in existing repositories would be
106 106 # corrupted.
107 107 # This must be set once prior to any fsencode/fsdecode calls.
108 108 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
109 109
110 110 fsencode = os.fsencode
111 111 fsdecode = os.fsdecode
112 112 oscurdir = os.curdir.encode('ascii')
113 113 oslinesep = os.linesep.encode('ascii')
114 114 osname = os.name.encode('ascii')
115 115 ospathsep = os.pathsep.encode('ascii')
116 116 ospardir = os.pardir.encode('ascii')
117 117 ossep = os.sep.encode('ascii')
118 118 osaltsep = os.altsep
119 119 if osaltsep:
120 120 osaltsep = osaltsep.encode('ascii')
121 121 osdevnull = os.devnull.encode('ascii')
122 122
123 123 sysplatform = sys.platform.encode('ascii')
124 124 sysexecutable = sys.executable
125 125 if sysexecutable:
126 126 sysexecutable = os.fsencode(sysexecutable)
127 127 bytesio = io.BytesIO
128 128 # TODO deprecate stringio name, as it is a lie on Python 3.
129 129 stringio = bytesio
130 130
def maplist(*args):
    """Call map() with the given arguments and return the result as a list."""
    mapped = map(*args)
    return list(mapped)
133 133
def rangelist(*args):
    """Call range() with the given arguments and return the result as a list."""
    numbers = range(*args)
    return list(numbers)
136 136
def ziplist(*args):
    """Call zip() with the given arguments and return the result as a list."""
    pairs = zip(*args)
    return list(pairs)
139 139
140 140 rawinput = input
141 141 getargspec = inspect.getfullargspec
142 142
143 143 long = int
144 144
145 145 # TODO: .buffer might not exist if std streams were replaced; we'll need
146 146 # a silly wrapper to make a bytes stream backed by a unicode one.
147 147 stdin = sys.stdin.buffer
148 148 stdout = sys.stdout.buffer
149 149 stderr = sys.stderr.buffer
150 150
151 151 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
152 152 # we can use os.fsencode() to get back bytes argv.
153 153 #
154 154 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
155 155 #
156 156 # On Windows, the native argv is unicode and is converted to MBCS bytes
157 157 # since we do enable the legacy filesystem encoding.
158 158 if getattr(sys, 'argv', None) is not None:
159 159 sysargv = list(map(os.fsencode, sys.argv))
160 160
161 161 bytechr = struct.Struct('>B').pack
162 162 byterepr = b'%r'.__mod__
163 163
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is handed back as-is instead of being copied.
        if isinstance(s, bytestr):
            return s
        byteslike = isinstance(s, (bytes, bytearray))
        # Anything that is neither bytes-like nor convertible through
        # __bytes__() goes through its str() form, which must be pure ASCII.
        if not (byteslike or hasattr(s, u'__bytes__')):  # hasattr-py3-only
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        # Slices already come back as bytes; a single index gives an int,
        # which is turned into a length-1 bytes.
        return item if isinstance(item, bytes) else bytechr(item)

    def __iter__(self):
        rawiter = bytes.__iter__(self)
        return iterbytestr(rawiter)

    def __repr__(self):
        plain = bytes.__repr__(self)
        return plain[1:]  # drop b''
239 239
def iterbytestr(s):
    """Iterate over ``s`` as a Python 2 str would: bytechr() is mapped over
    every item, so each element comes out as a length-1 bytes."""
    chars = map(bytechr, s)
    return chars
243 243
def maybebytestr(s):
    """Return ``s`` wrapped in bytestr if it is bytes; otherwise return it
    unchanged."""
    if not isinstance(s, bytes):
        return s
    return bytestr(s)
249 249
def sysbytes(s):
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    Input that already is bytes is returned unchanged; anything else is
    encoded as UTF-8. Only ASCII characters survive a
    sysstr(sysbytes(s)) round trip.
    """
    return s if isinstance(s, bytes) else s.encode('utf-8')
257 259
def sysstr(s):
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    A value that already is str is returned as-is. Bytes are decoded as
    latin-1, which never raises UnicodeDecodeError: non-ascii bytes are
    considered invalid and mapped to arbitrary but unique code points,
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if not isinstance(s, builtins.str):
        return s.decode('latin-1')
    return s
269 271
def strurl(url):
    """Decode a bytes url as ASCII into str; other values pass through"""
    if not isinstance(url, bytes):
        return url
    return url.decode('ascii')
275 277
def bytesurl(url):
    """Encode a str url as ASCII into bytes; other values pass through"""
    if not isinstance(url, str):
        return url
    return url.encode('ascii')
281 283
def raisewithtb(exc, tb):
    """Raise ``exc`` after attaching ``tb`` as its traceback"""
    traced = exc.with_traceback(tb)
    raise traced
285 287
def getdoc(obj):
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')

    A missing docstring is reported as None; otherwise the docstring is
    converted with sysbytes().
    """
    doc = getattr(obj, '__doc__', None)
    return doc if doc is None else sysbytes(doc)
293 295
def _wrapattrfunc(f):
    """Wrap an attribute builtin (getattr() and friends) so that the
    attribute name is passed through sysstr() before ``f`` is called."""

    @functools.wraps(f)
    def wrapper(object, name, *args):
        strname = sysstr(name)
        return f(object, strname, *args)

    return wrapper
300 302
301 303 # these wrappers are automagically imported by hgloader
302 304 delattr = _wrapattrfunc(builtins.delattr)
303 305 getattr = _wrapattrfunc(builtins.getattr)
304 306 hasattr = _wrapattrfunc(builtins.hasattr)
305 307 setattr = _wrapattrfunc(builtins.setattr)
306 308 xrange = builtins.range
307 309 unicode = str
308 310
def open(name, mode=b'r', buffering=-1, encoding=None):
    """Call the builtin open() after converting ``mode`` with sysstr()"""
    strmode = sysstr(mode)
    return builtins.open(name, strmode, buffering, encoding)
311 313
312 314 safehasattr = _wrapattrfunc(builtins.hasattr)
313 315
314 316 def _getoptbwrapper(orig, args, shortlist, namelist):
315 317 """
316 318 Takes bytes arguments, converts them to unicode, pass them to
317 319 getopt.getopt(), convert the returned values back to bytes and then
318 320 return them for Python 3 compatibility as getopt.getopt() don't accepts
319 321 bytes on Python 3.
320 322 """
321 323 args = [a.decode('latin-1') for a in args]
322 324 shortlist = shortlist.decode('latin-1')
323 325 namelist = [a.decode('latin-1') for a in namelist]
324 326 opts, args = orig(args, shortlist, namelist)
325 327 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
326 328 args = [a.encode('latin-1') for a in args]
327 329 return opts, args
328 330
def strkwargs(dic):
    """
    Return a copy of ``dic`` whose bytes keys are decoded (latin-1) to str,
    i.e. unicode. Dictionaries with bytes keys can't be passed as keyword
    arguments to functions on Python 3.
    """
    return {key.decode('latin-1'): value for key, value in dic.items()}
337 339
def byteskwargs(dic):
    """
    Return a copy of ``dic`` whose str keys are encoded (latin-1) back to
    bytes, undoing the conversion that was needed to pass the dictionary as
    keyword arguments on Python 3.
    """
    return {key.encode('latin-1'): value for key, value in dic.items()}
345 347
346 348 # TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Bytes front end for shlex.split(), which doesn't accept bytes on
    Python 3.

    ``s`` is decoded as latin-1, split with shlex.split(), and each
    resulting token is encoded back to bytes.
    """
    tokens = shlex.split(s.decode('latin-1'), comments, posix)
    return [token.encode('latin-1') for token in tokens]
355 357
356 358 iteritems = lambda x: x.items()
357 359 itervalues = lambda x: x.values()
358 360
359 361 # Python 3.5's json.load and json.loads require str. We polyfill its
360 362 # code for detecting encoding from bytes.
361 363 if sys.version_info[0:2] < (3, 6):
362 364
363 365 def _detect_encoding(b):
364 366 bstartswith = b.startswith
365 367 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
366 368 return 'utf-32'
367 369 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
368 370 return 'utf-16'
369 371 if bstartswith(codecs.BOM_UTF8):
370 372 return 'utf-8-sig'
371 373
372 374 if len(b) >= 4:
373 375 if not b[0]:
374 376 # 00 00 -- -- - utf-32-be
375 377 # 00 XX -- -- - utf-16-be
376 378 return 'utf-16-be' if b[1] else 'utf-32-be'
377 379 if not b[1]:
378 380 # XX 00 00 00 - utf-32-le
379 381 # XX 00 00 XX - utf-16-le
380 382 # XX 00 XX -- - utf-16-le
381 383 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
382 384 elif len(b) == 2:
383 385 if not b[0]:
384 386 # 00 XX - utf-16-be
385 387 return 'utf-16-be'
386 388 if not b[1]:
387 389 # XX 00 - utf-16-le
388 390 return 'utf-16-le'
389 391 # default
390 392 return 'utf-8'
391 393
def json_loads(s, *args, **kwargs):
    """json.loads() that also accepts bytes and bytearray input.

    Binary input is decoded first, using the encoding reported by
    _detect_encoding() and the 'surrogatepass' error handler; remaining
    arguments are forwarded to json.loads() untouched.
    """
    if isinstance(s, (bytes, bytearray)):
        encoding = _detect_encoding(s)
        s = s.decode(encoding, 'surrogatepass')

    return json.loads(s, *args, **kwargs)
397 399
398 400 else:
399 401 json_loads = json.loads
400 402
401 403 else:
402 404 import cStringIO
403 405
404 406 xrange = xrange
405 407 unicode = unicode
406 408 bytechr = chr
407 409 byterepr = repr
408 410 bytestr = str
409 411 iterbytestr = iter
410 412 maybebytestr = identity
411 413 sysbytes = identity
412 414 sysstr = identity
413 415 strurl = identity
414 416 bytesurl = identity
415 417 open = open
416 418 delattr = delattr
417 419 getattr = getattr
418 420 hasattr = hasattr
419 421 setattr = setattr
420 422
421 423 # this can't be parsed on Python 3
422 424 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
423 425
def fsencode(filename):
    """
    Partial backport of Python 3's os.fsencode() that only accepts bytes.

    In Python 2, our paths should only ever be bytes (i.e. str), so a str
    argument is returned unchanged. Anything else, such as a unicode path,
    indicates a bug and raises TypeError.
    """
    if not isinstance(filename, str):
        raise TypeError("expect str, not %s" % type(filename).__name__)
    return filename
434 436
435 437 # In Python 2, fsdecode() is very likely to receive bytes. So it's
436 438 # better not to touch the Python 2 part as it's already working fine.
437 439 fsdecode = identity
438 440
def getdoc(obj):
    """Return the ``__doc__`` attribute of ``obj``, or None if it has none"""
    doc = getattr(obj, '__doc__', None)
    return doc
441 443
# unique sentinel: lets safehasattr() tell "attribute missing" apart from any
# real attribute value, including None
_notset = object()

def safehasattr(thing, attr):
    """Report whether ``thing`` has attribute ``attr``, using a getattr()
    lookup with a sentinel default"""
    found = getattr(thing, attr, _notset)
    return found is not _notset
446 448
447 449 def _getoptbwrapper(orig, args, shortlist, namelist):
448 450 return orig(args, shortlist, namelist)
449 451
450 452 strkwargs = identity
451 453 byteskwargs = identity
452 454
453 455 oscurdir = os.curdir
454 456 oslinesep = os.linesep
455 457 osname = os.name
456 458 ospathsep = os.pathsep
457 459 ospardir = os.pardir
458 460 ossep = os.sep
459 461 osaltsep = os.altsep
460 462 osdevnull = os.devnull
461 463 long = long
462 464 stdin = sys.stdin
463 465 stdout = sys.stdout
464 466 stderr = sys.stderr
465 467 if getattr(sys, 'argv', None) is not None:
466 468 sysargv = sys.argv
467 469 sysplatform = sys.platform
468 470 sysexecutable = sys.executable
469 471 shlexsplit = shlex.split
470 472 bytesio = cStringIO.StringIO
471 473 stringio = bytesio
472 474 maplist = map
473 475 rangelist = range
474 476 ziplist = zip
475 477 rawinput = raw_input
476 478 getargspec = inspect.getargspec
477 479 iteritems = lambda x: x.iteritems()
478 480 itervalues = lambda x: x.itervalues()
479 481 json_loads = json.loads
480 482
481 483 isjython = sysplatform.startswith(b'java')
482 484
483 485 isdarwin = sysplatform.startswith(b'darwin')
484 486 islinux = sysplatform.startswith(b'linux')
485 487 isposix = osname == b'posix'
486 488 iswindows = osname == b'nt'
487 489
488 490
def getoptb(args, shortlist, namelist):
    """Run getopt.getopt() through _getoptbwrapper() and return its result"""
    parser = getopt.getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
491 493
492 494
def gnugetoptb(args, shortlist, namelist):
    """Run getopt.gnu_getopt() through _getoptbwrapper() and return its
    result"""
    parser = getopt.gnu_getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
495 497
496 498
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """tempfile.mkdtemp() with bytes defaults for ``suffix`` and ``prefix``"""
    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
499 501
500 502
501 503 # text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """tempfile.mkstemp() with bytes defaults for ``suffix`` and ``prefix``

    There is deliberately no ``text`` parameter.
    """
    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
504 506
505 507
506 508 # mode must include 'b'ytes as encoding= is not supported
def namedtempfile(
    mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
):
    """tempfile.NamedTemporaryFile() taking a bytes ``mode``.

    ``mode`` is converted with sysstr() and must contain 'b'; the other
    arguments are forwarded unchanged.
    """
    strmode = sysstr(mode)
    assert 'b' in strmode
    return tempfile.NamedTemporaryFile(
        strmode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
    )
General Comments 0
You need to be logged in to leave comments. Login now