##// END OF EJS Templates
compat: back out a25343d16ebe (initialize LC_CTYPE locale on all Python ...)...
Manuel Jacob -
r45557:f2dc3371 default
parent child Browse files
Show More
@@ -1,552 +1,531 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import json
16 import locale
17 16 import os
18 17 import shlex
19 18 import sys
20 19 import tempfile
21 20
# Interpreter feature flags consulted by the rest of this module.
ispy3 = sys.version_info[0] >= 3
ispypy = '__pypy__' in sys.builtin_module_names
TYPE_CHECKING = False

if not globals():  # hide this from non-pytype users
    # A module namespace is never empty, so this branch is skipped when the
    # module is actually executed and TYPE_CHECKING stays False at runtime.
    import typing

    TYPE_CHECKING = typing.TYPE_CHECKING
30 29
if not ispy3:
    # Python 2: expose the old module names under the names used on Python 3.
    import cookielib
    import cPickle as pickle
    import httplib
    import Queue as queue
    import SocketServer as socketserver
    import xmlrpclib

    from .thirdparty.concurrent import futures

    def future_set_exception_info(f, exc_info):
        """Record a failure on future ``f``.

        ``exc_info`` is unpacked into ``f.set_exception_info()``.
        NOTE(review): presumably callers pass ``(exception, traceback)`` --
        confirm against the call sites.
        """
        f.set_exception_info(*exc_info)


else:
    # Python 3: keep the Python 2 spellings as aliases of the new modules.
    import concurrent.futures as futures
    import http.cookiejar as cookielib
    import http.client as httplib
    import pickle
    import queue as queue
    import socketserver
    import xmlrpc.client as xmlrpclib

    def future_set_exception_info(f, exc_info):
        """Record a failure on future ``f``.

        Only the first item of ``exc_info`` is handed to
        ``f.set_exception()``; the remaining items are ignored.
        """
        f.set_exception(exc_info[0])
56 55
57 56
def identity(a):
    """Return ``a`` unchanged (a no-op conversion function)."""
    return a
60 59
61 60
def _rapply(f, xs):
    """Apply ``f`` to every leaf of ``xs``, rebuilding the containers.

    Lists, sets, tuples and dicts are reconstructed with their own type;
    dict keys and values are both mapped. ``None`` is returned as is and any
    other value is passed to ``f``.
    """
    if xs is None:
        # assume None means non-value of optional data
        return xs
    if isinstance(xs, (list, set, tuple)):
        return type(xs)(_rapply(f, x) for x in xs)
    if isinstance(xs, dict):
        return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
    return f(xs)
71 70
72 71
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    if f is identity:
        # fast path mainly for py2: nothing would change, so skip the copy
        return xs
    return _rapply(f, xs)
95 94
96 95
97 # Passing the '' locale means that the locale should be set according to the
98 # user settings (environment variables).
99 # Python sometimes avoids setting the global locale settings. When interfacing
100 # with C code (e.g. the curses module or the Subversion bindings), the global
101 # locale settings must be initialized correctly. Python 2 does not initialize
102 # the global locale settings on interpreter startup. Python 3 sometimes
103 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
104 # explicitly initialize it to get consistent behavior if it's not already
105 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
106 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
107 # if we can remove this code.
108 if locale.setlocale(locale.LC_CTYPE, None) == 'C':
109 try:
110 locale.setlocale(locale.LC_CTYPE, '')
111 except locale.Error:
112 # The likely case is that the locale from the environment variables is
113 # unknown.
114 pass
115
116
117 96 if ispy3:
118 97 import builtins
119 98 import codecs
120 99 import functools
121 100 import io
122 101 import struct
123 102
if os.name == r'nt' and sys.version_info >= (3, 6):
    # MBCS (or ANSI) filesystem encoding must be used as before.
    # Otherwise non-ASCII filenames in existing repositories would be
    # corrupted.
    # This must be set once prior to any fsencode/fsdecode calls.
    sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr

# Bytes counterparts of the os/sys string constants.
fsencode = os.fsencode
fsdecode = os.fsdecode
oscurdir = os.curdir.encode('ascii')
oslinesep = os.linesep.encode('ascii')
osname = os.name.encode('ascii')
ospathsep = os.pathsep.encode('ascii')
ospardir = os.pardir.encode('ascii')
ossep = os.sep.encode('ascii')
osaltsep = os.altsep
if osaltsep:
    # only encode when an alternate separator is actually defined
    osaltsep = osaltsep.encode('ascii')
osdevnull = os.devnull.encode('ascii')

sysplatform = sys.platform.encode('ascii')
sysexecutable = sys.executable
if sysexecutable:
    # left untouched when the interpreter path is empty or None
    sysexecutable = os.fsencode(sysexecutable)
bytesio = io.BytesIO
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
151 130
def maplist(*args):
    """Call ``map(*args)`` and return the result as a list."""
    return list(map(*args))
154 133
def rangelist(*args):
    """Call ``range(*args)`` and return the result as a list."""
    return list(range(*args))
157 136
def ziplist(*args):
    """Call ``zip(*args)`` and return the result as a list."""
    return list(zip(*args))
160 139
# Python 2 spellings mapped onto their Python 3 equivalents.
rawinput = input
getargspec = inspect.getfullargspec

long = int
165 144
# Warning: sys.stdout.buffer and sys.stderr.buffer do not necessarily have
# the same buffering behavior as sys.stdout and sys.stderr. The interpreter
# initializes them with block-buffered streams or unbuffered streams (when
# the -u option or the PYTHONUNBUFFERED environment variable is set), never
# with a line-buffered stream.
# TODO: .buffer might not exist if std streams were replaced; we'll need
# a silly wrapper to make a bytes stream backed by a unicode one.
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
stderr = sys.stderr.buffer
176 155
# sysargv is the bytes form of sys.argv; it is only defined when sys.argv
# exists and is not None.
if getattr(sys, 'argv', None) is not None:
    # On POSIX, the char** argv array is converted to Python str using
    # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
    # isn't directly callable from Python code. In practice, os.fsencode()
    # can be used instead (this is recommended by Python's documentation
    # for sys.argv).
    #
    # On Windows, the wchar_t **argv is passed into the interpreter as-is.
    # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
    # there's an additional wrinkle. What we really want to access is the
    # ANSI codepage representation of the arguments, as this is what
    # `int main()` would receive if Python 3 didn't define `int wmain()`
    # (this is how Python 2 worked). To get that, we encode with the mbcs
    # encoding, which will pass CP_ACP to the underlying Windows API to
    # produce bytes.
    if os.name == r'nt':
        sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
    else:
        sysargv = [fsencode(a) for a in sys.argv]
196 175
# bytechr(i) packs an integer in range(256) into a one-byte bytes object.
bytechr = struct.Struct('>B').pack
# byterepr(x) formats x through b'%r'; being a bare % operation, a tuple
# argument is treated as the format arguments rather than as one value.
byterepr = b'%r'.__mod__
199 178
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80))  # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is returned as is instead of being copied.
        if isinstance(s, bytestr):
            return s
        # Anything that is neither bytes-like nor convertible through
        # __bytes__ goes through its str() form, which must be pure ASCII.
        if not isinstance(
            s, (bytes, bytearray)
        ) and not hasattr(  # hasattr-py3-only
            s, u'__bytes__'
        ):
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        s = bytes.__getitem__(self, key)
        if not isinstance(s, bytes):
            # integer index: turn the int back into a one-byte bytes
            s = bytechr(s)
        return s

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))

    def __repr__(self):
        return bytes.__repr__(self)[1:]  # drop b''
275 254
def iterbytestr(s):
    """Iterate bytes as if it were a str object of Python 2

    Every item of ``s`` is passed through bytechr(), so the iterator yields
    one-byte bytes objects rather than integers.
    """
    return map(bytechr, s)
279 258
def maybebytestr(s):
    """Promote bytes to bytestr

    Values that are not bytes are returned unchanged.
    """
    if isinstance(s, bytes):
        return bytestr(s)
    return s
285 264
def sysbytes(s):
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    This never raises UnicodeEncodeError, but only ASCII characters
    can be round-tripped by sysstr(sysbytes(s)).

    bytes input is returned unchanged; str input is encoded as UTF-8.
    """
    if isinstance(s, bytes):
        return s
    return s.encode('utf-8')
295 274
def sysstr(s):
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    This never raises UnicodeDecodeError. Non-ascii characters are
    considered invalid and mapped to arbitrary but unique code points
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.

    str input is returned unchanged; anything else is decoded as latin-1.
    """
    if isinstance(s, builtins.str):
        return s
    return s.decode('latin-1')
307 286
def strurl(url):
    """Converts a bytes url back to str

    bytes are decoded as ASCII; any other value is returned unchanged.
    """
    if isinstance(url, bytes):
        return url.decode('ascii')
    return url
313 292
def bytesurl(url):
    """Converts a str url to bytes by encoding in ascii

    Values that are not str are returned unchanged.
    """
    if isinstance(url, str):
        return url.encode('ascii')
    return url
319 298
def raisewithtb(exc, tb):
    """Raise exception with the given traceback"""
    raise exc.with_traceback(tb)
323 302
def getdoc(obj):
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')"""
    doc = getattr(obj, '__doc__', None)
    if doc is None:
        # keep "no docstring" distinguishable from an empty one
        return doc
    return sysbytes(doc)
331 310
def _wrapattrfunc(f):
    """Wrap an attribute builtin so that its name argument goes via sysstr()

    The wrapper converts ``name`` with sysstr() and forwards the object and
    any further positional arguments to ``f`` unchanged.
    """

    @functools.wraps(f)
    def w(object, name, *args):
        return f(object, sysstr(name), *args)

    return w
338 317
# these wrappers are automagically imported by hgloader
# Each one routes the attribute name through sysstr() (see _wrapattrfunc).
delattr = _wrapattrfunc(builtins.delattr)
getattr = _wrapattrfunc(builtins.getattr)
hasattr = _wrapattrfunc(builtins.hasattr)
setattr = _wrapattrfunc(builtins.setattr)
xrange = builtins.range
unicode = str
346 325
def open(name, mode=b'r', buffering=-1, encoding=None):
    """Call the builtin open() with ``mode`` converted through sysstr()

    ``name``, ``buffering`` and ``encoding`` are forwarded unchanged.
    """
    return builtins.open(name, sysstr(mode), buffering, encoding)
349 328
# hasattr() whose attribute name is converted through sysstr() first.
safehasattr = _wrapattrfunc(builtins.hasattr)
351 330
def _getoptbwrapper(orig, args, shortlist, namelist):
    """
    Takes bytes arguments, converts them to unicode, passes them to
    getopt.getopt(), converts the returned values back to bytes and then
    returns them for Python 3 compatibility, as getopt.getopt() doesn't
    accept bytes on Python 3.

    ``orig`` is the getopt-style function to call. latin-1 is used in both
    directions.
    """
    args = [a.decode('latin-1') for a in args]
    shortlist = shortlist.decode('latin-1')
    namelist = [a.decode('latin-1') for a in namelist]
    opts, args = orig(args, shortlist, namelist)
    opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
    args = [a.encode('latin-1') for a in args]
    return opts, args
366 345
def strkwargs(dic):
    """
    Converts the keys of a python dictionary to str i.e. unicodes so that
    they can be passed as keyword arguments, as dictionaries with bytes keys
    can't be passed as keyword arguments to functions on Python 3.

    Keys are decoded as latin-1; a new dict is returned.
    """
    dic = {k.decode('latin-1'): v for k, v in dic.items()}
    return dic
375 354
def byteskwargs(dic):
    """
    Converts keys of python dictionaries to bytes, as they were converted to
    str to pass that dictionary as a keyword argument on Python 3.

    Keys are encoded as latin-1; a new dict is returned.
    """
    dic = {k.encode('latin-1'): v for k, v in dic.items()}
    return dic
383 362
# TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Takes bytes argument, converts it to str i.e. unicodes, passes that into
    shlex.split(), converts the returned value to bytes and returns that for
    Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.

    latin-1 is used in both directions.
    """
    ret = shlex.split(s.decode('latin-1'), comments, posix)
    return [a.encode('latin-1') for a in ret]
393 372
# Python 3 dicts have no iteritems()/itervalues(); use items()/values().
iteritems = lambda x: x.items()
itervalues = lambda x: x.values()
396 375
# Python 3.5's json.load and json.loads require str. We polyfill its
# code for detecting encoding from bytes.
if sys.version_info[0:2] < (3, 6):

    def _detect_encoding(b):
        """Guess the Unicode encoding of the JSON document in bytes ``b``

        A byte order mark decides when present; otherwise the position of
        NUL bytes among the first bytes is used. Falls back to 'utf-8'.
        """
        bstartswith = b.startswith
        # UTF-32 must be tested first: its little-endian BOM begins with
        # the UTF-16 little-endian BOM.
        if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
            return 'utf-32'
        if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
            return 'utf-16'
        if bstartswith(codecs.BOM_UTF8):
            return 'utf-8-sig'

        if len(b) >= 4:
            if not b[0]:
                # 00 00 -- -- - utf-32-be
                # 00 XX -- -- - utf-16-be
                return 'utf-16-be' if b[1] else 'utf-32-be'
            if not b[1]:
                # XX 00 00 00 - utf-32-le
                # XX 00 00 XX - utf-16-le
                # XX 00 XX -- - utf-16-le
                return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
        elif len(b) == 2:
            if not b[0]:
                # 00 XX - utf-16-be
                return 'utf-16-be'
            if not b[1]:
                # XX 00 - utf-16-le
                return 'utf-16-le'
        # default
        return 'utf-8'

    def json_loads(s, *args, **kwargs):
        """json.loads() that also accepts bytes and bytearray input"""
        if isinstance(s, (bytes, bytearray)):
            s = s.decode(_detect_encoding(s), 'surrogatepass')

        return json.loads(s, *args, **kwargs)


else:
    json_loads = json.loads
438 417
439 418 else:
440 419 import cStringIO
441 420
# Python 2 branch: the builtins already have the wanted behaviour, so they
# (or the no-op identity) are published under the pycompat names.
xrange = xrange
unicode = unicode
bytechr = chr
byterepr = repr
bytestr = str
iterbytestr = iter
maybebytestr = identity
sysbytes = identity
sysstr = identity
strurl = identity
bytesurl = identity
open = open
delattr = delattr
getattr = getattr
hasattr = hasattr
setattr = setattr
458 437
# this can't be parsed on Python 3
# The three-argument raise is Python 2 only syntax, so the definition is
# kept inside a string and compiled with exec() when this branch runs.
exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
461 440
def fsencode(filename):
    """
    Partial backport from os.py in Python 3, which only accepts bytes.
    In Python 2, our paths should only ever be bytes, a unicode path
    indicates a bug.

    Returns ``filename`` unchanged when it is a ``str``; raises TypeError
    for any other type.
    """
    if isinstance(filename, str):
        return filename
    else:
        raise TypeError("expect str, not %s" % type(filename).__name__)
472 451
# In Python 2, fsdecode() is very likely to receive bytes already, so the
# Python 2 side is left as a no-op; it is already working fine.
fsdecode = identity
476 455
def getdoc(obj):
    """Return the ``__doc__`` of ``obj``, or None when it has none"""
    return getattr(obj, '__doc__', None)
479 458
# Unique sentinel, so that an attribute whose value is None (or any other
# falsy value) still counts as present.
_notset = object()


def safehasattr(thing, attr):
    """Return True if ``thing`` has an attribute named ``attr``

    The lookup uses three-argument getattr() with a private sentinel as the
    default instead of hasattr().
    """
    return getattr(thing, attr, _notset) is not _notset
484 463
def _getoptbwrapper(orig, args, shortlist, namelist):
    """Call the getopt-style function ``orig`` with the arguments unchanged

    No conversion is done in this branch; the arguments are passed to
    ``orig`` as they are.
    """
    return orig(args, shortlist, namelist)
487 466
# Python 2 branch: the remaining pycompat names map directly onto the
# native objects, with no conversion.
strkwargs = identity
byteskwargs = identity

oscurdir = os.curdir
oslinesep = os.linesep
osname = os.name
ospathsep = os.pathsep
ospardir = os.pardir
ossep = os.sep
osaltsep = os.altsep
osdevnull = os.devnull
long = long
stdin = sys.stdin
stdout = sys.stdout
stderr = sys.stderr
if getattr(sys, 'argv', None) is not None:
    # sysargv is only defined when sys.argv exists and is not None
    sysargv = sys.argv
sysplatform = sys.platform
sysexecutable = sys.executable
shlexsplit = shlex.split
bytesio = cStringIO.StringIO
stringio = bytesio
maplist = map
rangelist = range
ziplist = zip
rawinput = raw_input
getargspec = inspect.getargspec
iteritems = lambda x: x.iteritems()
itervalues = lambda x: x.itervalues()
json_loads = json.loads
518 497
# Platform flags derived from the bytes forms of sys.platform and os.name.
isjython = sysplatform.startswith(b'java')

isdarwin = sysplatform.startswith(b'darwin')
islinux = sysplatform.startswith(b'linux')
isposix = osname == b'posix'
iswindows = osname == b'nt'
525 504
526 505
def getoptb(args, shortlist, namelist):
    """Run getopt.getopt() through _getoptbwrapper()"""
    return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
529 508
530 509
def gnugetoptb(args, shortlist, namelist):
    """Run getopt.gnu_getopt() through _getoptbwrapper()"""
    return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
533 512
534 513
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """Call tempfile.mkdtemp() with bytes defaults for suffix and prefix"""
    return tempfile.mkdtemp(suffix, prefix, dir)
537 516
538 517
# text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """Call tempfile.mkstemp() with bytes defaults for suffix and prefix"""
    return tempfile.mkstemp(suffix, prefix, dir)
542 521
543 522
# mode must include 'b'ytes as encoding= is not supported
def namedtempfile(
    mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
):
    """Call tempfile.NamedTemporaryFile() with bytes-friendly defaults

    ``mode`` is converted through sysstr() and must contain 'b'; this is
    checked with an assert. The other arguments are forwarded unchanged.
    """
    mode = sysstr(mode)
    assert 'b' in mode
    return tempfile.NamedTemporaryFile(
        mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
    )
General Comments 0
You need to be logged in to leave comments. Login now