##// END OF EJS Templates
compat: initialize LC_CTYPE locale on all Python versions and platforms...
Manuel Jacob -
r45923:a25343d1 default
parent child Browse files
Show More
@@ -1,531 +1,552 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import json
16 import locale
16 17 import os
17 18 import shlex
18 19 import sys
19 20 import tempfile
20 21
21 22 ispy3 = sys.version_info[0] >= 3
22 23 ispypy = '__pypy__' in sys.builtin_module_names
23 24 TYPE_CHECKING = False
24 25
25 26 if not globals(): # hide this from non-pytype users
26 27 import typing
27 28
28 29 TYPE_CHECKING = typing.TYPE_CHECKING
29 30
30 31 if not ispy3:
31 32 import cookielib
32 33 import cPickle as pickle
33 34 import httplib
34 35 import Queue as queue
35 36 import SocketServer as socketserver
36 37 import xmlrpclib
37 38
38 39 from .thirdparty.concurrent import futures
39 40
def future_set_exception_info(f, exc_info):
    """Mark future ``f`` as failed (Python 2 flavour).

    ``exc_info`` is unpacked as the positional arguments of
    ``f.set_exception_info()``.
    """
    record_failure = f.set_exception_info
    record_failure(*exc_info)
42 43
43 44
44 45 else:
45 46 import concurrent.futures as futures
46 47 import http.cookiejar as cookielib
47 48 import http.client as httplib
48 49 import pickle
49 50 import queue as queue
50 51 import socketserver
51 52 import xmlrpc.client as xmlrpclib
52 53
def future_set_exception_info(f, exc_info):
    """Mark future ``f`` as failed (Python 3 flavour).

    Only the first element of ``exc_info`` is handed to
    ``f.set_exception()``; any remaining elements are ignored.
    """
    failure = exc_info[0]
    f.set_exception(failure)
55 56
56 57
def identity(a):
    """Return the argument itself, unchanged."""
    return a
59 60
60 61
def _rapply(f, xs):
    """Recursive worker for rapply().

    None is passed through untouched, list/set/tuple/dict containers are
    rebuilt with their own type from the converted members, and any other
    value is handed to ``f``.
    """
    if xs is None:
        # None stands for an absent optional value; never feed it to f
        return None
    if isinstance(xs, dict):
        converted = (
            (_rapply(f, key), _rapply(f, value)) for key, value in xs.items()
        )
        return type(xs)(converted)
    if isinstance(xs, (list, set, tuple)):
        return type(xs)(_rapply(f, item) for item in xs)
    return f(xs)
70 71
71 72
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    # Shortcut (mostly useful on py2): the identity function cannot change
    # anything, so hand back the very same object without walking it.
    return xs if f is identity else _rapply(f, xs)
94 95
95 96
97 # Passing the '' locale means that the locale should be set according to the
98 # user settings (environment variables).
99 # Python sometimes avoids setting the global locale settings. When interfacing
100 # with C code (e.g. the curses module or the Subversion bindings), the global
101 # locale settings must be initialized correctly. Python 2 does not initialize
102 # the global locale settings on interpreter startup. Python 3 sometimes
103 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
104 # explicitly initialize it to get consistent behavior if it's not already
105 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
106 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
107 # if we can remove this code.
108 if locale.setlocale(locale.LC_CTYPE, None) == 'C':
109 try:
110 locale.setlocale(locale.LC_CTYPE, '')
111 except locale.Error:
112 # The likely case is that the locale from the environment variables is
113 # unknown.
114 pass
115
116
96 117 if ispy3:
97 118 import builtins
98 119 import codecs
99 120 import functools
100 121 import io
101 122 import struct
102 123
103 124 if os.name == r'nt' and sys.version_info >= (3, 6):
104 125 # MBCS (or ANSI) filesystem encoding must be used as before.
105 126 # Otherwise non-ASCII filenames in existing repositories would be
106 127 # corrupted.
107 128 # This must be set once prior to any fsencode/fsdecode calls.
108 129 sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
109 130
110 131 fsencode = os.fsencode
111 132 fsdecode = os.fsdecode
112 133 oscurdir = os.curdir.encode('ascii')
113 134 oslinesep = os.linesep.encode('ascii')
114 135 osname = os.name.encode('ascii')
115 136 ospathsep = os.pathsep.encode('ascii')
116 137 ospardir = os.pardir.encode('ascii')
117 138 ossep = os.sep.encode('ascii')
118 139 osaltsep = os.altsep
119 140 if osaltsep:
120 141 osaltsep = osaltsep.encode('ascii')
121 142 osdevnull = os.devnull.encode('ascii')
122 143
123 144 sysplatform = sys.platform.encode('ascii')
124 145 sysexecutable = sys.executable
125 146 if sysexecutable:
126 147 sysexecutable = os.fsencode(sysexecutable)
127 148 bytesio = io.BytesIO
128 149 # TODO deprecate stringio name, as it is a lie on Python 3.
129 150 stringio = bytesio
130 151
def maplist(*args):
    """Call map() with the given arguments and return the result as a list."""
    mapped = map(*args)
    return list(mapped)
133 154
def rangelist(*args):
    """Call range() with the given arguments and return the result as a list."""
    span = range(*args)
    return list(span)
136 157
def ziplist(*args):
    """Call zip() with the given arguments and return the result as a list."""
    zipped = zip(*args)
    return list(zipped)
139 160
140 161 rawinput = input
141 162 getargspec = inspect.getfullargspec
142 163
143 164 long = int
144 165
145 166 # Warning: sys.stdout.buffer and sys.stderr.buffer do not necessarily have
146 167 # the same buffering behavior as sys.stdout and sys.stderr. The interpreter
147 168 # initializes them with block-buffered streams or unbuffered streams (when
148 169 # the -u option or the PYTHONUNBUFFERED environment variable is set), never
149 170 # with a line-buffered stream.
150 171 # TODO: .buffer might not exist if std streams were replaced; we'll need
151 172 # a silly wrapper to make a bytes stream backed by a unicode one.
152 173 stdin = sys.stdin.buffer
153 174 stdout = sys.stdout.buffer
154 175 stderr = sys.stderr.buffer
155 176
156 177 if getattr(sys, 'argv', None) is not None:
157 178 # On POSIX, the char** argv array is converted to Python str using
158 179 # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
159 180 # isn't directly callable from Python code. In practice, os.fsencode()
160 181 # can be used instead (this is recommended by Python's documentation
161 182 # for sys.argv).
162 183 #
163 184 # On Windows, the wchar_t **argv is passed into the interpreter as-is.
164 185 # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
165 186 # there's an additional wrinkle. What we really want to access is the
166 187 # ANSI codepage representation of the arguments, as this is what
167 188 # `int main()` would receive if Python 3 didn't define `int wmain()`
168 189 # (this is how Python 2 worked). To get that, we encode with the mbcs
169 190 # encoding, which will pass CP_ACP to the underlying Windows API to
170 191 # produce bytes.
171 192 if os.name == r'nt':
172 193 sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
173 194 else:
174 195 sysargv = [fsencode(a) for a in sys.argv]
175 196
176 197 bytechr = struct.Struct('>B').pack
177 198 byterepr = b'%r'.__mod__
178 199
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is returned as-is rather than copied.
        if isinstance(s, bytestr):
            return s
        # Bytes-like values and objects offering __bytes__() are passed
        # straight to bytes.__new__(); everything else goes through str()
        # and a strict ASCII encode.
        directly_convertible = isinstance(
            s, (bytes, bytearray)
        ) or hasattr(  # hasattr-py3-only
            s, u'__bytes__'
        )
        if not directly_convertible:
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        # Slices already come back as bytes; a single index yields an int
        # which is turned into a one-byte bytes value.
        item = bytes.__getitem__(self, key)
        return item if isinstance(item, bytes) else bytechr(item)

    def __iter__(self):
        raw = bytes.__iter__(self)
        return iterbytestr(raw)

    def __repr__(self):
        plain = bytes.__repr__(self)
        return plain[1:]  # drop b''
254 275
def iterbytestr(s):
    """Iterate over ``s`` the way Python 2 iterates over a str.

    Every item produced by iterating ``s`` is passed through bytechr().
    """
    to_byte = bytechr
    return map(to_byte, s)
258 279
def maybebytestr(s):
    """Wrap ``s`` in bytestr when it is bytes; return anything else as-is."""
    return bytestr(s) if isinstance(s, bytes) else s
264 285
def sysbytes(s):
    """Turn an internal str (e.g. keyword, __doc__) back into bytes

    Values that are already bytes are returned unchanged; anything else is
    encoded as UTF-8. Only ASCII characters survive a
    sysstr(sysbytes(s)) round trip.
    """
    if not isinstance(s, bytes):
        s = s.encode('utf-8')
    return s
274 295
def sysstr(s):
    """Produce a native str suitable for Python APIs such as getattr() and
    str.encode()

    A str is returned unchanged; other values are decoded as latin-1, which
    maps every byte to exactly one code point. Decoding therefore cannot
    fail, and distinct inputs always give distinct results, although
    non-ascii bytes end up as arbitrary code points.
    """
    if not isinstance(s, builtins.str):
        s = s.decode('latin-1')
    return s
286 307
def strurl(url):
    """Decode a bytes url as ASCII; any other value is returned untouched"""
    return url.decode('ascii') if isinstance(url, bytes) else url
292 313
def bytesurl(url):
    """Encode a str url as ASCII bytes; any other value is returned untouched"""
    return url.encode('ascii') if isinstance(url, str) else url
298 319
def raisewithtb(exc, tb):
    """Raise ``exc`` after attaching traceback ``tb`` to it"""
    prepared = exc.with_traceback(tb)
    raise prepared
302 323
def getdoc(obj):
    """Fetch the docstring of ``obj`` as bytes.

    A missing or None ``__doc__`` yields None instead of empty bytes, so
    gettext() won't confuse it with _('')."""
    text = getattr(obj, '__doc__', None)
    return None if text is None else sysbytes(text)
310 331
def _wrapattrfunc(f):
    """Wrap attribute helper ``f`` so its name argument goes through sysstr().

    The returned function forwards the object, the converted name and any
    extra positional arguments to ``f``.
    """

    @functools.wraps(f)
    def w(object, name, *args):
        native_name = sysstr(name)
        return f(object, native_name, *args)

    return w
317 338
318 339 # these wrappers are automagically imported by hgloader
319 340 delattr = _wrapattrfunc(builtins.delattr)
320 341 getattr = _wrapattrfunc(builtins.getattr)
321 342 hasattr = _wrapattrfunc(builtins.hasattr)
322 343 setattr = _wrapattrfunc(builtins.setattr)
323 344 xrange = builtins.range
324 345 unicode = str
325 346
def open(name, mode=b'r', buffering=-1, encoding=None):
    """Call builtins.open() after passing ``mode`` through sysstr().

    The other arguments are forwarded positionally, unchanged.
    """
    native_mode = sysstr(mode)
    return builtins.open(name, native_mode, buffering, encoding)
328 349
329 350 safehasattr = _wrapattrfunc(builtins.hasattr)
330 351
def _getoptbwrapper(orig, args, shortlist, namelist):
    """
    Bytes front-end for the getopt-style function ``orig``.

    The bytes arguments are decoded to str as latin-1, ``orig`` is called
    with them, and the option pairs and remaining arguments it returns are
    encoded back to bytes. This is needed because getopt.getopt() doesn't
    accept bytes on Python 3.
    """
    codec = 'latin-1'
    strargs = [arg.decode(codec) for arg in args]
    strshort = shortlist.decode(codec)
    strlong = [name.decode(codec) for name in namelist]
    stropts, strrest = orig(strargs, strshort, strlong)
    byteopts = [
        (pair[0].encode(codec), pair[1].encode(codec)) for pair in stropts
    ]
    byterest = [arg.encode(codec) for arg in strrest]
    return byteopts, byterest
345 366
def strkwargs(dic):
    """
    Return a copy of ``dic`` whose bytes keys are decoded to str (latin-1).

    A dictionary with bytes keys can't be passed as keyword arguments to
    functions on Python 3, so the keys have to be converted first.
    """
    return dict((key.decode('latin-1'), value) for key, value in dic.items())
354 375
def byteskwargs(dic):
    """
    Return a copy of ``dic`` whose str keys are encoded to bytes (latin-1).

    This undoes the conversion that was needed to pass the dictionary as
    keyword arguments on Python 3.
    """
    return dict((key.encode('latin-1'), value) for key, value in dic.items())
362 383
363 384 # TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Bytes front-end for shlex.split().

    ``s`` is decoded to str as latin-1, split with shlex.split(), and each
    resulting token is encoded back to bytes, because shlex.split() doesn't
    accept bytes on Python 3.
    """
    text = s.decode('latin-1')
    tokens = shlex.split(text, comments, posix)
    return [token.encode('latin-1') for token in tokens]
372 393
373 394 iteritems = lambda x: x.items()
374 395 itervalues = lambda x: x.values()
375 396
376 397 # Python 3.5's json.load and json.loads require str. We polyfill its
377 398 # code for detecting encoding from bytes.
378 399 if sys.version_info[0:2] < (3, 6):
379 400
def _detect_encoding(b):
    """Guess which UTF encoding the bytes ``b`` use.

    A leading byte order mark decides first. Otherwise the position of
    zero bytes among the first four (or two) bytes is inspected, and
    'utf-8' is the fallback.
    """
    bom_table = (
        ((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE), 'utf-32'),
        ((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE), 'utf-16'),
        ((codecs.BOM_UTF8,), 'utf-8-sig'),
    )
    # UTF-32 has to be tested before UTF-16: the UTF-32-LE mark begins
    # with the UTF-16-LE one.
    for marks, encoding in bom_table:
        if b.startswith(marks):
            return encoding

    size = len(b)
    if size >= 4:
        if not b[0]:
            # 00 00 -- -- - utf-32-be
            # 00 XX -- -- - utf-16-be
            if b[1]:
                return 'utf-16-be'
            return 'utf-32-be'
        if not b[1]:
            # XX 00 00 00 - utf-32-le
            # XX 00 00 XX - utf-16-le
            # XX 00 XX -- - utf-16-le
            if b[2] or b[3]:
                return 'utf-16-le'
            return 'utf-32-le'
    elif size == 2:
        if not b[0]:
            # 00 XX - utf-16-be
            return 'utf-16-be'
        if not b[1]:
            # XX 00 - utf-16-le
            return 'utf-16-le'
    # default
    return 'utf-8'
408 429
def json_loads(s, *args, **kwargs):
    """json.loads() that also accepts bytes and bytearray input.

    Binary input is decoded with the encoding guessed by
    _detect_encoding(), using the 'surrogatepass' error handler; all other
    arguments are forwarded to json.loads() unchanged.
    """
    if not isinstance(s, (bytes, bytearray)):
        return json.loads(s, *args, **kwargs)

    text = s.decode(_detect_encoding(s), 'surrogatepass')
    return json.loads(text, *args, **kwargs)
414 435
415 436 else:
416 437 json_loads = json.loads
417 438
418 439 else:
419 440 import cStringIO
420 441
421 442 xrange = xrange
422 443 unicode = unicode
423 444 bytechr = chr
424 445 byterepr = repr
425 446 bytestr = str
426 447 iterbytestr = iter
427 448 maybebytestr = identity
428 449 sysbytes = identity
429 450 sysstr = identity
430 451 strurl = identity
431 452 bytesurl = identity
432 453 open = open
433 454 delattr = delattr
434 455 getattr = getattr
435 456 hasattr = hasattr
436 457 setattr = setattr
437 458
438 459 # this can't be parsed on Python 3
439 460 exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
440 461
def fsencode(filename):
    """
    Partial backport of os.fsencode() from Python 3 that only accepts a
    str, which is the bytes type on Python 2.

    On Python 2 our paths should only ever be bytes, so any other type
    (such as a unicode path) indicates a bug and raises TypeError.
    """
    if not isinstance(filename, str):
        raise TypeError("expect str, not %s" % type(filename).__name__)
    return filename
451 472
452 473 # In Python 2, fsdecode() is very likely to receive bytes. So it's
453 474 # better not to touch the Python 2 part as it's already working fine.
454 475 fsdecode = identity
455 476
def getdoc(obj):
    """Return the ``__doc__`` of ``obj``, or None when it has none."""
    doc = getattr(obj, '__doc__', None)
    return doc
458 479
459 480 _notset = object()
460 481
def safehasattr(thing, attr):
    """Report whether ``thing`` has an attribute named ``attr``.

    The lookup uses getattr() with a private sentinel as the default and
    checks whether the sentinel came back.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
463 484
def _getoptbwrapper(orig, args, shortlist, namelist):
    """Call ``orig`` with the arguments unchanged (Python 2 flavour)."""
    result = orig(args, shortlist, namelist)
    return result
466 487
467 488 strkwargs = identity
468 489 byteskwargs = identity
469 490
470 491 oscurdir = os.curdir
471 492 oslinesep = os.linesep
472 493 osname = os.name
473 494 ospathsep = os.pathsep
474 495 ospardir = os.pardir
475 496 ossep = os.sep
476 497 osaltsep = os.altsep
477 498 osdevnull = os.devnull
478 499 long = long
479 500 stdin = sys.stdin
480 501 stdout = sys.stdout
481 502 stderr = sys.stderr
482 503 if getattr(sys, 'argv', None) is not None:
483 504 sysargv = sys.argv
484 505 sysplatform = sys.platform
485 506 sysexecutable = sys.executable
486 507 shlexsplit = shlex.split
487 508 bytesio = cStringIO.StringIO
488 509 stringio = bytesio
489 510 maplist = map
490 511 rangelist = range
491 512 ziplist = zip
492 513 rawinput = raw_input
493 514 getargspec = inspect.getargspec
494 515 iteritems = lambda x: x.iteritems()
495 516 itervalues = lambda x: x.itervalues()
496 517 json_loads = json.loads
497 518
498 519 isjython = sysplatform.startswith(b'java')
499 520
500 521 isdarwin = sysplatform.startswith(b'darwin')
501 522 islinux = sysplatform.startswith(b'linux')
502 523 isposix = osname == b'posix'
503 524 iswindows = osname == b'nt'
504 525
505 526
def getoptb(args, shortlist, namelist):
    """Run getopt.getopt() through _getoptbwrapper()."""
    parser = getopt.getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
508 529
509 530
def gnugetoptb(args, shortlist, namelist):
    """Run getopt.gnu_getopt() through _getoptbwrapper()."""
    parser = getopt.gnu_getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
512 533
513 534
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """Call tempfile.mkdtemp() with bytes defaults for suffix and prefix."""
    created = tempfile.mkdtemp(suffix, prefix, dir)
    return created
516 537
517 538
518 539 # text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """Call tempfile.mkstemp() with bytes defaults for suffix and prefix.

    There is no ``text`` parameter: text=True is not supported; use
    util.from/tonativeeol() instead.
    """
    created = tempfile.mkstemp(suffix, prefix, dir)
    return created
521 542
522 543
523 544 # mode must include 'b'ytes as encoding= is not supported
def namedtempfile(
    mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
):
    """Call tempfile.NamedTemporaryFile() for a binary temporary file.

    ``mode`` is passed through sysstr() and must contain 'b', because
    ``encoding=`` is not supported here. ``bufsize`` is forwarded as the
    second positional argument; the remaining arguments are forwarded by
    keyword.

    Raises AssertionError when ``mode`` does not request binary mode.
    """
    mode = sysstr(mode)
    if 'b' not in mode:
        # Raised explicitly instead of using an ``assert`` statement so the
        # check is not stripped when Python runs with -O.
        raise AssertionError(
            'namedtempfile() requires a binary mode, got %r' % mode
        )
    return tempfile.NamedTemporaryFile(
        mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
    )
General Comments 0
You need to be logged in to leave comments. Login now