##// END OF EJS Templates
py3: byteify strings in pycompat...
Matt Harbison -
r39678:a407f900 default
parent child Browse files
Show More
@@ -1,432 +1,432 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import os
16 16 import shlex
17 17 import sys
18 18 import tempfile
19 19
20 20 ispy3 = (sys.version_info[0] >= 3)
21 21 ispypy = (r'__pypy__' in sys.builtin_module_names)
22 22
23 23 if not ispy3:
24 24 import cookielib
25 25 import cPickle as pickle
26 26 import httplib
27 27 import Queue as queue
28 28 import SocketServer as socketserver
29 29 import xmlrpclib
30 30
31 31 from .thirdparty.concurrent import futures
32 32
def future_set_exception_info(f, exc_info):
    """Record a failure on future ``f`` (variant used when not on Python 3).

    ``exc_info`` is unpacked into the positional arguments of
    ``f.set_exception_info()``.
    """
    record = f.set_exception_info
    record(*exc_info)
35 35 else:
36 36 import concurrent.futures as futures
37 37 import http.cookiejar as cookielib
38 38 import http.client as httplib
39 39 import pickle
40 40 import queue as queue
41 41 import socketserver
42 42 import xmlrpc.client as xmlrpclib
43 43
def future_set_exception_info(f, exc_info):
    """Record a failure on future ``f`` (Python 3 variant).

    Only the first element of ``exc_info`` is handed to
    ``f.set_exception()``; any further elements are ignored.
    """
    record = f.set_exception
    record(exc_info[0])
46 46
def identity(a):
    """Return ``a`` itself, untouched (a no-op conversion function)."""
    return a
49 49
50 50 def _rapply(f, xs):
51 51 if xs is None:
52 52 # assume None means non-value of optional data
53 53 return xs
54 54 if isinstance(xs, (list, set, tuple)):
55 55 return type(xs)(_rapply(f, x) for x in xs)
56 56 if isinstance(xs, dict):
57 57 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
58 58 return f(xs)
59 59
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    if f is not identity:
        return _rapply(f, xs)
    # fast path mainly for py2: nothing to convert, so hand back the
    # very same object without walking it
    return xs
82 82
83 83 if ispy3:
84 84 import builtins
85 85 import functools
86 86 import io
87 87 import struct
88 88
89 89 fsencode = os.fsencode
90 90 fsdecode = os.fsdecode
91 91 oscurdir = os.curdir.encode('ascii')
92 92 oslinesep = os.linesep.encode('ascii')
93 93 osname = os.name.encode('ascii')
94 94 ospathsep = os.pathsep.encode('ascii')
95 95 ospardir = os.pardir.encode('ascii')
96 96 ossep = os.sep.encode('ascii')
97 97 osaltsep = os.altsep
98 98 if osaltsep:
99 99 osaltsep = osaltsep.encode('ascii')
100 100 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
101 101 # returns bytes.
102 102 getcwd = os.getcwdb
103 103 sysplatform = sys.platform.encode('ascii')
104 104 sysexecutable = sys.executable
105 105 if sysexecutable:
106 106 sysexecutable = os.fsencode(sysexecutable)
107 107 bytesio = io.BytesIO
108 108 # TODO deprecate stringio name, as it is a lie on Python 3.
109 109 stringio = bytesio
110 110
def maplist(*args):
    """Call ``map(*args)`` and return the results as a list."""
    mapped = map(*args)
    return list(mapped)
113 113
def rangelist(*args):
    """Call ``range(*args)`` and return the values as a list."""
    values = range(*args)
    return list(values)
116 116
def ziplist(*args):
    """Call ``zip(*args)`` and return the tuples as a list."""
    pairs = zip(*args)
    return list(pairs)
119 119
120 120 rawinput = input
121 121 getargspec = inspect.getfullargspec
122 122
123 123 long = int
124 124
125 125 # TODO: .buffer might not exist if std streams were replaced; we'll need
126 126 # a silly wrapper to make a bytes stream backed by a unicode one.
127 127 stdin = sys.stdin.buffer
128 128 stdout = sys.stdout.buffer
129 129 stderr = sys.stderr.buffer
130 130
131 131 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
132 132 # we can use os.fsencode() to get back bytes argv.
133 133 #
134 134 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
135 135 #
136 136 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
137 137 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
138 138 if getattr(sys, 'argv', None) is not None:
139 139 sysargv = list(map(os.fsencode, sys.argv))
140 140
141 bytechr = struct.Struct('>B').pack
141 bytechr = struct.Struct(r'>B').pack
142 142 byterepr = b'%r'.__mod__
143 143
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        """Build a bytestr from ``s``; an existing bytestr is returned as is.

        Values that are neither bytes/bytearray nor provide ``__bytes__``
        are converted with ``str()`` and encoded as ASCII.
        """
        if isinstance(s, bytestr):
            return s
        isbyteslike = isinstance(s, (bytes, bytearray))
        if not (isbyteslike
                or hasattr(s, u'__bytes__')): # hasattr-py3-only
            s = str(s).encode(u'ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        """Index or slice; a single element comes back as a 1-byte bytes."""
        item = bytes.__getitem__(self, key)
        # plain bytes yields an int for integer keys; turn it into bytes
        return item if isinstance(item, bytes) else bytechr(item)

    def __iter__(self):
        """Iterate over the elements via ``iterbytestr()``."""
        rawiter = bytes.__iter__(self)
        return iterbytestr(rawiter)

    def __repr__(self):
        """Return the bytes repr without its leading ``b``."""
        plain = bytes.__repr__(self)
        return plain[1:] # drop b''
216 216
def iterbytestr(s):
    """Iterate over ``s`` the way a Python 2 str iterates.

    Every element produced by iterating ``s`` is passed through
    ``bytechr`` lazily, via ``map()``.
    """
    tobyte = bytechr
    return map(tobyte, s)
220 220
def maybebytestr(s):
    """Wrap ``s`` in bytestr when it is bytes; return anything else as is"""
    return bytestr(s) if isinstance(s, bytes) else s
226 226
def sysbytes(s):
    """Turn an internal str (e.g. keyword, __doc__) back into bytes

    The text is encoded as UTF-8, so this never raises
    UnicodeEncodeError; however only ASCII characters survive a
    sysstr(sysbytes(s)) round trip unchanged.
    """
    encoded = s.encode(u'utf-8')
    return encoded
234 234
def sysstr(s):
    """Return a keyword str suitable for Python functions such as
    getattr() and str.encode()

    A value that is already a str is returned unchanged. Anything else
    is decoded as latin-1, which never raises UnicodeDecodeError:
    non-ascii bytes are considered invalid and end up as arbitrary but
    unique code points, so 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if not isinstance(s, builtins.str):
        return s.decode(u'latin-1')
    return s
246 246
def strurl(url):
    """Decode a bytes url to str using ascii; other values pass through"""
    return url.decode(u'ascii') if isinstance(url, bytes) else url
252 252
def bytesurl(url):
    """Encode a str url to bytes using ascii; other values pass through"""
    return url.encode(u'ascii') if isinstance(url, str) else url
258 258
def raisewithtb(exc, tb):
    """Raise ``exc`` after attaching traceback ``tb`` to it"""
    attached = exc.with_traceback(tb)
    raise attached
262 262
def getdoc(obj):
    """Return the docstring of ``obj`` as bytes.

    A missing docstring yields None (rather than an empty value) so
    gettext() won't confuse it with _('').
    """
    doc = getattr(obj, u'__doc__', None)
    return None if doc is None else sysbytes(doc)
270 270
def _wrapattrfunc(f):
    """Wrap an attribute function (e.g. a getattr-like builtin) so the
    attribute name is passed through sysstr() before ``f`` sees it.

    The object and any extra positional arguments are forwarded
    untouched; functools.wraps() copies the metadata of ``f``.
    """
    def wrapped(object, name, *args):
        return f(object, sysstr(name), *args)
    return functools.wraps(f)(wrapped)
276 276
277 277 # these wrappers are automagically imported by hgloader
278 278 delattr = _wrapattrfunc(builtins.delattr)
279 279 getattr = _wrapattrfunc(builtins.getattr)
280 280 hasattr = _wrapattrfunc(builtins.hasattr)
281 281 setattr = _wrapattrfunc(builtins.setattr)
282 282 xrange = builtins.range
283 283 unicode = str
284 284
def open(name, mode=b'r', buffering=-1, encoding=None):
    """Open ``name`` with the builtin open().

    ``mode`` is converted with sysstr() first, so it may be given as
    bytes; the other arguments are forwarded positionally.
    """
    opener = builtins.open
    return opener(name, sysstr(mode), buffering, encoding)
287 287
288 288 safehasattr = _wrapattrfunc(builtins.hasattr)
289 289
290 290 def _getoptbwrapper(orig, args, shortlist, namelist):
291 291 """
292 292 Takes bytes arguments, converts them to unicode, pass them to
293 293 getopt.getopt(), convert the returned values back to bytes and then
294 294 return them for Python 3 compatibility as getopt.getopt() don't accepts
295 295 bytes on Python 3.
296 296 """
297 297 args = [a.decode('latin-1') for a in args]
298 298 shortlist = shortlist.decode('latin-1')
299 299 namelist = [a.decode('latin-1') for a in namelist]
300 300 opts, args = orig(args, shortlist, namelist)
301 301 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
302 302 for a in opts]
303 303 args = [a.encode('latin-1') for a in args]
304 304 return opts, args
305 305
def strkwargs(dic):
    """
    Return a copy of ``dic`` whose bytes keys are decoded to str.

    Dictionaries with bytes keys can't be passed as keyword arguments to
    functions on Python 3, so the keys are decoded as latin-1. Values
    are left untouched.
    """
    # dict.iteritems() does not exist on Python 3 dicts; items() works
    # there, which is where this function is defined.
    return {k.decode('latin-1'): v for k, v in dic.items()}
314 314
def byteskwargs(dic):
    """
    Return a copy of ``dic`` whose str keys are encoded to bytes.

    This undoes the key conversion needed to pass a dictionary as
    keyword arguments on Python 3: keys are encoded as latin-1. Values
    are left untouched.
    """
    # dict.iteritems() does not exist on Python 3 dicts; items() works
    # there, which is where this function is defined.
    return {k.encode('latin-1'): v for k, v in dic.items()}
322 322
323 323 # TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Bytes front-end for shlex.split().

    ``s`` is decoded as latin-1, split by shlex.split(), and each
    resulting token is encoded back to bytes. This exists because
    shlex.split() doesn't accept bytes on Python 3.
    """
    text = s.decode('latin-1')
    tokens = shlex.split(text, comments, posix)
    return [tok.encode('latin-1') for tok in tokens]
332 332
333 333 else:
334 334 import cStringIO
335 335
336 336 xrange = xrange
337 337 unicode = unicode
338 338 bytechr = chr
339 339 byterepr = repr
340 340 bytestr = str
341 341 iterbytestr = iter
342 342 maybebytestr = identity
343 343 sysbytes = identity
344 344 sysstr = identity
345 345 strurl = identity
346 346 bytesurl = identity
347 347
348 348 # this can't be parsed on Python 3
349 349 exec('def raisewithtb(exc, tb):\n'
350 350 ' raise exc, None, tb\n')
351 351
def fsencode(filename):
    """
    Partial backport of fsencode() from os.py in Python 3.

    Only ``str`` is accepted and it is returned unchanged; any other
    type raises TypeError. In Python 2, our paths should only ever be
    bytes, so a unicode path indicates a bug.
    """
    if not isinstance(filename, str):
        raise TypeError(
            r"expect str, not %s" % type(filename).__name__)
    return filename
363 363
364 364 # In Python 2, fsdecode() has a very good chance of receiving bytes. So it's
365 365 # better not to touch Python 2 part as it's already working fine.
366 366 fsdecode = identity
367 367
def getdoc(obj):
    """Return the ``__doc__`` of ``obj``, or None when it has none."""
    doc = getattr(obj, '__doc__', None)
    return doc
370 370
371 371 _notset = object()
372 372
def safehasattr(thing, attr):
    """Tell whether ``thing`` has attribute ``attr``.

    Uses a getattr() lookup with the private ``_notset`` sentinel as
    the default value.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
375 375
376 376 def _getoptbwrapper(orig, args, shortlist, namelist):
377 377 return orig(args, shortlist, namelist)
378 378
379 379 strkwargs = identity
380 380 byteskwargs = identity
381 381
382 382 oscurdir = os.curdir
383 383 oslinesep = os.linesep
384 384 osname = os.name
385 385 ospathsep = os.pathsep
386 386 ospardir = os.pardir
387 387 ossep = os.sep
388 388 osaltsep = os.altsep
389 389 long = long
390 390 stdin = sys.stdin
391 391 stdout = sys.stdout
392 392 stderr = sys.stderr
393 393 if getattr(sys, 'argv', None) is not None:
394 394 sysargv = sys.argv
395 395 sysplatform = sys.platform
396 396 getcwd = os.getcwd
397 397 sysexecutable = sys.executable
398 398 shlexsplit = shlex.split
399 399 bytesio = cStringIO.StringIO
400 400 stringio = bytesio
401 401 maplist = map
402 402 rangelist = range
403 403 ziplist = zip
404 404 rawinput = raw_input
405 405 getargspec = inspect.getargspec
406 406
407 isjython = sysplatform.startswith('java')
407 isjython = sysplatform.startswith(b'java')
408 408
409 isdarwin = sysplatform == 'darwin'
410 isposix = osname == 'posix'
411 iswindows = osname == 'nt'
409 isdarwin = sysplatform == b'darwin'
410 isposix = osname == b'posix'
411 iswindows = osname == b'nt'
412 412
def getoptb(args, shortlist, namelist):
    """Run getopt.getopt() through ``_getoptbwrapper``."""
    parser = getopt.getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
415 415
def gnugetoptb(args, shortlist, namelist):
    """Run getopt.gnu_getopt() through ``_getoptbwrapper``."""
    parser = getopt.gnu_getopt
    return _getoptbwrapper(parser, args, shortlist, namelist)
418 418
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """Create a temporary directory via tempfile.mkdtemp().

    Differs from the stdlib call only in that the default suffix and
    prefix are bytes.
    """
    path = tempfile.mkdtemp(suffix, prefix, dir)
    return path
421 421
422 422 # text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """Create a temporary file via tempfile.mkstemp().

    Differs from the stdlib call only in that the default suffix and
    prefix are bytes. No ``text`` argument is accepted.
    """
    created = tempfile.mkstemp(suffix, prefix, dir)
    return created
425 425
426 426 # mode must include 'b'ytes as encoding= is not supported
def namedtempfile(mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None,
                  delete=True):
    """Create a tempfile.NamedTemporaryFile in a binary mode.

    ``mode`` is converted with sysstr() and must contain 'b'; the
    remaining arguments are forwarded to NamedTemporaryFile.
    """
    mode = sysstr(mode)
    assert r'b' in mode
    options = dict(suffix=suffix, prefix=prefix, dir=dir, delete=delete)
    return tempfile.NamedTemporaryFile(mode, bufsize, **options)
General Comments 0
You need to be logged in to leave comments. Login now