##// END OF EJS Templates
pycompat: prevent encoding or decoding values if not required...
Pulkit Goyal -
r36662:e2b87e19 default
parent child Browse files
Show More
@@ -1,353 +1,357 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import inspect
15 15 import os
16 16 import shlex
17 17 import sys
18 18
# True when the running interpreter is Python 3 or newer.
ispy3 = (sys.version_info[0] >= 3)
# True when a builtin module named ``__pypy__`` is present.
ispypy = (r'__pypy__' in sys.builtin_module_names)
21 21
22 22 if not ispy3:
23 23 import cookielib
24 24 import cPickle as pickle
25 25 import httplib
26 26 import Queue as _queue
27 27 import SocketServer as socketserver
28 28 import xmlrpclib
29 29 else:
30 30 import http.cookiejar as cookielib
31 31 import http.client as httplib
32 32 import pickle
33 33 import queue as _queue
34 34 import socketserver
35 35 import xmlrpc.client as xmlrpclib
36 36
# Version-independent aliases for the queue module's exception and class.
empty = _queue.Empty
queue = _queue.Queue
39 39
def identity(a):
    """Return ``a`` unchanged.

    Used as a no-op stand-in where a conversion is unnecessary.
    """
    return a
42 42
43 43 if ispy3:
44 44 import builtins
45 45 import functools
46 46 import io
47 47 import struct
48 48
fsencode = os.fsencode
fsdecode = os.fsdecode
# The os/sys string constants below are str on Python 3; expose them as
# bytes.
oslinesep = os.linesep.encode('ascii')
osname = os.name.encode('ascii')
ospathsep = os.pathsep.encode('ascii')
ossep = os.sep.encode('ascii')
osaltsep = os.altsep
if osaltsep:
    # os.altsep may be None, so only encode it when it is set.
    osaltsep = osaltsep.encode('ascii')
# os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
# returns bytes.
getcwd = os.getcwdb
sysplatform = sys.platform.encode('ascii')
sysexecutable = sys.executable
if sysexecutable:
    # sys.executable may be empty or None; only encode a real path.
    sysexecutable = os.fsencode(sysexecutable)
stringio = io.BytesIO
# map() and zip() return iterators on Python 3; these variants return lists.
maplist = lambda *args: list(map(*args))
ziplist = lambda *args: list(zip(*args))
rawinput = input
getargspec = inspect.getfullargspec
70 70
# TODO: .buffer might not exist if std streams were replaced; we'll need
# a silly wrapper to make a bytes stream backed by a unicode one.
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
stderr = sys.stderr.buffer

# Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
# we can use os.fsencode() to get back bytes argv.
#
# https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
#
# TODO: On Windows, the native argv is wchar_t, so we'll need a different
# workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
if getattr(sys, 'argv', None) is not None:
    # sysargv is left undefined when sys.argv is missing or None.
    sysargv = list(map(os.fsencode, sys.argv))
86 86
# Pack an integer as one unsigned byte, returning a length-1 bytes object.
bytechr = struct.Struct('>B').pack
# Format an object with b'%r', i.e. its repr as bytes.
byterepr = b'%r'.__mod__
89 89
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is returned as-is rather than copied.
        if isinstance(s, bytestr):
            return s
        # Anything that is not bytes-like and has no __bytes__() is
        # stringified and must be pure ASCII.
        if (not isinstance(s, (bytes, bytearray))
            and not hasattr(s, u'__bytes__')):  # hasattr-py3-only
            s = str(s).encode(u'ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        s = bytes.__getitem__(self, key)
        # Integer indexing yields an int on Python 3; turn it back into a
        # length-1 bytes. Slices are already bytes.
        if not isinstance(s, bytes):
            s = bytechr(s)
        return s

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))

    def __repr__(self):
        return bytes.__repr__(self)[1:]  # drop b''
162 162
def iterbytestr(s):
    """Iterate bytes as if it were a str object of Python 2

    Each item produced is ``bytechr`` applied to the corresponding item
    of ``s``.
    """
    return map(bytechr, s)
166 166
def maybebytestr(s):
    """Promote bytes to bytestr

    Any value that is not a bytes instance is returned unchanged.
    """
    if isinstance(s, bytes):
        return bytestr(s)
    return s
172 172
def sysbytes(s):
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    This never raises UnicodeEncodeError, but only ASCII characters
    can be round-trip by sysstr(sysbytes(s)).
    """
    return s.encode(u'utf-8')
180 180
def sysstr(s):
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    This never raises UnicodeDecodeError. Non-ascii characters are
    considered invalid and mapped to arbitrary but unique code points
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    # A value that is already a native str is passed through untouched.
    if isinstance(s, builtins.str):
        return s
    # latin-1 maps every byte value to a code point, so decoding cannot fail.
    return s.decode(u'latin-1')
192 192
def strurl(url):
    """Converts a bytes url back to str

    The url is decoded as ASCII only when it is bytes; any other value
    is returned unchanged, so no decoding happens when it isn't required.
    """
    if isinstance(url, bytes):
        return url.decode(u'ascii')
    return url
196 198
def bytesurl(url):
    """Converts a str url to bytes by encoding in ascii

    The url is encoded only when it is a str; any other value is
    returned unchanged, so no encoding happens when it isn't required.
    """
    if isinstance(url, str):
        return url.encode(u'ascii')
    return url
200 204
def raisewithtb(exc, tb):
    """Raise exception with the given traceback"""
    raise exc.with_traceback(tb)
204 208
def getdoc(obj):
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')"""
    doc = getattr(obj, u'__doc__', None)
    if doc is None:
        return doc
    return sysbytes(doc)
212 216
def _wrapattrfunc(f):
    """Wrap an attribute builtin so its ``name`` argument goes through sysstr

    ``f`` is called as ``f(object, sysstr(name), *args)``; the wrapper
    keeps ``f``'s metadata via functools.wraps.
    """
    @functools.wraps(f)
    def w(object, name, *args):
        return f(object, sysstr(name), *args)
    return w
218 222
# these wrappers are automagically imported by hgloader
delattr = _wrapattrfunc(builtins.delattr)
getattr = _wrapattrfunc(builtins.getattr)
hasattr = _wrapattrfunc(builtins.hasattr)
setattr = _wrapattrfunc(builtins.setattr)
# Python 2 names mapped onto their Python 3 equivalents.
xrange = builtins.range
unicode = str
226 230
def open(name, mode='r', buffering=-1, encoding=None):
    """Call builtins.open() with ``mode`` converted by sysstr()

    ``name``, ``buffering`` and ``encoding`` are passed through unchanged.
    """
    return builtins.open(name, sysstr(mode), buffering, encoding)
229 233
230 234 def _getoptbwrapper(orig, args, shortlist, namelist):
231 235 """
232 236 Takes bytes arguments, converts them to unicode, pass them to
233 237 getopt.getopt(), convert the returned values back to bytes and then
234 238 return them for Python 3 compatibility as getopt.getopt() don't accepts
235 239 bytes on Python 3.
236 240 """
237 241 args = [a.decode('latin-1') for a in args]
238 242 shortlist = shortlist.decode('latin-1')
239 243 namelist = [a.decode('latin-1') for a in namelist]
240 244 opts, args = orig(args, shortlist, namelist)
241 245 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
242 246 for a in opts]
243 247 args = [a.encode('latin-1') for a in args]
244 248 return opts, args
245 249
def strkwargs(dic):
    """
    Converts the keys of a python dictionary to str i.e. unicodes so that
    they can be passed as keyword arguments as dictionaries with bytes keys
    can't be passed as keyword arguments to functions on Python 3.

    Returns a new dict; values are left untouched.
    """
    # dict.iteritems() does not exist on Python 3, and this function is
    # only defined there, so items() is used.
    return dict((k.decode('latin-1'), v) for k, v in dic.items())
254 258
def byteskwargs(dic):
    """
    Converts keys of python dictionaries to bytes as they were converted to
    str to pass that dictionary as a keyword argument on Python 3.

    Returns a new dict; values are left untouched.
    """
    # dict.iteritems() does not exist on Python 3, and this function is
    # only defined there, so items() is used.
    return dict((k.encode('latin-1'), v) for k, v in dic.items())
262 266
# TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Takes bytes argument, convert it to str i.e. unicodes, pass that into
    shlex.split(), convert the returned value to bytes and return that for
    Python 3 compatibility as shlex.split() don't accept bytes on Python 3.
    """
    ret = shlex.split(s.decode('latin-1'), comments, posix)
    return [a.encode('latin-1') for a in ret]
272 276
def emailparser(*args, **kwargs):
    """Return an email.parser.BytesParser built with the given arguments"""
    # Imported at call time, not at module import time.
    import email.parser
    return email.parser.BytesParser(*args, **kwargs)
276 280
277 281 else:
278 282 import cStringIO
279 283
# Python 2 branch: the builtins and identity() stand in for the helpers
# that are defined explicitly for Python 3.
bytechr = chr
byterepr = repr
bytestr = str
iterbytestr = iter
maybebytestr = identity
sysbytes = identity
sysstr = identity
strurl = identity
bytesurl = identity
289 293
# this can't be parsed on Python 3
# (the three-argument raise statement is Python 2 only syntax, so the
# definition is kept in a string and compiled at import time)
exec('def raisewithtb(exc, tb):\n'
     '    raise exc, None, tb\n')
293 297
def fsencode(filename):
    """
    Partial backport from os.py in Python 3, which only accepts bytes.
    In Python 2, our paths should only ever be bytes, a unicode path
    indicates a bug.

    Returns ``filename`` unchanged when it is a str; raises TypeError
    otherwise.
    """
    if not isinstance(filename, str):
        raise TypeError(
            "expect str, not %s" % type(filename).__name__)
    return filename
305 309
# In Python 2, fsdecode() is very likely to receive bytes. So it's
# better not to touch the Python 2 part as it's already working fine.
fsdecode = identity
309 313
def getdoc(obj):
    """Return ``obj.__doc__``, or None when the attribute is missing"""
    return getattr(obj, '__doc__', None)
312 316
313 317 def _getoptbwrapper(orig, args, shortlist, namelist):
314 318 return orig(args, shortlist, namelist)
315 319
# Python 2 branch: keyword dicts need no key conversion.
strkwargs = identity
byteskwargs = identity

# The os/sys values are used directly, without the encoding that the
# Python 3 branch applies.
oslinesep = os.linesep
osname = os.name
ospathsep = os.pathsep
ossep = os.sep
osaltsep = os.altsep
stdin = sys.stdin
stdout = sys.stdout
stderr = sys.stderr
if getattr(sys, 'argv', None) is not None:
    # sysargv is left undefined when sys.argv is missing or None.
    sysargv = sys.argv
sysplatform = sys.platform
getcwd = os.getcwd
sysexecutable = sys.executable
shlexsplit = shlex.split
stringio = cStringIO.StringIO
maplist = map
ziplist = zip
rawinput = raw_input
getargspec = inspect.getargspec
338 342
def emailparser(*args, **kwargs):
    """Return an email.parser.Parser built with the given arguments"""
    # Imported at call time, not at module import time.
    import email.parser
    return email.parser.Parser(*args, **kwargs)
342 346
# sysplatform and osname are bytes on Python 3 (and str, which is bytes, on
# Python 2), so compare against explicit bytes literals. With unprefixed
# literals these checks only work if a source transformer rewrites them.
isjython = sysplatform.startswith(b'java')

isdarwin = sysplatform == b'darwin'
isposix = osname == b'posix'
iswindows = osname == b'nt'
348 352
def getoptb(args, shortlist, namelist):
    """Run getopt.getopt() through _getoptbwrapper()

    Returns whatever the wrapper returns for ``(opts, args)``.
    """
    return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
351 355
def gnugetoptb(args, shortlist, namelist):
    """Run getopt.gnu_getopt() through _getoptbwrapper()

    Returns whatever the wrapper returns for ``(opts, args)``.
    """
    return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
General Comments 0
You need to be logged in to leave comments. Login now