##// END OF EJS Templates
pycompat: move multiline comments above a function to function doc...
Pulkit Goyal -
r32864:f57f1f37 default
parent child Browse files
Show More
@@ -1,450 +1,460 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import os
15 15 import shlex
16 16 import sys
17 17
18 18 ispy3 = (sys.version_info[0] >= 3)
19 19
20 20 if not ispy3:
21 21 import cookielib
22 22 import cPickle as pickle
23 23 import httplib
24 24 import Queue as _queue
25 25 import SocketServer as socketserver
26 26 import xmlrpclib
27 27 else:
28 28 import http.cookiejar as cookielib
29 29 import http.client as httplib
30 30 import pickle
31 31 import queue as _queue
32 32 import socketserver
33 33 import xmlrpc.client as xmlrpclib
34 34
35 35 def identity(a):
36 36 return a
37 37
38 38 if ispy3:
39 39 import builtins
40 40 import functools
41 41 import io
42 42 import struct
43 43
44 44 fsencode = os.fsencode
45 45 fsdecode = os.fsdecode
46 # A bytes version of os.name.
47 46 oslinesep = os.linesep.encode('ascii')
48 47 osname = os.name.encode('ascii')
49 48 ospathsep = os.pathsep.encode('ascii')
50 49 ossep = os.sep.encode('ascii')
51 50 osaltsep = os.altsep
52 51 if osaltsep:
53 52 osaltsep = osaltsep.encode('ascii')
54 53 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
55 54 # returns bytes.
56 55 getcwd = os.getcwdb
57 56 sysplatform = sys.platform.encode('ascii')
58 57 sysexecutable = sys.executable
59 58 if sysexecutable:
60 59 sysexecutable = os.fsencode(sysexecutable)
61 60 stringio = io.BytesIO
62 61 maplist = lambda *args: list(map(*args))
63 62
64 63 # TODO: .buffer might not exist if std streams were replaced; we'll need
65 64 # a silly wrapper to make a bytes stream backed by a unicode one.
66 65 stdin = sys.stdin.buffer
67 66 stdout = sys.stdout.buffer
68 67 stderr = sys.stderr.buffer
69 68
70 69 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
71 70 # we can use os.fsencode() to get back bytes argv.
72 71 #
73 72 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
74 73 #
75 74 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
76 75 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
77 76 if getattr(sys, 'argv', None) is not None:
78 77 sysargv = list(map(os.fsencode, sys.argv))
79 78
80 79 bytechr = struct.Struct('>B').pack
81 80
82 81 class bytestr(bytes):
83 82 """A bytes which mostly acts as a Python 2 str
84 83
85 84 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
86 85 (b'', b'foo', b'ascii', b'1')
87 86 >>> s = bytestr(b'foo')
88 87 >>> assert s is bytestr(s)
89 88
90 89 __bytes__() should be called if provided:
91 90
92 91 >>> class bytesable(object):
93 92 ... def __bytes__(self):
94 93 ... return b'bytes'
95 94 >>> bytestr(bytesable())
96 95 b'bytes'
97 96
98 97 There's no implicit conversion from non-ascii str as its encoding is
99 98 unknown:
100 99
101 100 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
102 101 Traceback (most recent call last):
103 102 ...
104 103 UnicodeEncodeError: ...
105 104
106 105 Comparison between bytestr and bytes should work:
107 106
108 107 >>> assert bytestr(b'foo') == b'foo'
109 108 >>> assert b'foo' == bytestr(b'foo')
110 109 >>> assert b'f' in bytestr(b'foo')
111 110 >>> assert bytestr(b'f') in b'foo'
112 111
113 112 Sliced elements should be bytes, not integer:
114 113
115 114 >>> s[1], s[:2]
116 115 (b'o', b'fo')
117 116 >>> list(s), list(reversed(s))
118 117 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
119 118
120 119 As bytestr type isn't propagated across operations, you need to cast
121 120 bytes to bytestr explicitly:
122 121
123 122 >>> s = bytestr(b'foo').upper()
124 123 >>> t = bytestr(s)
125 124 >>> s[0], t[0]
126 125 (70, b'F')
127 126
128 127 Be careful to not pass a bytestr object to a function which expects
129 128 bytearray-like behavior.
130 129
131 130 >>> t = bytes(t) # cast to bytes
132 131 >>> assert type(t) is bytes
133 132 """
134 133
135 134 def __new__(cls, s=b''):
136 135 if isinstance(s, bytestr):
137 136 return s
138 137 if (not isinstance(s, (bytes, bytearray))
139 138 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
140 139 s = str(s).encode(u'ascii')
141 140 return bytes.__new__(cls, s)
142 141
143 142 def __getitem__(self, key):
144 143 s = bytes.__getitem__(self, key)
145 144 if not isinstance(s, bytes):
146 145 s = bytechr(s)
147 146 return s
148 147
149 148 def __iter__(self):
150 149 return iterbytestr(bytes.__iter__(self))
151 150
152 151 def iterbytestr(s):
153 152 """Iterate bytes as if it were a str object of Python 2"""
154 153 return map(bytechr, s)
155 154
156 155 def sysbytes(s):
157 156 """Convert an internal str (e.g. keyword, __doc__) back to bytes
158 157
159 158 This never raises UnicodeEncodeError, but only ASCII characters
160 159 can be round-tripped by sysstr(sysbytes(s)).
161 160 """
162 161 return s.encode(u'utf-8')
163 162
164 163 def sysstr(s):
165 164 """Return a keyword str to be passed to Python functions such as
166 165 getattr() and str.encode()
167 166
168 167 This never raises UnicodeDecodeError. Non-ascii characters are
169 168 considered invalid and mapped to arbitrary but unique code points
170 169 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
171 170 """
172 171 if isinstance(s, builtins.str):
173 172 return s
174 173 return s.decode(u'latin-1')
175 174
176 175 def strurl(url):
177 176 """Converts a bytes url back to str"""
178 177 return url.decode(u'ascii')
179 178
180 179 def bytesurl(url):
181 180 """Converts a str url to bytes by encoding in ascii"""
182 181 return url.encode(u'ascii')
183 182
184 183 def raisewithtb(exc, tb):
185 184 """Raise exception with the given traceback"""
186 185 raise exc.with_traceback(tb)
187 186
188 187 def getdoc(obj):
189 188 """Get docstring as bytes; may be None so gettext() won't confuse it
190 189 with _('')"""
191 190 doc = getattr(obj, u'__doc__', None)
192 191 if doc is None:
193 192 return doc
194 193 return sysbytes(doc)
195 194
196 195 def _wrapattrfunc(f):
197 196 @functools.wraps(f)
198 197 def w(object, name, *args):
199 198 return f(object, sysstr(name), *args)
200 199 return w
201 200
202 201 # these wrappers are automagically imported by hgloader
203 202 delattr = _wrapattrfunc(builtins.delattr)
204 203 getattr = _wrapattrfunc(builtins.getattr)
205 204 hasattr = _wrapattrfunc(builtins.hasattr)
206 205 setattr = _wrapattrfunc(builtins.setattr)
207 206 xrange = builtins.range
208 207 unicode = str
209 208
210 209 def open(name, mode='r', buffering=-1):
211 210 return builtins.open(name, sysstr(mode), buffering)
212 211
213 # getopt.getopt() on Python 3 deals with unicodes internally so we cannot
214 # pass bytes there. Passing unicodes will result in unicodes as return
215 # values which we need to convert again to bytes.
216 212 def getoptb(args, shortlist, namelist):
213 """
214 Takes bytes arguments, converts them to unicode, passes them to
215 getopt.getopt(), converts the returned values back to bytes and then
216 returns them for Python 3 compatibility as getopt.getopt() doesn't accept
217 bytes on Python 3.
218 """
217 219 args = [a.decode('latin-1') for a in args]
218 220 shortlist = shortlist.decode('latin-1')
219 221 namelist = [a.decode('latin-1') for a in namelist]
220 222 opts, args = getopt.getopt(args, shortlist, namelist)
221 223 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
222 224 for a in opts]
223 225 args = [a.encode('latin-1') for a in args]
224 226 return opts, args
225 227
226 # keys of keyword arguments in Python need to be strings which are unicodes
227 # Python 3. This function takes keyword arguments, convert the keys to str.
228 228 def strkwargs(dic):
229 """
230 Converts the keys of a python dictionary to str i.e. unicodes so that
231 they can be passed as keyword arguments as dictionaries with bytes keys
232 can't be passed as keyword arguments to functions on Python 3.
233 """
229 234 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
230 235 return dic
231 236
232 # keys of keyword arguments need to be unicode while passing into
233 # a function. This function helps us to convert those keys back to bytes
234 # again as we need to deal with bytes.
235 237 def byteskwargs(dic):
238 """
239 Converts keys of python dictionaries to bytes as they were converted to
240 str to pass that dictionary as a keyword argument on Python 3.
241 """
236 242 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
237 243 return dic
238 244
239 # shlex.split() accepts unicodes on Python 3. This function takes bytes
240 # argument, convert it into unicodes, pass into shlex.split(), convert the
241 # returned value to bytes and return that.
242 245 # TODO: handle shlex.shlex().
243 246 def shlexsplit(s):
247 """
248 Takes bytes argument, converts it to str i.e. unicodes, passes that into
249 shlex.split(), converts the returned value to bytes and returns that for
250 Python 3 compatibility as shlex.split() doesn't accept bytes on Python 3.
251 """
244 252 ret = shlex.split(s.decode('latin-1'))
245 253 return [a.encode('latin-1') for a in ret]
246 254
247 255 else:
248 256 import cStringIO
249 257
250 258 bytechr = chr
251 259 bytestr = str
252 260 iterbytestr = iter
253 261 sysbytes = identity
254 262 sysstr = identity
255 263 strurl = identity
256 264 bytesurl = identity
257 265
258 266 # this can't be parsed on Python 3
259 267 exec('def raisewithtb(exc, tb):\n'
260 268 ' raise exc, None, tb\n')
261 269
262 # Partial backport from os.py in Python 3, which only accepts bytes.
263 # In Python 2, our paths should only ever be bytes, a unicode path
264 # indicates a bug.
265 270 def fsencode(filename):
271 """
272 Partial backport from os.py in Python 3, which only accepts bytes.
273 In Python 2, our paths should only ever be bytes, a unicode path
274 indicates a bug.
275 """
266 276 if isinstance(filename, str):
267 277 return filename
268 278 else:
269 279 raise TypeError(
270 280 "expect str, not %s" % type(filename).__name__)
271 281
272 282 # In Python 2, fsdecode() has a very high chance to receive bytes. So it's
273 283 # better not to touch Python 2 part as it's already working fine.
274 284 fsdecode = identity
275 285
276 286 def getdoc(obj):
277 287 return getattr(obj, '__doc__', None)
278 288
279 289 def getoptb(args, shortlist, namelist):
280 290 return getopt.getopt(args, shortlist, namelist)
281 291
282 292 strkwargs = identity
283 293 byteskwargs = identity
284 294
285 295 oslinesep = os.linesep
286 296 osname = os.name
287 297 ospathsep = os.pathsep
288 298 ossep = os.sep
289 299 osaltsep = os.altsep
290 300 stdin = sys.stdin
291 301 stdout = sys.stdout
292 302 stderr = sys.stderr
293 303 if getattr(sys, 'argv', None) is not None:
294 304 sysargv = sys.argv
295 305 sysplatform = sys.platform
296 306 getcwd = os.getcwd
297 307 sysexecutable = sys.executable
298 308 shlexsplit = shlex.split
299 309 stringio = cStringIO.StringIO
300 310 maplist = map
301 311
302 312 empty = _queue.Empty
303 313 queue = _queue.Queue
304 314
305 315 class _pycompatstub(object):
306 316 def __init__(self):
307 317 self._aliases = {}
308 318
309 319 def _registeraliases(self, origin, items):
310 320 """Add items that will be populated at the first access"""
311 321 items = map(sysstr, items)
312 322 self._aliases.update(
313 323 (item.replace(sysstr('_'), sysstr('')).lower(), (origin, item))
314 324 for item in items)
315 325
316 326 def _registeralias(self, origin, attr, name):
317 327 """Alias ``origin``.``attr`` as ``name``"""
318 328 self._aliases[sysstr(name)] = (origin, sysstr(attr))
319 329
320 330 def __getattr__(self, name):
321 331 try:
322 332 origin, item = self._aliases[name]
323 333 except KeyError:
324 334 raise AttributeError(name)
325 335 self.__dict__[name] = obj = getattr(origin, item)
326 336 return obj
327 337
328 338 httpserver = _pycompatstub()
329 339 urlreq = _pycompatstub()
330 340 urlerr = _pycompatstub()
331 341 if not ispy3:
332 342 import BaseHTTPServer
333 343 import CGIHTTPServer
334 344 import SimpleHTTPServer
335 345 import urllib2
336 346 import urllib
337 347 import urlparse
338 348 urlreq._registeraliases(urllib, (
339 349 "addclosehook",
340 350 "addinfourl",
341 351 "ftpwrapper",
342 352 "pathname2url",
343 353 "quote",
344 354 "splitattr",
345 355 "splitpasswd",
346 356 "splitport",
347 357 "splituser",
348 358 "unquote",
349 359 "url2pathname",
350 360 "urlencode",
351 361 ))
352 362 urlreq._registeraliases(urllib2, (
353 363 "AbstractHTTPHandler",
354 364 "BaseHandler",
355 365 "build_opener",
356 366 "FileHandler",
357 367 "FTPHandler",
358 368 "HTTPBasicAuthHandler",
359 369 "HTTPDigestAuthHandler",
360 370 "HTTPHandler",
361 371 "HTTPPasswordMgrWithDefaultRealm",
362 372 "HTTPSHandler",
363 373 "install_opener",
364 374 "ProxyHandler",
365 375 "Request",
366 376 "urlopen",
367 377 ))
368 378 urlreq._registeraliases(urlparse, (
369 379 "urlparse",
370 380 "urlunparse",
371 381 ))
372 382 urlerr._registeraliases(urllib2, (
373 383 "HTTPError",
374 384 "URLError",
375 385 ))
376 386 httpserver._registeraliases(BaseHTTPServer, (
377 387 "HTTPServer",
378 388 "BaseHTTPRequestHandler",
379 389 ))
380 390 httpserver._registeraliases(SimpleHTTPServer, (
381 391 "SimpleHTTPRequestHandler",
382 392 ))
383 393 httpserver._registeraliases(CGIHTTPServer, (
384 394 "CGIHTTPRequestHandler",
385 395 ))
386 396
387 397 else:
388 398 import urllib.parse
389 399 urlreq._registeraliases(urllib.parse, (
390 400 "splitattr",
391 401 "splitpasswd",
392 402 "splitport",
393 403 "splituser",
394 404 "urlparse",
395 405 "urlunparse",
396 406 ))
397 407 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
398 408 import urllib.request
399 409 urlreq._registeraliases(urllib.request, (
400 410 "AbstractHTTPHandler",
401 411 "BaseHandler",
402 412 "build_opener",
403 413 "FileHandler",
404 414 "FTPHandler",
405 415 "ftpwrapper",
406 416 "HTTPHandler",
407 417 "HTTPSHandler",
408 418 "install_opener",
409 419 "pathname2url",
410 420 "HTTPBasicAuthHandler",
411 421 "HTTPDigestAuthHandler",
412 422 "HTTPPasswordMgrWithDefaultRealm",
413 423 "ProxyHandler",
414 424 "Request",
415 425 "url2pathname",
416 426 "urlopen",
417 427 ))
418 428 import urllib.response
419 429 urlreq._registeraliases(urllib.response, (
420 430 "addclosehook",
421 431 "addinfourl",
422 432 ))
423 433 import urllib.error
424 434 urlerr._registeraliases(urllib.error, (
425 435 "HTTPError",
426 436 "URLError",
427 437 ))
428 438 import http.server
429 439 httpserver._registeraliases(http.server, (
430 440 "HTTPServer",
431 441 "BaseHTTPRequestHandler",
432 442 "SimpleHTTPRequestHandler",
433 443 "CGIHTTPRequestHandler",
434 444 ))
435 445
436 446 # urllib.parse.quote() accepts both str and bytes, decodes bytes
437 447 # (if necessary), and returns str. This is wonky. We provide a custom
438 448 # implementation that only accepts bytes and emits bytes.
439 449 def quote(s, safe=r'/'):
440 450 s = urllib.parse.quote_from_bytes(s, safe=safe)
441 451 return s.encode('ascii', 'strict')
442 452
443 453 # urllib.parse.urlencode() returns str. We use this function to make
444 454 # sure we return bytes.
445 455 def urlencode(query, doseq=False):
446 456 s = urllib.parse.urlencode(query, doseq=doseq)
447 457 return s.encode('ascii')
448 458
449 459 urlreq.quote = quote
450 460 urlreq.urlencode = urlencode
General Comments 0
You need to be logged in to leave comments. Login now