##// END OF EJS Templates
py3: add pycompat.unicode and add it to importer...
Pulkit Goyal -
r31843:526e4597 default
parent child Browse files
Show More
@@ -1,402 +1,403 b''
1 1 # __init__.py - Startup and module loading logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import imp
11 11 import os
12 12 import sys
13 13 import zipimport
14 14
15 15 from . import (
16 16 policy
17 17 )
18 18
19 19 __all__ = []
20 20
21 21 modulepolicy = policy.policy
22 22
23 23 # Modules that have both Python and C implementations. See also the
24 24 # set of .py files under mercurial/pure/.
25 25 _dualmodules = set([
26 26 'mercurial.base85',
27 27 'mercurial.bdiff',
28 28 'mercurial.diffhelpers',
29 29 'mercurial.mpatch',
30 30 'mercurial.osutil',
31 31 'mercurial.parsers',
32 32 ])
33 33
class hgimporter(object):
    """Object that conforms to import hook interface defined in PEP-302.

    The importer only claims the modules listed in ``_dualmodules`` (those
    that exist both as a C extension and as pure Python) and picks the
    implementation according to ``modulepolicy``.
    """
    def find_module(self, name, path=None):
        """Return this importer for dual-implementation modules, else None."""
        # We only care about modules that have both C and pure implementations.
        if name in _dualmodules:
            return self
        return None

    def load_module(self, name):
        """Load ``name`` as a C extension or from ``mercurial.pure``.

        Returns the module, which is also stored in ``sys.modules``.
        Raises ImportError when the module cannot be loaded under the
        current policy.
        """
        mod = sys.modules.get(name, None)
        if mod:
            return mod

        mercurial = sys.modules['mercurial']

        # The zip importer behaves sufficiently differently from the default
        # importer to warrant its own code path.
        loader = getattr(mercurial, '__loader__', None)
        if isinstance(loader, zipimport.zipimporter):
            def ziploader(*paths):
                """Obtain a zipimporter for a directory under the main zip."""
                path = os.path.join(loader.archive, *paths)
                zl = sys.path_importer_cache.get(path)
                if not zl:
                    zl = zipimport.zipimporter(path)
                return zl

            try:
                if modulepolicy in policy.policynoc:
                    raise ImportError()

                zl = ziploader('mercurial')
                mod = zl.load_module(name)
                # Unlike imp, ziploader doesn't expose module metadata that
                # indicates the type of module. So just assume what we found
                # is OK (even though it could be a pure Python module).
            except ImportError:
                if modulepolicy == b'c':
                    raise
                zl = ziploader('mercurial', 'pure')
                mod = zl.load_module(name)

            sys.modules[name] = mod
            return mod

        # Unlike the default importer which searches special locations and
        # sys.path, we only look in the directory where "mercurial" was
        # imported from.

        # imp.find_module doesn't support submodules (modules with ".").
        # Instead you have to pass the parent package's __path__ attribute
        # as the path argument.
        stem = name.split('.')[-1]

        try:
            if modulepolicy in policy.policynoc:
                raise ImportError()

            modinfo = imp.find_module(stem, mercurial.__path__)

            # The Mercurial installer used to copy files from
            # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible
            # for some installations to have .py files under mercurial/*.
            # Loading Python modules when we expected C versions could result
            # in a) poor performance b) loading a version from a previous
            # Mercurial version, potentially leading to incompatibility. Either
            # scenario is bad. So we verify that modules loaded from
            # mercurial/* are C extensions. If the current policy allows the
            # loading of .py modules, the module will be re-imported from
            # mercurial/pure/* below.
            if modinfo[2][2] != imp.C_EXTENSION:
                # imp.find_module hands us an open file that we must close
                # ourselves; we are about to discard this result.
                if modinfo[0] is not None:
                    modinfo[0].close()
                raise ImportError('.py version of %s found where C '
                                  'version should exist' % name)

        except ImportError:
            if modulepolicy == b'c':
                raise

            # Could not load the C extension and pure Python is allowed. So
            # try to load them.
            from . import pure
            modinfo = imp.find_module(stem, pure.__path__)
            if not modinfo:
                raise ImportError('could not find mercurial module %s' %
                                  name)

        try:
            mod = imp.load_module(name, *modinfo)
        finally:
            # imp.load_module does not close the file argument, even when
            # loading fails; the caller is responsible for it.
            if modinfo[0] is not None:
                modinfo[0].close()
        sys.modules[name] = mod
        return mod
123 123
124 124 # Python 3 uses a custom module loader that transforms source code between
125 125 # source file reading and compilation. This is done by registering a custom
126 126 # finder that changes the spec for Mercurial modules to use a custom loader.
127 127 if sys.version_info[0] >= 3:
128 128 from . import pure
129 129 import importlib
130 130 import io
131 131 import token
132 132 import tokenize
133 133
class hgpathentryfinder(importlib.abc.MetaPathFinder):
    """A sys.meta_path finder that uses a custom module loader."""
    def find_spec(self, fullname, path, target=None):
        """Return a module spec whose loader is an ``hgloader``.

        Returns None for modules outside the ``mercurial``, ``hgext`` and
        ``hgext3rd`` packages, for ``mercurial.zstd``, and when none of the
        other registered finders can locate the module.
        """
        # Only handle Mercurial-related modules.
        if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
            return None
        # zstd is already dual-version clean, don't try and mangle it
        if fullname.startswith('mercurial.zstd'):
            return None

        # This assumes Python 3 doesn't support loading C modules.
        if fullname in _dualmodules:
            stem = fullname.split('.')[-1]
            fullname = 'mercurial.pure.%s' % stem
            target = pure
            assert len(path) == 1
            path = [os.path.join(path[0], 'pure')]

        # Try to find the module using other registered finders.
        spec = None
        for finder in sys.meta_path:
            if finder == self:
                continue

            # Legacy finders may only implement ``find_module``. They cannot
            # give us a spec, so skip them instead of failing the import
            # with an AttributeError.
            findspec = getattr(finder, 'find_spec', None)
            if findspec is None:
                continue

            spec = findspec(fullname, path, target=target)
            if spec:
                break

        # This is a Mercurial-related module but we couldn't find it
        # using the previously-registered finders. This likely means
        # the module doesn't exist.
        if not spec:
            return None

        if fullname.startswith('mercurial.pure.'):
            spec.name = spec.name.replace('.pure.', '.')

        # TODO need to support loaders from alternate specs, like zip
        # loaders.
        spec.loader = hgloader(spec.name, spec.origin)
        return spec
175 175
def replacetokens(tokens, fullname):
    """Rewrite a raw token stream so that it compiles as Python 3.

    The custom module loader calls this between decoding the source and
    compiling it.

    ``tokens`` is a list of tokenize tokens and ``fullname`` is the dotted
    name of the module being loaded. This is a generator yielding the
    (possibly rewritten) tokens.

    The input list may be modified in place while it is processed, and
    those modifications do not necessarily mirror the yielded stream.

    REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
    OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
    """
    # True from a "from __future__ import" until the NEWLINE ending that line.
    infutureimport = False

    # The helpers below read the ``tokens`` list and the index ``i`` of the
    # ``for i, t in enumerate(tokens)`` loop further down.
    def _isop(j, *o):
        """Tell whether tokens[j] is an OP with one of the given values"""
        try:
            tok = tokens[j]
        except IndexError:
            return False
        return tok.type == token.OP and tok.string in o

    def _findargnofcall(n):
        """Locate argument n (counting from 0) of a call expression

        Returns the index of the first token of that argument, or None
        when the call has fewer arguments.

        Assumes that token[i + 1] is '('.

        """
        depth = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                depth -= 1
                if depth < 0:
                    return None
            elif n == 0:
                # first token of the argument we were asked for
                return j
            elif _isop(j, '(', '[', '{'):
                depth += 1
            elif _isop(j, ',') and depth == 0:
                n -= 1

        return None

    def _ensureunicode(j):
        """Force the string token at j to be a unicode literal

        The token gets a ``u`` prefix so that the string transformation
        below leaves it alone instead of adding the byte prefix.

        Non-string tokens are left untouched. The caller must have checked
        that j is in range.

        """
        tok = tokens[j]
        if tok.type == token.STRING and tok.string.startswith(("'", '"')):
            tokens[j] = tok._replace(string='u%s' % tok.string)

    for i, t in enumerate(tokens):
        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING:
            s = t.string

            # Two kinds of literals pass through unchanged:
            #
            # - Triple-quoted strings, which are treated as docstrings. The
            #   "from __future__" parsing (which allows a module docstring
            #   before it) doesn't handle a b''' prefixed docstring and
            #   fails with a SyntaxError, so all of them stay unprefixed.
            #   Mercurial code touching docstrings therefore has to cope
            #   with unicode, unfortunately.
            # - Literals whose first character isn't a quote: that is
            #   likely a string prefix (such as 'b', 'u', or 'r').
            if s[0:3] in ("'''", '"""') or s[0] not in ("'", '"'):
                yield t
            else:
                # Plain string literal. Prefix to make a b'' string.
                yield t._replace(string='b%s' % s)
            continue

        # Insert compatibility imports at "from __future__ import" line.
        # No '\n' should be added to preserve line numbers.
        if t.type == token.NAME and t.string == 'import':
            preceding = [(u.type, u.string) for u in tokens[i - 2:i]]
            if preceding == [(token.NAME, 'from'),
                             (token.NAME, '__future__')]:
                infutureimport = True
        if t.type == token.NEWLINE and infutureimport:
            infutureimport = False
            if fullname == 'mercurial.pycompat':
                yield t
                continue
            row, col = t.start
            injected = (b'; from mercurial.pycompat import '
                        b'delattr, getattr, hasattr, setattr, xrange, '
                        b'open, unicode\n')
            for u in tokenize.tokenize(io.BytesIO(injected).readline):
                if u.type in (tokenize.ENCODING, token.ENDMARKER):
                    continue
                # Shift the injected tokens onto the end of the current line.
                yield u._replace(
                    start=(row, col + u.start[1]), end=(row, col + u.end[1]))
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string
            ismethod = _isop(i - 1, '.')

            # *attr() builtins don't accept byte strings to 2nd argument.
            if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                not ismethod):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensureunicode(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and ismethod:
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensureunicode(argidx)

            # iteritems/itervalues do not exist on Python 3; emit
            # items/values instead.
            elif fn in ('iteritems', 'itervalues'):
                yield t._replace(string=fn[4:])
                continue

        # Emit unmodified token.
        yield t
321 322
# Header to add to bytecode files. This MUST be changed when
# ``replacetokens`` or any mechanism that changes semantics of module
# loading is changed. Otherwise cached bytecode may get loaded without
# the new transformation mechanisms applied.
BYTECODEHEADER = b'HG\x00\x0a'

class hgloader(importlib.machinery.SourceFileLoader):
    """Source file loader that rewrites modules for Python 3.

    Before source is compiled to a code object, its token stream is run
    through ``replacetokens`` so that code written natively for Python 2
    also works on Python 3 without cluttering the source.

    Rewriting source breaks an assumption of the built-in bytecode cache:
    the built-in importer only stamps cached files with the
    Python/bytecode version, while the Mercurial transformations can
    change independently of it. Bytecode cached by an older version of
    the transformation must therefore not be reused.

    To achieve that, ``get_data`` and ``set_data`` (which
    ``SourceFileLoader`` uses to read and write bytecode cache files) are
    wrapped to maintain an extra header in front of the cached data. As
    long as the header version is bumped whenever semantics change, stale
    cache files are rejected.

    The header has the form ``HG<VERSION>``: a literal ``HG`` followed by
    2 binary bytes holding the transformation version.
    """
    def get_data(self, path):
        """Read ``path``, validating and removing the hg bytecode header."""
        data = super(hgloader, self).get_data(path)

        iscached = path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES))
        if not iscached:
            return data

        # A cached file must start with the header of the current
        # transformation version. A missing or different header is reported
        # with OSError because that is how ``SourceFileLoader.get_code()``
        # expects a "bad" cached file to be signalled.
        header, payload = data[0:4], data[4:]
        if header[0:2] != b'HG':
            raise OSError('no hg header')
        if header != BYTECODEHEADER:
            raise OSError('hg header version mismatch')

        return payload

    def set_data(self, path, data, *args, **kwargs):
        """Write ``data`` to ``path``, prefixing bytecode with the header."""
        if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
            data = BYTECODEHEADER + data

        return super(hgloader, self).set_data(path, data, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        rawtokens = list(tokenize.tokenize(io.BytesIO(data).readline))
        data = tokenize.untokenize(replacetokens(rawtokens, self.name))
        # Python's built-in importer strips frames from exceptions raised
        # for this code. Unfortunately, that mechanism isn't extensible
        # and our frame will be blamed for the import failure. There
        # are extremely hacky ways to do frame stripping. We haven't
        # implemented them because they are very ugly.
        return super(hgloader, self).source_to_code(data, path)
392 393
393 394 # We automagically register our custom importer as a side-effect of loading.
394 395 # This is necessary to ensure that any entry points are able to import
395 396 # mercurial.* modules without having to perform this registration themselves.
396 397 if sys.version_info[0] >= 3:
397 398 _importercls = hgpathentryfinder
398 399 else:
399 400 _importercls = hgimporter
400 401 if not any(isinstance(x, _importercls) for x in sys.meta_path):
401 402 # meta_path is used before any implicit finders and before sys.path.
402 403 sys.meta_path.insert(0, _importercls())
@@ -1,409 +1,410 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import os
15 15 import shlex
16 16 import sys
17 17
18 18 ispy3 = (sys.version_info[0] >= 3)
19 19
20 20 if not ispy3:
21 21 import cPickle as pickle
22 22 import httplib
23 23 import Queue as _queue
24 24 import SocketServer as socketserver
25 25 import xmlrpclib
26 26 else:
27 27 import http.client as httplib
28 28 import pickle
29 29 import queue as _queue
30 30 import socketserver
31 31 import xmlrpc.client as xmlrpclib
32 32
def identity(a):
    """Return the argument unchanged."""
    return a
35 35
36 36 if ispy3:
37 37 import builtins
38 38 import functools
39 39 import io
40 40 import struct
41 41
42 42 fsencode = os.fsencode
43 43 fsdecode = os.fsdecode
44 44 # A bytes version of os.name.
45 45 oslinesep = os.linesep.encode('ascii')
46 46 osname = os.name.encode('ascii')
47 47 ospathsep = os.pathsep.encode('ascii')
48 48 ossep = os.sep.encode('ascii')
49 49 osaltsep = os.altsep
50 50 if osaltsep:
51 51 osaltsep = osaltsep.encode('ascii')
52 52 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
53 53 # returns bytes.
54 54 getcwd = os.getcwdb
55 55 sysplatform = sys.platform.encode('ascii')
56 56 sysexecutable = sys.executable
57 57 if sysexecutable:
58 58 sysexecutable = os.fsencode(sysexecutable)
59 59 stringio = io.BytesIO
60 60 maplist = lambda *args: list(map(*args))
61 61
62 62 # TODO: .buffer might not exist if std streams were replaced; we'll need
63 63 # a silly wrapper to make a bytes stream backed by a unicode one.
64 64 stdin = sys.stdin.buffer
65 65 stdout = sys.stdout.buffer
66 66 stderr = sys.stderr.buffer
67 67
68 68 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
69 69 # we can use os.fsencode() to get back bytes argv.
70 70 #
71 71 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
72 72 #
73 73 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
74 74 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
75 75 if getattr(sys, 'argv', None) is not None:
76 76 sysargv = list(map(os.fsencode, sys.argv))
77 77
# Packs an integer in range(256) into a bytes object of length 1.
bytechr = struct.Struct('>B').pack

class bytestr(bytes):
    """A bytes subclass behaving mostly like a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    (b'', b'foo', b'ascii', b'1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    A non-ascii str is never converted implicitly, because its encoding
    is unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    bytestr and bytes compare with each other:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Indexing and iterating yield bytes, not integers:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    The bytestr type is not carried over by operations, so their bytes
    results have to be cast back to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Take care not to hand a bytestr object to a function that expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is returned as is rather than copied.
        if isinstance(s, bytestr):
            return s
        if isinstance(s, (bytes, bytearray)):
            raw = s
        else:
            # Anything else goes through its str() form, which must be ASCII.
            raw = str(s).encode(u'ascii')
        return bytes.__new__(cls, raw)

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        # Slices are already bytes; a single index gives an int to convert.
        if isinstance(item, bytes):
            return item
        return bytechr(item)

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))

def iterbytestr(s):
    """Iterate over bytes the way a Python 2 str is iterated"""
    return map(bytechr, s)
144 144
def sysbytes(s):
    """Turn an internal str (e.g. keyword, __doc__) back into bytes

    UnicodeEncodeError is never raised, but only ASCII characters
    survive a sysstr(sysbytes(s)) round trip.
    """
    encoded = s.encode(u'utf-8')
    return encoded
152 152
def sysstr(s):
    """Return a keyword str suitable for Python functions such as
    getattr() and str.encode()

    UnicodeDecodeError is never raised. Non-ascii characters are treated
    as invalid and mapped to arbitrary but unique code points, so that
    'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if not isinstance(s, builtins.str):
        return s.decode(u'latin-1')
    return s

def _wrapattrfunc(f):
    """Wrap an *attr() builtin so the attribute name may be bytes"""
    def w(object, name, *args):
        return f(object, sysstr(name), *args)
    return functools.wraps(f)(w)

# these wrappers are automagically imported by hgloader
delattr = _wrapattrfunc(builtins.delattr)
getattr = _wrapattrfunc(builtins.getattr)
hasattr = _wrapattrfunc(builtins.hasattr)
setattr = _wrapattrfunc(builtins.setattr)
xrange = builtins.range
unicode = str

def open(name, mode='r', buffering=-1):
    """Call builtins.open() with a mode that may be given as bytes"""
    strmode = sysstr(mode)
    return builtins.open(name, strmode, buffering)
180 181
# getopt.getopt() works on unicodes internally on Python 3, so bytes cannot
# be passed to it directly. The arguments are decoded first and everything
# it returns is encoded back to bytes.
def getoptb(args, shortlist, namelist):
    """Bytes-in, bytes-out wrapper around getopt.getopt()"""
    uargs = [a.decode('latin-1') for a in args]
    ushortlist = shortlist.decode('latin-1')
    unamelist = [a.decode('latin-1') for a in namelist]
    uopts, urest = getopt.getopt(uargs, ushortlist, unamelist)
    opts = [(flag.encode('latin-1'), value.encode('latin-1'))
            for flag, value in uopts]
    rest = [a.encode('latin-1') for a in urest]
    return opts, rest
193 194
# Keys of keyword arguments need to be strings, which are unicodes on
# Python 3. This function takes a dict keyed by bytes and returns a new dict
# whose keys are converted to str.
def strkwargs(dic):
    """Return a copy of ``dic`` with its bytes keys decoded to str"""
    # items() rather than iteritems(): dicts have no iteritems() on
    # Python 3, so the latter only works when the source is rewritten by
    # the custom module loader.
    dic = dict((k.decode('latin-1'), v) for k, v in dic.items())
    return dic
199 200
# Keys of keyword arguments need to be unicode when they are passed into
# a function. This function converts those keys back to bytes, as the rest
# of the code deals with bytes.
def byteskwargs(dic):
    """Return a copy of ``dic`` with its str keys encoded to bytes"""
    # items() rather than iteritems(): dicts have no iteritems() on
    # Python 3, so the latter only works when the source is rewritten by
    # the custom module loader.
    dic = dict((k.encode('latin-1'), v) for k, v in dic.items())
    return dic
206 207
# shlex.split() accepts unicodes on Python 3. This function decodes its
# bytes argument to unicode, hands it to shlex.split(), and encodes every
# returned word back to bytes.
# TODO: handle shlex.shlex().
def shlexsplit(s):
    """Split the bytes ``s`` with shlex.split() and return a list of bytes"""
    words = shlex.split(s.decode('latin-1'))
    return [word.encode('latin-1') for word in words]
214 215
215 216 else:
216 217 import cStringIO
217 218
218 219 bytechr = chr
219 220 bytestr = str
220 221 iterbytestr = iter
221 222 sysbytes = identity
222 223 sysstr = identity
223 224
# Partial backport from os.py in Python 3, which only accepts bytes.
# In Python 2, our paths should only ever be bytes, a unicode path
# indicates a bug.
def fsencode(filename):
    """Return ``filename`` unchanged if it is a str, else raise TypeError"""
    if not isinstance(filename, str):
        raise TypeError(
            "expect str, not %s" % type(filename).__name__)
    return filename
233 234
234 235 # In Python 2, fsdecode() is very likely to receive bytes. So it's
235 236 # better not to touch the Python 2 part as it's already working fine.
236 237 fsdecode = identity
237 238
def getoptb(args, shortlist, namelist):
    """Pass the arguments straight through to getopt.getopt()"""
    parsed = getopt.getopt(args, shortlist, namelist)
    return parsed
240 241
241 242 strkwargs = identity
242 243 byteskwargs = identity
243 244
244 245 oslinesep = os.linesep
245 246 osname = os.name
246 247 ospathsep = os.pathsep
247 248 ossep = os.sep
248 249 osaltsep = os.altsep
249 250 stdin = sys.stdin
250 251 stdout = sys.stdout
251 252 stderr = sys.stderr
252 253 if getattr(sys, 'argv', None) is not None:
253 254 sysargv = sys.argv
254 255 sysplatform = sys.platform
255 256 getcwd = os.getcwd
256 257 sysexecutable = sys.executable
257 258 shlexsplit = shlex.split
258 259 stringio = cStringIO.StringIO
259 260 maplist = map
260 261
261 262 empty = _queue.Empty
262 263 queue = _queue.Queue
263 264
class _pycompatstub(object):
    """Namespace object whose attributes are resolved on first access

    Aliases are registered as (origin, attribute name) pairs. The first
    lookup of an alias fetches the attribute from its origin and stores
    the result in the instance dictionary.
    """
    def __init__(self):
        self._aliases = {}

    def _registeraliases(self, origin, items):
        """Add items that will be populated at the first access"""
        underscore, nothing = sysstr('_'), sysstr('')
        for item in map(sysstr, items):
            # The alias is the attribute name lowercased, without underscores.
            alias = item.replace(underscore, nothing).lower()
            self._aliases[alias] = (origin, item)

    def _registeralias(self, origin, attr, name):
        """Alias ``origin``.``attr`` as ``name``"""
        target = (origin, sysstr(attr))
        self._aliases[sysstr(name)] = target

    def __getattr__(self, name):
        try:
            origin, item = self._aliases[name]
        except KeyError:
            raise AttributeError(name)
        obj = getattr(origin, item)
        # Store the result so later lookups bypass __getattr__.
        self.__dict__[name] = obj
        return obj
286 287
287 288 httpserver = _pycompatstub()
288 289 urlreq = _pycompatstub()
289 290 urlerr = _pycompatstub()
290 291 if not ispy3:
291 292 import BaseHTTPServer
292 293 import CGIHTTPServer
293 294 import SimpleHTTPServer
294 295 import urllib2
295 296 import urllib
296 297 import urlparse
297 298 urlreq._registeraliases(urllib, (
298 299 "addclosehook",
299 300 "addinfourl",
300 301 "ftpwrapper",
301 302 "pathname2url",
302 303 "quote",
303 304 "splitattr",
304 305 "splitpasswd",
305 306 "splitport",
306 307 "splituser",
307 308 "unquote",
308 309 "url2pathname",
309 310 "urlencode",
310 311 ))
311 312 urlreq._registeraliases(urllib2, (
312 313 "AbstractHTTPHandler",
313 314 "BaseHandler",
314 315 "build_opener",
315 316 "FileHandler",
316 317 "FTPHandler",
317 318 "HTTPBasicAuthHandler",
318 319 "HTTPDigestAuthHandler",
319 320 "HTTPHandler",
320 321 "HTTPPasswordMgrWithDefaultRealm",
321 322 "HTTPSHandler",
322 323 "install_opener",
323 324 "ProxyHandler",
324 325 "Request",
325 326 "urlopen",
326 327 ))
327 328 urlreq._registeraliases(urlparse, (
328 329 "urlparse",
329 330 "urlunparse",
330 331 ))
331 332 urlerr._registeraliases(urllib2, (
332 333 "HTTPError",
333 334 "URLError",
334 335 ))
335 336 httpserver._registeraliases(BaseHTTPServer, (
336 337 "HTTPServer",
337 338 "BaseHTTPRequestHandler",
338 339 ))
339 340 httpserver._registeraliases(SimpleHTTPServer, (
340 341 "SimpleHTTPRequestHandler",
341 342 ))
342 343 httpserver._registeraliases(CGIHTTPServer, (
343 344 "CGIHTTPRequestHandler",
344 345 ))
345 346
346 347 else:
347 348 import urllib.parse
348 349 urlreq._registeraliases(urllib.parse, (
349 350 "splitattr",
350 351 "splitpasswd",
351 352 "splitport",
352 353 "splituser",
353 354 "urlparse",
354 355 "urlunparse",
355 356 ))
356 357 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
357 358 import urllib.request
358 359 urlreq._registeraliases(urllib.request, (
359 360 "AbstractHTTPHandler",
360 361 "BaseHandler",
361 362 "build_opener",
362 363 "FileHandler",
363 364 "FTPHandler",
364 365 "ftpwrapper",
365 366 "HTTPHandler",
366 367 "HTTPSHandler",
367 368 "install_opener",
368 369 "pathname2url",
369 370 "HTTPBasicAuthHandler",
370 371 "HTTPDigestAuthHandler",
371 372 "HTTPPasswordMgrWithDefaultRealm",
372 373 "ProxyHandler",
373 374 "Request",
374 375 "url2pathname",
375 376 "urlopen",
376 377 ))
377 378 import urllib.response
378 379 urlreq._registeraliases(urllib.response, (
379 380 "addclosehook",
380 381 "addinfourl",
381 382 ))
382 383 import urllib.error
383 384 urlerr._registeraliases(urllib.error, (
384 385 "HTTPError",
385 386 "URLError",
386 387 ))
387 388 import http.server
388 389 httpserver._registeraliases(http.server, (
389 390 "HTTPServer",
390 391 "BaseHTTPRequestHandler",
391 392 "SimpleHTTPRequestHandler",
392 393 "CGIHTTPRequestHandler",
393 394 ))
394 395
# urllib.parse.quote() accepts both str and bytes, decodes bytes
# (if necessary), and returns str. This is wonky. We provide a custom
# implementation that only accepts bytes and emits bytes.
def quote(s, safe=r'/'):
    """Percent-quote the bytes ``s`` and return the result as bytes"""
    return urllib.parse.quote_from_bytes(s, safe=safe).encode(
        'ascii', 'strict')
401 402
# urllib.parse.urlencode() returns str. We use this function to make
# sure we return bytes.
def urlencode(query, doseq=False):
    """Encode ``query`` with urllib.parse.urlencode() and return bytes"""
    encoded = urllib.parse.urlencode(query, doseq=doseq)
    return encoded.encode('ascii')
407 408
408 409 urlreq.quote = quote
409 410 urlreq.urlencode = urlencode
General Comments 0
You need to be logged in to leave comments. Login now