##// END OF EJS Templates
py3: rewrite itervalues() as values() by importer...
Yuya Nishihara -
r31445:83e08014 default
parent child Browse files
Show More
@@ -1,402 +1,402
1 1 # __init__.py - Startup and module loading logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import imp
11 11 import os
12 12 import sys
13 13 import zipimport
14 14
15 15 from . import (
16 16 policy
17 17 )
18 18
# Nothing is exported by ``from mercurial import *``.
__all__ = []

# Module loading policy, as determined by the ``policy`` module.
modulepolicy = policy.policy

# Names of the modules that ship with both a C and a pure Python
# implementation. The pure versions are the .py files under mercurial/pure/.
_dualmodules = set((
    'mercurial.base85',
    'mercurial.bdiff',
    'mercurial.diffhelpers',
    'mercurial.mpatch',
    'mercurial.osutil',
    'mercurial.parsers',
))
33 33
class hgimporter(object):
    """Object that conforms to import hook interface defined in PEP-302.

    It acts as both the finder and the loader for the modules listed in
    ``_dualmodules`` and picks the C or the pure Python implementation
    according to ``modulepolicy``.
    """
    def find_module(self, name, path=None):
        """Return ``self`` as the loader for a dual-implementation module.

        Returns ``None`` for any other module name. ``path`` is part of the
        PEP-302 signature and is not used.
        """
        # We only care about modules that have both C and pure implementations.
        if name in _dualmodules:
            return self
        return None

    def load_module(self, name):
        """Load module ``name``, register it in ``sys.modules``, return it.

        A module already present in ``sys.modules`` is returned as is.
        Otherwise the C version is looked up next to the ``mercurial``
        package unless the policy is listed in ``policy.policynoc``. If that
        fails, the version under ``mercurial/pure`` is loaded, unless the
        policy is ``c``, in which case the ``ImportError`` propagates.
        """
        mod = sys.modules.get(name, None)
        if mod:
            return mod

        mercurial = sys.modules['mercurial']

        # The zip importer behaves sufficiently differently from the default
        # importer to warrant its own code path.
        loader = getattr(mercurial, '__loader__', None)
        if isinstance(loader, zipimport.zipimporter):
            def ziploader(*paths):
                """Obtain a zipimporter for a directory under the main zip."""
                path = os.path.join(loader.archive, *paths)
                zl = sys.path_importer_cache.get(path)
                if not zl:
                    zl = zipimport.zipimporter(path)
                return zl

            try:
                if modulepolicy in policy.policynoc:
                    raise ImportError()

                zl = ziploader('mercurial')
                mod = zl.load_module(name)
                # Unlike imp, ziploader doesn't expose module metadata that
                # indicates the type of module. So just assume what we found
                # is OK (even though it could be a pure Python module).
            except ImportError:
                if modulepolicy == b'c':
                    raise
                zl = ziploader('mercurial', 'pure')
                mod = zl.load_module(name)

            sys.modules[name] = mod
            return mod

        # Unlike the default importer which searches special locations and
        # sys.path, we only look in the directory where "mercurial" was
        # imported from.

        # imp.find_module doesn't support submodules (modules with ".").
        # Instead you have to pass the parent package's __path__ attribute
        # as the path argument.
        stem = name.split('.')[-1]

        def closefile(info):
            # imp.find_module() hands back an open file (or None) and leaves
            # closing it to the caller.
            if info is not None and info[0] is not None:
                info[0].close()

        modinfo = None
        try:
            try:
                if modulepolicy in policy.policynoc:
                    raise ImportError()

                modinfo = imp.find_module(stem, mercurial.__path__)

                # The Mercurial installer used to copy files from
                # mercurial/pure/*.py to mercurial/*.py. Therefore, it's
                # possible for some installations to have .py files under
                # mercurial/*. Loading Python modules when we expected C
                # versions could result in a) poor performance b) loading a
                # version from a previous Mercurial version, potentially
                # leading to incompatibility. Either scenario is bad. So we
                # verify that modules loaded from mercurial/* are C
                # extensions. If the current policy allows the loading of
                # .py modules, the module will be re-imported from
                # mercurial/pure/* below.
                if modinfo[2][2] != imp.C_EXTENSION:
                    raise ImportError('.py version of %s found where C '
                                      'version should exist' % name)

            except ImportError:
                if modulepolicy == b'c':
                    raise

                # The candidate found under mercurial/* (if any) was
                # rejected; release its file before looking elsewhere.
                closefile(modinfo)
                modinfo = None

                # Could not load the C extension and pure Python is allowed.
                # So try to load them. imp.find_module() raises ImportError
                # itself when the module cannot be found.
                from . import pure
                modinfo = imp.find_module(stem, pure.__path__)

            mod = imp.load_module(name, *modinfo)
        finally:
            # Runs on success and on every error path, so the file opened by
            # imp.find_module() never leaks.
            closefile(modinfo)

        sys.modules[name] = mod
        return mod
123 123
124 124 # Python 3 uses a custom module loader that transforms source code between
125 125 # source file reading and compilation. This is done by registering a custom
126 126 # finder that changes the spec for Mercurial modules to use a custom loader.
127 127 if sys.version_info[0] >= 3:
128 128 from . import pure
129 129 import importlib
130 130 import io
131 131 import token
132 132 import tokenize
133 133
class hgpathentryfinder(importlib.abc.MetaPathFinder):
    """A sys.meta_path finder that uses a custom module loader."""
    def find_spec(self, fullname, path, target=None):
        """Return a module spec whose loader is an ``hgloader``.

        Returns ``None`` for modules outside the ``mercurial``, ``hgext``
        and ``hgext3rd`` packages, for ``mercurial.zstd*`` and for modules
        that none of the other registered finders can locate. Modules in
        ``_dualmodules`` are looked up under ``mercurial/pure`` but keep
        their ``mercurial.<stem>`` name in the returned spec.
        """
        # Only handle Mercurial-related modules.
        if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
            return None
        # zstd is already dual-version clean, don't try and mangle it
        if fullname.startswith('mercurial.zstd'):
            return None

        # This assumes Python 3 doesn't support loading C modules.
        if fullname in _dualmodules:
            stem = fullname.split('.')[-1]
            fullname = 'mercurial.pure.%s' % stem
            target = pure
            assert len(path) == 1
            path = [os.path.join(path[0], 'pure')]

        # Try to find the module using other registered finders.
        spec = None
        for finder in sys.meta_path:
            if finder is self:
                continue

            # Legacy PEP 302 finders only implement find_module(). They
            # cannot produce a spec, so skip them instead of failing with
            # AttributeError.
            findspec = getattr(finder, 'find_spec', None)
            if findspec is None:
                continue

            spec = findspec(fullname, path, target=target)
            if spec:
                break

        # This is a Mercurial-related module but we couldn't find it
        # using the previously-registered finders. This likely means
        # the module doesn't exist.
        if not spec:
            return None

        # Undo the redirection to mercurial.pure.* so the module is
        # registered under the name that was requested.
        if fullname.startswith('mercurial.pure.'):
            spec.name = spec.name.replace('.pure.', '.')

        # TODO need to support loaders from alternate specs, like zip
        # loaders.
        spec.loader = hgloader(spec.name, spec.origin)
        return spec
175 175
def replacetokens(tokens, fullname):
    """Transform a stream of tokens from raw to Python 3.

    It is called by the custom module loading machinery to rewrite
    source/tokens between source decoding and compilation.

    ``tokens`` is a list of tokens as produced by ``tokenize.tokenize()``.
    ``fullname`` is the dotted name of the module being loaded; the
    compatibility import is not injected into ``mercurial.pycompat`` itself.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.

    REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
    OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
    """
    futureimpline = False

    # The following utility functions access the tokens list and i index of
    # the "for i, t in enumerate(tokens)" loop below
    def _isop(j, *o):
        """Assert that tokens[j] is an OP with one of the given values"""
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    def _isdef(j):
        """Assert that tokens[j] is the ``def`` keyword"""
        try:
            return tokens[j].type == token.NAME and tokens[j].string == 'def'
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0)

        Returns index of the first token of that argument, or None if
        there are not that many arguments.

        Assumes that token[i + 1] is '('.

        """
        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    return None
            elif n == 0:
                # this is the starting position of arg
                return j
            elif _isop(j, '(', '[', '{'):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                n -= 1

        return None

    def _ensureunicode(j):
        """Make sure the token at j is a unicode string

        This rewrites a string token to include the unicode literal prefix
        so the string transformer won't add the byte prefix.

        Ignores tokens that are not strings. Assumes bounds checking has
        already been done.

        """
        st = tokens[j]
        if st.type == token.STRING and st.string.startswith(("'", '"')):
            tokens[j] = st._replace(string='u%s' % st.string)

    for i, t in enumerate(tokens):
        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                yield t
                continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'). Ignore.
            if s[0] not in ("'", '"'):
                yield t
                continue

            # String literal. Prefix to make a b'' string.
            yield t._replace(string='b%s' % t.string)
            continue

        # Insert compatibility imports at "from __future__ import" line.
        # No '\n' should be added to preserve line numbers.
        if (t.type == token.NAME and t.string == 'import' and
            all(u.type == token.NAME for u in tokens[i - 2:i]) and
            [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
            futureimpline = True
        if t.type == token.NEWLINE and futureimpline:
            futureimpline = False
            if fullname == 'mercurial.pycompat':
                yield t
                continue
            r, c = t.start
            l = (b'; from mercurial.pycompat import '
                 b'delattr, getattr, hasattr, setattr, xrange, open\n')
            # The injected tokens replace the NEWLINE token (their own
            # trailing newline ends the line) and are shifted so that they
            # sit on the same row, right after the original statement.
            for u in tokenize.tokenize(io.BytesIO(l).readline):
                if u.type in (tokenize.ENCODING, token.ENDMARKER):
                    continue
                yield u._replace(
                    start=(r, c + u.start[1]), end=(r, c + u.end[1]))
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                    not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensureunicode(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensureunicode(argidx)

            # It changes iteritems/values to items/values as they are not
            # present in Python 3 world. A "def iteritems(...)" is a
            # definition rather than a call and must keep its name, or
            # callers using the original name would no longer find it.
            elif fn in ('iteritems', 'itervalues') and not _isdef(i - 1):
                yield t._replace(string=fn[4:])
                continue

        # Emit unmodified token.
        yield t

# Header to add to bytecode files. This MUST be changed when
# ``replacetokens`` or any mechanism that changes semantics of module
# loading is changed. Otherwise cached bytecode may get loaded without
# the new transformation mechanisms applied.
BYTECODEHEADER = b'HG\x00\x0a'
327 327
class hgloader(importlib.machinery.SourceFileLoader):
    """Source file loader that rewrites tokens before compiling.

    Source read by this loader is tokenized, passed through
    ``replacetokens()`` and only then compiled. This lets the code base stay
    native Python 2 while remaining loadable on Python 3 without making the
    code excessively ugly.

    Rewriting the source breaks an assumption of the built-in bytecode
    cache: the cache header only records the Python/bytecode version, while
    the Mercurial transformations could change at any time. Bytecode cached
    by an older transformation must therefore not be reused.

    To detect that, ``get_data`` and ``set_data`` (which
    ``SourceFileLoader`` calls to read and write bytecode cache files) are
    wrapped so that every cache file carries an extra leading header of the
    form ``HG<VERSION>``: a literal ``HG`` followed by 2 binary bytes
    holding the transformation version. As long as that version is changed
    whenever semantics change, stale cached bytecode is rejected.
    """
    def get_data(self, path):
        """Return the content of ``path``, minus the hg header if bytecode.

        Raises OSError when a bytecode file has no hg header or one of a
        different version; ``SourceFileLoader.get_code()`` expects that
        exception for a cached file that is "bad."
        """
        raw = super(hgloader, self).get_data(path)

        bytecodesuffixes = tuple(importlib.machinery.BYTECODE_SUFFIXES)
        if path.endswith(bytecodesuffixes):
            if raw[0:2] != b'HG':
                raise OSError('no hg header')
            if raw[0:4] != BYTECODEHEADER:
                raise OSError('hg header version mismatch')
            return raw[4:]

        # Not a bytecode file: hand the content back untouched.
        return raw

    def set_data(self, path, data, *args, **kwargs):
        """Write ``data`` to ``path``, adding the hg header to bytecode."""
        bytecodesuffixes = tuple(importlib.machinery.BYTECODE_SUFFIXES)
        payload = BYTECODEHEADER + data if path.endswith(bytecodesuffixes) \
            else data
        return super(hgloader, self).set_data(path, payload, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        rawtokens = list(tokenize.tokenize(io.BytesIO(data).readline))
        rewritten = tokenize.untokenize(replacetokens(rawtokens, self.name))
        # Python's built-in importer strips frames from exceptions raised
        # for this code. Unfortunately, that mechanism isn't extensible
        # and our frame will be blamed for the import failure. There
        # are extremely hacky ways to do frame stripping. We haven't
        # implemented them because they are very ugly.
        return super(hgloader, self).source_to_code(rewritten, path)
392 392
# Importing this package registers the custom importer as a side effect, so
# entry points can import mercurial.* modules without doing the registration
# themselves.
_importercls = hgpathentryfinder if sys.version_info[0] >= 3 else hgimporter
if all(not isinstance(x, _importercls) for x in sys.meta_path):
    # meta_path is used before any implicit finders and before sys.path.
    sys.meta_path.insert(0, _importercls())
General Comments 0
You need to be logged in to leave comments. Login now