##// END OF EJS Templates
py3: use namedtuple._replace to produce new tokens
Martijn Pieters -
r30166:102e6ef5 default
parent child Browse files
Show More
@@ -1,412 +1,406 b''
1 1 # __init__.py - Startup and module loading logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import imp
11 11 import os
12 12 import sys
13 13 import zipimport
14 14
15 15 from . import (
16 16 policy
17 17 )
18 18
19 19 __all__ = []
20 20
21 21 modulepolicy = policy.policy
22 22
23 23 # Modules that have both Python and C implementations. See also the
24 24 # set of .py files under mercurial/pure/.
25 25 _dualmodules = set([
26 26 'mercurial.base85',
27 27 'mercurial.bdiff',
28 28 'mercurial.diffhelpers',
29 29 'mercurial.mpatch',
30 30 'mercurial.osutil',
31 31 'mercurial.parsers',
32 32 ])
33 33
class hgimporter(object):
    """Object that conforms to import hook interface defined in PEP-302.

    It only claims the modules listed in ``_dualmodules`` (those with both
    a C and a pure Python implementation) and picks the implementation to
    load according to ``modulepolicy``.
    """
    def find_module(self, name, path=None):
        """Return this importer for dual-implementation modules, else None."""
        # We only care about modules that have both C and pure implementations.
        if name in _dualmodules:
            return self
        return None

    def load_module(self, name):
        """Load ``name`` from the C or pure location and register it.

        Returns the module object, which is also stored in ``sys.modules``.
        Raises ImportError if no implementation allowed by the module
        policy can be found.
        """
        mod = sys.modules.get(name, None)
        if mod:
            return mod

        mercurial = sys.modules['mercurial']

        # The zip importer behaves sufficiently differently from the default
        # importer to warrant its own code path.
        loader = getattr(mercurial, '__loader__', None)
        if isinstance(loader, zipimport.zipimporter):
            def ziploader(*paths):
                """Obtain a zipimporter for a directory under the main zip."""
                path = os.path.join(loader.archive, *paths)
                zl = sys.path_importer_cache.get(path)
                if not zl:
                    zl = zipimport.zipimporter(path)
                return zl

            try:
                if modulepolicy in policy.policynoc:
                    raise ImportError()

                zl = ziploader('mercurial')
                mod = zl.load_module(name)
                # Unlike imp, ziploader doesn't expose module metadata that
                # indicates the type of module. So just assume what we found
                # is OK (even though it could be a pure Python module).
            except ImportError:
                if modulepolicy == 'c':
                    raise
                zl = ziploader('mercurial', 'pure')
                mod = zl.load_module(name)

            sys.modules[name] = mod
            return mod

        # Unlike the default importer which searches special locations and
        # sys.path, we only look in the directory where "mercurial" was
        # imported from.

        # imp.find_module doesn't support submodules (modules with ".").
        # Instead you have to pass the parent package's __path__ attribute
        # as the path argument.
        stem = name.split('.')[-1]

        try:
            if modulepolicy in policy.policynoc:
                raise ImportError()

            modinfo = imp.find_module(stem, mercurial.__path__)

            # The Mercurial installer used to copy files from
            # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible
            # for some installations to have .py files under mercurial/*.
            # Loading Python modules when we expected C versions could result
            # in a) poor performance b) loading a version from a previous
            # Mercurial version, potentially leading to incompatibility. Either
            # scenario is bad. So we verify that modules loaded from
            # mercurial/* are C extensions. If the current policy allows the
            # loading of .py modules, the module will be re-imported from
            # mercurial/pure/* below.
            if modinfo[2][2] != imp.C_EXTENSION:
                # imp.find_module hands us an open file; we are discarding
                # this candidate, so close it instead of leaking the handle.
                if modinfo[0] is not None:
                    modinfo[0].close()
                raise ImportError('.py version of %s found where C '
                                  'version should exist' % name)

        except ImportError:
            if modulepolicy == 'c':
                raise

            # Could not load the C extension and pure Python is allowed. So
            # try to load them. imp.find_module raises ImportError itself
            # when nothing is found, so no explicit "not found" check is
            # needed here.
            from . import pure
            modinfo = imp.find_module(stem, pure.__path__)

        try:
            mod = imp.load_module(name, *modinfo)
        finally:
            # The caller of imp.load_module is responsible for closing the
            # file object returned by imp.find_module, even on failure.
            if modinfo[0] is not None:
                modinfo[0].close()

        sys.modules[name] = mod
        return mod
123 123
124 124 # Python 3 uses a custom module loader that transforms source code between
125 125 # source file reading and compilation. This is done by registering a custom
126 126 # finder that changes the spec for Mercurial modules to use a custom loader.
127 127 if sys.version_info[0] >= 3:
128 128 from . import pure
129 129 import importlib
130 130 import io
131 131 import token
132 132 import tokenize
133 133
class hgpathentryfinder(importlib.abc.MetaPathFinder):
    """A sys.meta_path finder that uses a custom module loader."""
    def find_spec(self, fullname, path, target=None):
        """Locate a Mercurial-related module and attach ``hgloader`` to it.

        The lookup itself is delegated to the other finders registered on
        ``sys.meta_path``; the spec they produce is then modified so the
        module gets loaded through ``hgloader``.

        Returns the modified spec, or None when ``fullname`` is not a
        Mercurial-related module or no other finder can locate it.
        """
        # Only handle Mercurial-related modules.
        if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
            return None

        # This assumes Python 3 doesn't support loading C modules.
        if fullname in _dualmodules:
            stem = fullname.split('.')[-1]
            fullname = 'mercurial.pure.%s' % stem
            target = pure
            assert len(path) == 1
            path = [os.path.join(path[0], 'pure')]

        # Try to find the module using other registered finders.
        spec = None
        for finder in sys.meta_path:
            if finder == self:
                continue

            findspec = getattr(finder, 'find_spec', None)
            if findspec is not None:
                spec = findspec(fullname, path, target=target)
            else:
                # Legacy PEP 302 finders only expose find_module(), which
                # returns a loader rather than a spec. Calling find_spec on
                # them would raise AttributeError and abort the import, so
                # wrap whatever loader they return in a spec instead.
                spec = None
                findmodule = getattr(finder, 'find_module', None)
                if findmodule is not None:
                    legacyloader = findmodule(fullname, path)
                    if legacyloader is not None:
                        # Imported here so the block does not depend on the
                        # file-level import list.
                        from importlib.util import spec_from_loader
                        spec = spec_from_loader(fullname, legacyloader)
            if spec:
                break

        # This is a Mercurial-related module but we couldn't find it
        # using the previously-registered finders. This likely means
        # the module doesn't exist.
        if not spec:
            return None

        # Dual modules were looked up under mercurial.pure.*; expose them
        # under the name the caller originally asked for.
        if fullname.startswith('mercurial.pure.'):
            spec.name = spec.name.replace('.pure.', '.')

        # TODO need to support loaders from alternate specs, like zip
        # loaders.
        spec.loader = hgloader(spec.name, spec.origin)
        return spec
172 172
def replacetokens(tokens, fullname):
    """Rewrite a raw token stream so it compiles as Python 3 code.

    The custom module loading machinery calls this between source decoding
    and compilation. ``tokens`` is a list of ``tokenize.TokenInfo`` entries
    and ``fullname`` is the dotted name of the module being loaded.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.

    REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
    OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
    """
    quotes = ("'", '"')
    openers = ('(', '[', '{')
    closers = (')', ']', '}')
    attrbuiltins = ('getattr', 'setattr', 'hasattr', 'safehasattr')
    compatimport = (b'; from mercurial.pycompat import '
                    b'delattr, getattr, hasattr, setattr, xrange\n')

    def isop(pos, *values):
        """Tell whether tokens[pos] is an OP with one of the given values.

        An out-of-range position is simply not an operator.
        """
        try:
            candidate = tokens[pos]
        except IndexError:
            return False
        return candidate.type == token.OP and candidate.string in values

    def findarg(callidx, argno):
        """Find argument ``argno`` (counting from 0) of a call expression.

        ``callidx`` is the index of the called name; tokens[callidx + 1]
        is assumed to be '('. Returns the index of the first token of the
        argument, or None if the call has fewer arguments than that.
        """
        depth = 0
        remaining = argno
        for pos in range(callidx + 2, len(tokens)):
            if isop(pos, *closers):
                # end of call, tuple, subscription or dict / set
                depth -= 1
                if depth < 0:
                    return None
                continue
            if remaining == 0:
                # this is the starting position of the argument
                return pos
            if isop(pos, *openers):
                depth += 1
            elif depth == 0 and isop(pos, ','):
                remaining -= 1
        return None

    def forceunicode(pos):
        """Give the string token at ``pos`` an explicit unicode prefix.

        The prefix keeps the main loop from turning the literal into a
        byte string later on. Tokens that are not strings, or that already
        carry a prefix, are left alone. The caller must have bounds
        checked ``pos``.
        """
        current = tokens[pos]
        if current.type != token.STRING:
            return
        if current.string.startswith(quotes):
            tokens[pos] = current._replace(string='u' + current.string)

    infutureimport = False

    for idx, tok in enumerate(tokens):
        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if tok.type == token.STRING:
            text = tok.string
            # Two kinds of literal pass through untouched:
            #
            # - Triple-quoted strings, i.e. docstrings. The
            #   "from __future__" parsing (which allows a module docstring
            #   to exist before it) chokes on a b''' prefixed docstring
            #   with a SyntaxError, so all of them stay unprefixed. This
            #   means Mercurial components touching docstrings need to
            #   handle unicode, unfortunately.
            # - Literals whose first character isn't a quote; that
            #   character is likely a string prefix (such as 'b', 'u' or
            #   'r').
            if text[:3] in ("'''", '"""') or text[0] not in quotes:
                yield tok
            else:
                # Plain string literal. Prefix to make a b'' string.
                yield tok._replace(string='b' + text)
            continue

        # Insert compatibility imports at "from __future__ import" line.
        # No '\n' should be added to preserve line numbers.
        if tok.type == token.NAME and tok.string == 'import':
            preceding = [(p.type, p.string) for p in tokens[idx - 2:idx]]
            if preceding == [(token.NAME, 'from'),
                             (token.NAME, '__future__')]:
                infutureimport = True
        if infutureimport and tok.type == token.NEWLINE:
            infutureimport = False
            if fullname == 'mercurial.pycompat':
                yield tok
                continue
            # Splice the extra import onto the end of the current line by
            # shifting its tokens to start where the NEWLINE token was.
            row, col = tok.start
            readline = io.BytesIO(compatimport).readline
            for extra in tokenize.tokenize(readline):
                if extra.type in (tokenize.ENCODING, token.ENDMARKER):
                    continue
                yield extra._replace(
                    start=(row, col + extra.start[1]),
                    end=(row, col + extra.end[1]))
            continue

        # This looks like a function call.
        if tok.type == token.NAME and isop(idx + 1, '('):
            fn = tok.string
            isattribute = isop(idx - 1, '.')

            if fn in attrbuiltins and not isattribute:
                # *attr() builtins don't accept byte strings to 2nd argument.
                unicodeargs = (1,)
            elif fn in ('encode', 'decode') and isattribute:
                # .encode() and .decode() on str/bytes/unicode don't accept
                # byte strings on Python 3.
                unicodeargs = (0, 1)
            elif fn == 'open' and not isattribute:
                # Bare open call (not an attribute on something else), the
                # second argument (mode) must be a string, not bytes
                unicodeargs = (1,)
            elif fn == 'iteritems':
                # iteritems does not exist in the Python 3 world; items is
                # its replacement.
                yield tok._replace(string='items')
                continue
            else:
                unicodeargs = ()

            for argno in unicodeargs:
                argidx = findarg(idx, argno)
                if argidx is not None:
                    forceunicode(argidx)

        # Emit unmodified token.
        yield tok
331 325
332 326 # Header to add to bytecode files. This MUST be changed when
333 327 # ``replacetokens`` or any mechanism that changes semantics of module
334 328 # loading is changed. Otherwise cached bytecode may get loaded without
335 329 # the new transformation mechanisms applied.
336 330 BYTECODEHEADER = b'HG\x00\x06'
337 331
class hgloader(importlib.machinery.SourceFileLoader):
    """Source file loader that rewrites modules before compiling them.

    Source code is tokenized, passed through ``replacetokens`` and only
    then compiled, so code written natively for Python 2 can run on
    Python 3 without making the sources excessively ugly.

    Transforming the source breaks an assumption of the built-in bytecode
    cache. The built-in importer stamps cached bytecode with the
    Python/bytecode version and ignores the cache when that version
    changes, but it knows nothing about the Mercurial transformations,
    which could change at any time. Bytecode produced by an older
    transformation must therefore not be reused.

    To that end ``get_data`` and ``set_data``, which ``SourceFileLoader``
    calls to read and write bytecode cache files respectively, are
    wrapped to maintain an extra header on those files. As long as the
    version in this file is bumped whenever semantics change, stale
    cached bytecode is rejected.

    The added header has the form ``HG<VERSION>``. That is a literal
    ``HG`` with 2 binary bytes indicating the transformation version.
    """
    def get_data(self, path):
        """Read ``path``, validating and stripping the header of bytecode."""
        raw = super(hgloader, self).get_data(path)

        bytecodesuffixes = tuple(importlib.machinery.BYTECODE_SUFFIXES)
        if path.endswith(bytecodesuffixes):
            # Bytecode must start with the current Mercurial transformation
            # header. ``SourceFileLoader.get_code()`` treats an OSError
            # raised while loading a bytecode path as "the cached file is
            # bad", so that is how a missing or outdated header is
            # reported.
            if raw[:2] != b'HG':
                raise OSError('no hg header')
            if raw[:4] != BYTECODEHEADER:
                raise OSError('hg header version mismatch')
            return raw[4:]

        return raw

    def set_data(self, path, data, *args, **kwargs):
        """Write ``data`` to ``path``, prepending the header to bytecode."""
        iscache = path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES))
        payload = BYTECODEHEADER + data if iscache else data
        return super(hgloader, self).set_data(path, payload, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        rawtokens = list(tokenize.tokenize(io.BytesIO(data).readline))
        transformed = tokenize.untokenize(
            replacetokens(rawtokens, self.name))
        # Python's built-in importer strips frames from exceptions raised
        # for this code. Unfortunately, that mechanism isn't extensible
        # and our frame will be blamed for the import failure. There
        # are extremely hacky ways to do frame stripping. We haven't
        # implemented them because they are very ugly.
        return super(hgloader, self).source_to_code(transformed, path)
402 396
# Importing this package registers the custom importer as a side effect, so
# every entry point can import mercurial.* modules without having to do the
# registration itself. Python 3 gets the source-transforming finder,
# Python 2 the C/pure dispatching importer.
_importercls = hgpathentryfinder if sys.version_info[0] >= 3 else hgimporter
if not any(isinstance(finder, _importercls) for finder in sys.meta_path):
    # meta_path is used before any implicit finders and before sys.path.
    sys.meta_path.insert(0, _importercls())
General Comments 0
You need to be logged in to leave comments. Login now