##// END OF EJS Templates
byteify-strings: fork py3 code transformer to make it a standalone command...
Yuya Nishihara -
r38403:1d9c97db default
parent child Browse files
Show More
@@ -1,4 +1,4 b''
1 # __init__.py - Startup and module loading logic for Mercurial.
1 # byteify-strings.py - transform string literals to be Python 3 safe
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
@@ -7,84 +7,18 b''
7 7
8 8 from __future__ import absolute_import
9 9
10 import sys
11
12 # Allow 'from mercurial import demandimport' to keep working.
13 import hgdemandimport
14 demandimport = hgdemandimport
15
16 __all__ = []
17
18 # Python 3 uses a custom module loader that transforms source code between
19 # source file reading and compilation. This is done by registering a custom
20 # finder that changes the spec for Mercurial modules to use a custom loader.
21 if sys.version_info[0] >= 3:
22 import importlib
23 import importlib.abc
24 import io
25 import token
26 import tokenize
10 import io
11 import token
12 import tokenize
27 13
28 class hgpathentryfinder(importlib.abc.MetaPathFinder):
29 """A sys.meta_path finder that uses a custom module loader."""
30 def find_spec(self, fullname, path, target=None):
31 # Only handle Mercurial-related modules.
32 if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
33 return None
34 # don't try to parse binary
35 if fullname.startswith('mercurial.cext.'):
36 return None
37 # third-party packages are expected to be dual-version clean
38 if fullname.startswith('mercurial.thirdparty'):
39 return None
40 # zstd is already dual-version clean, don't try and mangle it
41 if fullname.startswith('mercurial.zstd'):
42 return None
43 # pywatchman is already dual-version clean, don't try and mangle it
44 if fullname.startswith('hgext.fsmonitor.pywatchman'):
45 return None
46
47 # Try to find the module using other registered finders.
48 spec = None
49 for finder in sys.meta_path:
50 if finder == self:
51 continue
52
53 spec = finder.find_spec(fullname, path, target=target)
54 if spec:
55 break
56
57 # This is a Mercurial-related module but we couldn't find it
58 # using the previously-registered finders. This likely means
59 # the module doesn't exist.
60 if not spec:
61 return None
62
63 # TODO need to support loaders from alternate specs, like zip
64 # loaders.
65 loader = hgloader(spec.name, spec.origin)
66 # Can't use util.safehasattr here because that would require
67 # importing util, and we're in import code.
68 if hasattr(spec.loader, 'loader'): # hasattr-py3-only
69 # This is a nested loader (maybe a lazy loader?)
70 spec.loader.loader = loader
71 else:
72 spec.loader = loader
73 return spec
74
14 if True:
75 15 def replacetokens(tokens, fullname):
76 16 """Transform a stream of tokens from raw to Python 3.
77 17
78 It is called by the custom module loading machinery to rewrite
79 source/tokens between source decoding and compilation.
80
81 18 Returns a generator of possibly rewritten tokens.
82 19
83 20 The input token list may be mutated as part of processing. However,
84 21 its changes do not necessarily match the output token stream.
85
86 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
87 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
88 22 """
89 23 futureimpline = False
90 24
@@ -218,82 +152,3 b' if sys.version_info[0] >= 3:'
218 152
219 153 # Emit unmodified token.
220 154 yield t
221
222 # Header to add to bytecode files. This MUST be changed when
223 # ``replacetokens`` or any mechanism that changes semantics of module
224 # loading is changed. Otherwise cached bytecode may get loaded without
225 # the new transformation mechanisms applied.
226 BYTECODEHEADER = b'HG\x00\x0a'
227
228 class hgloader(importlib.machinery.SourceFileLoader):
229 """Custom module loader that transforms source code.
230
231 When the source code is converted to a code object, we transform
232 certain patterns to be Python 3 compatible. This allows us to write code
233 that is natively Python 2 and compatible with Python 3 without
234 making the code excessively ugly.
235
236 We do this by transforming the token stream between tokenizing and compiling.
237
238 Implementing transformations invalidates caching assumptions made
239 by the built-in importer. The built-in importer stores a header on
240 saved bytecode files indicating the Python/bytecode version. If the
241 version changes, the cached bytecode is ignored. The Mercurial
242 transformations could change at any time. This means we need to check
243 that cached bytecode was generated with the current transformation
244 code or there could be a mismatch between cached bytecode and what
245 would be generated from this class.
246
247 We supplement the bytecode caching layer by wrapping ``get_data``
248 and ``set_data``. These functions are called when the
249 ``SourceFileLoader`` retrieves and saves bytecode cache files,
250 respectively. We simply add an additional header on the file. As
251 long as the version in this file is changed when semantics change,
252 cached bytecode should be invalidated when transformations change.
253
254 The added header has the form ``HG<VERSION>``. That is a literal
255 ``HG`` with 2 binary bytes indicating the transformation version.
256 """
257 def get_data(self, path):
258 data = super(hgloader, self).get_data(path)
259
260 if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
261 return data
262
263 # There should be a header indicating the Mercurial transformation
264 # version. If it doesn't exist or doesn't match the current version,
265 # we raise an OSError because that is what
266 # ``SourceFileLoader.get_code()`` expects when loading bytecode
267 # paths to indicate the cached file is "bad."
268 if data[0:2] != b'HG':
269 raise OSError('no hg header')
270 if data[0:4] != BYTECODEHEADER:
271 raise OSError('hg header version mismatch')
272
273 return data[4:]
274
275 def set_data(self, path, data, *args, **kwargs):
276 if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
277 data = BYTECODEHEADER + data
278
279 return super(hgloader, self).set_data(path, data, *args, **kwargs)
280
281 def source_to_code(self, data, path):
282 """Perform token transformation before compilation."""
283 buf = io.BytesIO(data)
284 tokens = tokenize.tokenize(buf.readline)
285 data = tokenize.untokenize(replacetokens(list(tokens), self.name))
286 # Python's built-in importer strips frames from exceptions raised
287 # for this code. Unfortunately, that mechanism isn't extensible
288 # and our frame will be blamed for the import failure. There
289 # are extremely hacky ways to do frame stripping. We haven't
290 # implemented them because they are very ugly.
291 return super(hgloader, self).source_to_code(data, path)
292
293 # We automagically register our custom importer as a side-effect of
294 # loading. This is necessary to ensure that any entry points are able
295 # to import mercurial.* modules without having to perform this
296 # registration themselves.
297 if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path):
298 # meta_path is used before any implicit finders and before sys.path.
299 sys.meta_path.insert(0, hgpathentryfinder())
General Comments 0
You need to be logged in to leave comments. Login now