Show More
@@ -1,4 +1,4 b'' | |||
|
1 | # __init__.py - Startup and module loading logic for Mercurial. | |
|
1 | # byteify-strings.py - transform string literals to be Python 3 safe | |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> |
|
4 | 4 | # |
@@ -7,84 +7,18 b'' | |||
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | import sys | |
|
11 | ||
|
12 | # Allow 'from mercurial import demandimport' to keep working. | |
|
13 | import hgdemandimport | |
|
14 | demandimport = hgdemandimport | |
|
15 | ||
|
16 | __all__ = [] | |
|
17 | ||
|
18 | # Python 3 uses a custom module loader that transforms source code between | |
|
19 | # source file reading and compilation. This is done by registering a custom | |
|
20 | # finder that changes the spec for Mercurial modules to use a custom loader. | |
|
21 | if sys.version_info[0] >= 3: | |
|
22 | import importlib | |
|
23 | import importlib.abc | |
|
24 | import io | |
|
25 | import token | |
|
26 | import tokenize | |
|
10 | import io | |
|
11 | import token | |
|
12 | import tokenize | |
|
27 | 13 | |
|
28 | class hgpathentryfinder(importlib.abc.MetaPathFinder): | |
|
29 | """A sys.meta_path finder that uses a custom module loader.""" | |
|
30 | def find_spec(self, fullname, path, target=None): | |
|
31 | # Only handle Mercurial-related modules. | |
|
32 | if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')): | |
|
33 | return None | |
|
34 | # don't try to parse binary | |
|
35 | if fullname.startswith('mercurial.cext.'): | |
|
36 | return None | |
|
37 | # third-party packages are expected to be dual-version clean | |
|
38 | if fullname.startswith('mercurial.thirdparty'): | |
|
39 | return None | |
|
40 | # zstd is already dual-version clean, don't try and mangle it | |
|
41 | if fullname.startswith('mercurial.zstd'): | |
|
42 | return None | |
|
43 | # pywatchman is already dual-version clean, don't try and mangle it | |
|
44 | if fullname.startswith('hgext.fsmonitor.pywatchman'): | |
|
45 | return None | |
|
46 | ||
|
47 | # Try to find the module using other registered finders. | |
|
48 | spec = None | |
|
49 | for finder in sys.meta_path: | |
|
50 | if finder == self: | |
|
51 | continue | |
|
52 | ||
|
53 | spec = finder.find_spec(fullname, path, target=target) | |
|
54 | if spec: | |
|
55 | break | |
|
56 | ||
|
57 | # This is a Mercurial-related module but we couldn't find it | |
|
58 | # using the previously-registered finders. This likely means | |
|
59 | # the module doesn't exist. | |
|
60 | if not spec: | |
|
61 | return None | |
|
62 | ||
|
63 | # TODO need to support loaders from alternate specs, like zip | |
|
64 | # loaders. | |
|
65 | loader = hgloader(spec.name, spec.origin) | |
|
66 | # Can't use util.safehasattr here because that would require | |
|
67 | # importing util, and we're in import code. | |
|
68 | if hasattr(spec.loader, 'loader'): # hasattr-py3-only | |
|
69 | # This is a nested loader (maybe a lazy loader?) | |
|
70 | spec.loader.loader = loader | |
|
71 | else: | |
|
72 | spec.loader = loader | |
|
73 | return spec | |
|
74 | ||
|
14 | if True: | |
|
75 | 15 | def replacetokens(tokens, fullname): |
|
76 | 16 | """Transform a stream of tokens from raw to Python 3. |
|
77 | 17 | |
|
78 | It is called by the custom module loading machinery to rewrite | |
|
79 | source/tokens between source decoding and compilation. | |
|
80 | ||
|
81 | 18 | Returns a generator of possibly rewritten tokens. |
|
82 | 19 | |
|
83 | 20 | The input token list may be mutated as part of processing. However, |
|
84 | 21 | its changes do not necessarily match the output token stream. |
|
85 | ||
|
86 | REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION | |
|
87 | OR CACHED FILES WON'T GET INVALIDATED PROPERLY. | |
|
88 | 22 | """ |
|
89 | 23 | futureimpline = False |
|
90 | 24 | |
@@ -218,82 +152,3 b' if sys.version_info[0] >= 3:' | |||
|
218 | 152 | |
|
219 | 153 | # Emit unmodified token. |
|
220 | 154 | yield t |
|
221 | ||
|
222 | # Header to add to bytecode files. This MUST be changed when | |
|
223 | # ``replacetokens`` or any mechanism that changes semantics of module | |
|
224 | # loading is changed. Otherwise cached bytecode may get loaded without | |
|
225 | # the new transformation mechanisms applied. | |
|
226 | BYTECODEHEADER = b'HG\x00\x0a' | |
|
227 | ||
|
228 | class hgloader(importlib.machinery.SourceFileLoader): | |
|
229 | """Custom module loader that transforms source code. | |
|
230 | ||
|
231 | When the source code is converted to a code object, we transform | |
|
232 | certain patterns to be Python 3 compatible. This allows us to write code | |
|
233 | that is natively Python 2 and compatible with Python 3 without | |
|
234 | making the code excessively ugly. | |
|
235 | ||
|
236 | We do this by transforming the token stream between parse and compile. | |
|
237 | ||
|
238 | Implementing transformations invalidates caching assumptions made | |
|
239 | by the built-in importer. The built-in importer stores a header on | |
|
240 | saved bytecode files indicating the Python/bytecode version. If the | |
|
241 | version changes, the cached bytecode is ignored. The Mercurial | |
|
242 | transformations could change at any time. This means we need to check | |
|
243 | that cached bytecode was generated with the current transformation | |
|
244 | code or there could be a mismatch between cached bytecode and what | |
|
245 | would be generated from this class. | |
|
246 | ||
|
247 | We supplement the bytecode caching layer by wrapping ``get_data`` | |
|
248 | and ``set_data``. These functions are called when the | |
|
249 | ``SourceFileLoader`` retrieves and saves bytecode cache files, | |
|
250 | respectively. We simply add an additional header on the file. As | |
|
251 | long as the version in this file is changed when semantics change, | |
|
252 | cached bytecode should be invalidated when transformations change. | |
|
253 | ||
|
254 | The added header has the form ``HG<VERSION>``. That is a literal | |
|
255 | ``HG`` with 2 binary bytes indicating the transformation version. | |
|
256 | """ | |
|
257 | def get_data(self, path): | |
|
258 | data = super(hgloader, self).get_data(path) | |
|
259 | ||
|
260 | if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): | |
|
261 | return data | |
|
262 | ||
|
263 | # There should be a header indicating the Mercurial transformation | |
|
264 | # version. If it doesn't exist or doesn't match the current version, | |
|
265 | # we raise an OSError because that is what | |
|
266 | # ``SourceFileLoader.get_code()`` expects when loading bytecode | |
|
267 | # paths to indicate the cached file is "bad." | |
|
268 | if data[0:2] != b'HG': | |
|
269 | raise OSError('no hg header') | |
|
270 | if data[0:4] != BYTECODEHEADER: | |
|
271 | raise OSError('hg header version mismatch') | |
|
272 | ||
|
273 | return data[4:] | |
|
274 | ||
|
275 | def set_data(self, path, data, *args, **kwargs): | |
|
276 | if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): | |
|
277 | data = BYTECODEHEADER + data | |
|
278 | ||
|
279 | return super(hgloader, self).set_data(path, data, *args, **kwargs) | |
|
280 | ||
|
281 | def source_to_code(self, data, path): | |
|
282 | """Perform token transformation before compilation.""" | |
|
283 | buf = io.BytesIO(data) | |
|
284 | tokens = tokenize.tokenize(buf.readline) | |
|
285 | data = tokenize.untokenize(replacetokens(list(tokens), self.name)) | |
|
286 | # Python's built-in importer strips frames from exceptions raised | |
|
287 | # for this code. Unfortunately, that mechanism isn't extensible | |
|
288 | # and our frame will be blamed for the import failure. There | |
|
289 | # are extremely hacky ways to do frame stripping. We haven't | |
|
290 | # implemented them because they are very ugly. | |
|
291 | return super(hgloader, self).source_to_code(data, path) | |
|
292 | ||
|
293 | # We automagically register our custom importer as a side-effect of | |
|
294 | # loading. This is necessary to ensure that any entry points are able | |
|
295 | # to import mercurial.* modules without having to perform this | |
|
296 | # registration themselves. | |
|
297 | if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path): | |
|
298 | # meta_path is used before any implicit finders and before sys.path. | |
|
299 | sys.meta_path.insert(0, hgpathentryfinder()) |
General Comments 0
You need to be logged in to leave comments.
Login now