Show More
@@ -1,4 +1,4 b'' | |||||
1 | # __init__.py - Startup and module loading logic for Mercurial. |
|
1 | # byteify-strings.py - transform string literals to be Python 3 safe | |
2 | # |
|
2 | # | |
3 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> |
|
3 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> | |
4 | # |
|
4 | # | |
@@ -7,84 +7,18 b'' | |||||
7 |
|
7 | |||
8 | from __future__ import absolute_import |
|
8 | from __future__ import absolute_import | |
9 |
|
9 | |||
10 | import sys |
|
10 | import io | |
11 |
|
11 | import token | ||
12 | # Allow 'from mercurial import demandimport' to keep working. |
|
12 | import tokenize | |
13 | import hgdemandimport |
|
|||
14 | demandimport = hgdemandimport |
|
|||
15 |
|
||||
16 | __all__ = [] |
|
|||
17 |
|
||||
18 | # Python 3 uses a custom module loader that transforms source code between |
|
|||
19 | # source file reading and compilation. This is done by registering a custom |
|
|||
20 | # finder that changes the spec for Mercurial modules to use a custom loader. |
|
|||
21 | if sys.version_info[0] >= 3: |
|
|||
22 | import importlib |
|
|||
23 | import importlib.abc |
|
|||
24 | import io |
|
|||
25 | import token |
|
|||
26 | import tokenize |
|
|||
27 |
|
13 | |||
28 | class hgpathentryfinder(importlib.abc.MetaPathFinder): |
|
14 | if True: | |
29 | """A sys.meta_path finder that uses a custom module loader.""" |
|
|||
30 | def find_spec(self, fullname, path, target=None): |
|
|||
31 | # Only handle Mercurial-related modules. |
|
|||
32 | if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')): |
|
|||
33 | return None |
|
|||
34 | # don't try to parse binary |
|
|||
35 | if fullname.startswith('mercurial.cext.'): |
|
|||
36 | return None |
|
|||
37 | # third-party packages are expected to be dual-version clean |
|
|||
38 | if fullname.startswith('mercurial.thirdparty'): |
|
|||
39 | return None |
|
|||
40 | # zstd is already dual-version clean, don't try and mangle it |
|
|||
41 | if fullname.startswith('mercurial.zstd'): |
|
|||
42 | return None |
|
|||
43 | # pywatchman is already dual-version clean, don't try and mangle it |
|
|||
44 | if fullname.startswith('hgext.fsmonitor.pywatchman'): |
|
|||
45 | return None |
|
|||
46 |
|
||||
47 | # Try to find the module using other registered finders. |
|
|||
48 | spec = None |
|
|||
49 | for finder in sys.meta_path: |
|
|||
50 | if finder == self: |
|
|||
51 | continue |
|
|||
52 |
|
||||
53 | spec = finder.find_spec(fullname, path, target=target) |
|
|||
54 | if spec: |
|
|||
55 | break |
|
|||
56 |
|
||||
57 | # This is a Mercurial-related module but we couldn't find it |
|
|||
58 | # using the previously-registered finders. This likely means |
|
|||
59 | # the module doesn't exist. |
|
|||
60 | if not spec: |
|
|||
61 | return None |
|
|||
62 |
|
||||
63 | # TODO need to support loaders from alternate specs, like zip |
|
|||
64 | # loaders. |
|
|||
65 | loader = hgloader(spec.name, spec.origin) |
|
|||
66 | # Can't use util.safehasattr here because that would require |
|
|||
67 | # importing util, and we're in import code. |
|
|||
68 | if hasattr(spec.loader, 'loader'): # hasattr-py3-only |
|
|||
69 | # This is a nested loader (maybe a lazy loader?) |
|
|||
70 | spec.loader.loader = loader |
|
|||
71 | else: |
|
|||
72 | spec.loader = loader |
|
|||
73 | return spec |
|
|||
74 |
|
||||
75 | def replacetokens(tokens, fullname): |
|
15 | def replacetokens(tokens, fullname): | |
76 | """Transform a stream of tokens from raw to Python 3. |
|
16 | """Transform a stream of tokens from raw to Python 3. | |
77 |
|
17 | |||
78 | It is called by the custom module loading machinery to rewrite |
|
|||
79 | source/tokens between source decoding and compilation. |
|
|||
80 |
|
||||
81 | Returns a generator of possibly rewritten tokens. |
|
18 | Returns a generator of possibly rewritten tokens. | |
82 |
|
19 | |||
83 | The input token list may be mutated as part of processing. However, |
|
20 | The input token list may be mutated as part of processing. However, | |
84 | its changes do not necessarily match the output token stream. |
|
21 | its changes do not necessarily match the output token stream. | |
85 |
|
||||
86 | REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION |
|
|||
87 | OR CACHED FILES WON'T GET INVALIDATED PROPERLY. |
|
|||
88 | """ |
|
22 | """ | |
89 | futureimpline = False |
|
23 | futureimpline = False | |
90 |
|
24 | |||
@@ -218,82 +152,3 b' if sys.version_info[0] >= 3:' | |||||
218 |
|
152 | |||
219 | # Emit unmodified token. |
|
153 | # Emit unmodified token. | |
220 | yield t |
|
154 | yield t | |
221 |
|
||||
222 | # Header to add to bytecode files. This MUST be changed when |
|
|||
223 | # ``replacetokens`` or any mechanism that changes semantics of module |
|
|||
224 | # loading is changed. Otherwise cached bytecode may get loaded without |
|
|||
225 | # the new transformation mechanisms applied. |
|
|||
226 | BYTECODEHEADER = b'HG\x00\x0a' |
|
|||
227 |
|
||||
228 | class hgloader(importlib.machinery.SourceFileLoader): |
|
|||
229 | """Custom module loader that transforms source code. |
|
|||
230 |
|
||||
231 | When the source code is converted to a code object, we transform |
|
|||
232 | certain patterns to be Python 3 compatible. This allows us to write code |
|
|||
233 | that is natively Python 2 and compatible with Python 3 without |
|
|||
234 | making the code excessively ugly. |
|
|||
235 |
|
||||
236 | We do this by transforming the token stream between parse and compile. |
|
|||
237 |
|
||||
238 | Implementing transformations invalidates caching assumptions made |
|
|||
239 | by the built-in importer. The built-in importer stores a header on |
|
|||
240 | saved bytecode files indicating the Python/bytecode version. If the |
|
|||
241 | version changes, the cached bytecode is ignored. The Mercurial |
|
|||
242 | transformations could change at any time. This means we need to check |
|
|||
243 | that cached bytecode was generated with the current transformation |
|
|||
244 | code or there could be a mismatch between cached bytecode and what |
|
|||
245 | would be generated from this class. |
|
|||
246 |
|
||||
247 | We supplement the bytecode caching layer by wrapping ``get_data`` |
|
|||
248 | and ``set_data``. These functions are called when the |
|
|||
249 | ``SourceFileLoader`` retrieves and saves bytecode cache files, |
|
|||
250 | respectively. We simply add an additional header on the file. As |
|
|||
251 | long as the version in this file is changed when semantics change, |
|
|||
252 | cached bytecode should be invalidated when transformations change. |
|
|||
253 |
|
||||
254 | The added header has the form ``HG<VERSION>``. That is a literal |
|
|||
255 | ``HG`` with 2 binary bytes indicating the transformation version. |
|
|||
256 | """ |
|
|||
257 | def get_data(self, path): |
|
|||
258 | data = super(hgloader, self).get_data(path) |
|
|||
259 |
|
||||
260 | if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): |
|
|||
261 | return data |
|
|||
262 |
|
||||
263 | # There should be a header indicating the Mercurial transformation |
|
|||
264 | # version. If it doesn't exist or doesn't match the current version, |
|
|||
265 | # we raise an OSError because that is what |
|
|||
266 | # ``SourceFileLoader.get_code()`` expects when loading bytecode |
|
|||
267 | # paths to indicate the cached file is "bad." |
|
|||
268 | if data[0:2] != b'HG': |
|
|||
269 | raise OSError('no hg header') |
|
|||
270 | if data[0:4] != BYTECODEHEADER: |
|
|||
271 | raise OSError('hg header version mismatch') |
|
|||
272 |
|
||||
273 | return data[4:] |
|
|||
274 |
|
||||
275 | def set_data(self, path, data, *args, **kwargs): |
|
|||
276 | if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): |
|
|||
277 | data = BYTECODEHEADER + data |
|
|||
278 |
|
||||
279 | return super(hgloader, self).set_data(path, data, *args, **kwargs) |
|
|||
280 |
|
||||
281 | def source_to_code(self, data, path): |
|
|||
282 | """Perform token transformation before compilation.""" |
|
|||
283 | buf = io.BytesIO(data) |
|
|||
284 | tokens = tokenize.tokenize(buf.readline) |
|
|||
285 | data = tokenize.untokenize(replacetokens(list(tokens), self.name)) |
|
|||
286 | # Python's built-in importer strips frames from exceptions raised |
|
|||
287 | # for this code. Unfortunately, that mechanism isn't extensible |
|
|||
288 | # and our frame will be blamed for the import failure. There |
|
|||
289 | # are extremely hacky ways to do frame stripping. We haven't |
|
|||
290 | # implemented them because they are very ugly. |
|
|||
291 | return super(hgloader, self).source_to_code(data, path) |
|
|||
292 |
|
||||
293 | # We automagically register our custom importer as a side-effect of |
|
|||
294 | # loading. This is necessary to ensure that any entry points are able |
|
|||
295 | # to import mercurial.* modules without having to perform this |
|
|||
296 | # registration themselves. |
|
|||
297 | if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path): |
|
|||
298 | # meta_path is used before any implicit finders and before sys.path. |
|
|||
299 | sys.meta_path.insert(0, hgpathentryfinder()) |
|
General Comments 0
You need to be logged in to leave comments.
Login now