Show More
@@ -1,398 +1,279 b'' | |||||
1 | # __init__.py - Startup and module loading logic for Mercurial. |
|
1 | # __init__.py - Startup and module loading logic for Mercurial. | |
2 | # |
|
2 | # | |
3 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> |
|
3 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | from __future__ import absolute_import |
|
8 | from __future__ import absolute_import | |
9 |
|
9 | |||
10 | import imp |
|
|||
11 | import os |
|
|||
12 | import sys |
|
10 | import sys | |
13 | import zipimport |
|
|||
14 |
|
||||
15 | from . import ( |
|
|||
16 | policy |
|
|||
17 | ) |
|
|||
18 |
|
11 | |||
19 | __all__ = [] |
|
12 | __all__ = [] | |
20 |
|
13 | |||
21 | modulepolicy = policy.policy |
|
|||
22 |
|
||||
23 | # Modules that have both Python and C implementations. See also the |
|
|||
24 | # set of .py files under mercurial/pure/. |
|
|||
25 | _dualmodules = { |
|
|||
26 | } |
|
|||
27 |
|
||||
28 | class hgimporter(object): |
|
|||
29 | """Object that conforms to import hook interface defined in PEP-302.""" |
|
|||
30 | def find_module(self, name, path=None): |
|
|||
31 | # We only care about modules that have both C and pure implementations. |
|
|||
32 | if name in _dualmodules: |
|
|||
33 | return self |
|
|||
34 | return None |
|
|||
35 |
|
||||
36 | def load_module(self, name): |
|
|||
37 | mod = sys.modules.get(name, None) |
|
|||
38 | if mod: |
|
|||
39 | return mod |
|
|||
40 |
|
||||
41 | mercurial = sys.modules['mercurial'] |
|
|||
42 |
|
||||
43 | # The zip importer behaves sufficiently differently from the default |
|
|||
44 | # importer to warrant its own code path. |
|
|||
45 | loader = getattr(mercurial, '__loader__', None) |
|
|||
46 | if isinstance(loader, zipimport.zipimporter): |
|
|||
47 | def ziploader(*paths): |
|
|||
48 | """Obtain a zipimporter for a directory under the main zip.""" |
|
|||
49 | path = os.path.join(loader.archive, *paths) |
|
|||
50 | zl = sys.path_importer_cache.get(path) |
|
|||
51 | if not zl: |
|
|||
52 | zl = zipimport.zipimporter(path) |
|
|||
53 | return zl |
|
|||
54 |
|
||||
55 | try: |
|
|||
56 | if modulepolicy in policy.policynoc: |
|
|||
57 | raise ImportError() |
|
|||
58 |
|
||||
59 | zl = ziploader('mercurial') |
|
|||
60 | mod = zl.load_module(name) |
|
|||
61 | # Unlike imp, ziploader doesn't expose module metadata that |
|
|||
62 | # indicates the type of module. So just assume what we found |
|
|||
63 | # is OK (even though it could be a pure Python module). |
|
|||
64 | except ImportError: |
|
|||
65 | if modulepolicy == b'c': |
|
|||
66 | raise |
|
|||
67 | zl = ziploader('mercurial', 'pure') |
|
|||
68 | mod = zl.load_module(name) |
|
|||
69 |
|
||||
70 | sys.modules[name] = mod |
|
|||
71 | return mod |
|
|||
72 |
|
||||
73 | # Unlike the default importer which searches special locations and |
|
|||
74 | # sys.path, we only look in the directory where "mercurial" was |
|
|||
75 | # imported from. |
|
|||
76 |
|
||||
77 | # imp.find_module doesn't support submodules (modules with "."). |
|
|||
78 | # Instead you have to pass the parent package's __path__ attribute |
|
|||
79 | # as the path argument. |
|
|||
80 | stem = name.split('.')[-1] |
|
|||
81 |
|
||||
82 | try: |
|
|||
83 | if modulepolicy in policy.policynoc: |
|
|||
84 | raise ImportError() |
|
|||
85 |
|
||||
86 | modinfo = imp.find_module(stem, mercurial.__path__) |
|
|||
87 |
|
||||
88 | # The Mercurial installer used to copy files from |
|
|||
89 | # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible |
|
|||
90 | # for some installations to have .py files under mercurial/*. |
|
|||
91 | # Loading Python modules when we expected C versions could result |
|
|||
92 | # in a) poor performance b) loading a version from a previous |
|
|||
93 | # Mercurial version, potentially leading to incompatibility. Either |
|
|||
94 | # scenario is bad. So we verify that modules loaded from |
|
|||
95 | # mercurial/* are C extensions. If the current policy allows the |
|
|||
96 | # loading of .py modules, the module will be re-imported from |
|
|||
97 | # mercurial/pure/* below. |
|
|||
98 | if modinfo[2][2] != imp.C_EXTENSION: |
|
|||
99 | raise ImportError('.py version of %s found where C ' |
|
|||
100 | 'version should exist' % name) |
|
|||
101 |
|
||||
102 | except ImportError: |
|
|||
103 | if modulepolicy == b'c': |
|
|||
104 | raise |
|
|||
105 |
|
||||
106 | # Could not load the C extension and pure Python is allowed. So |
|
|||
107 | # try to load them. |
|
|||
108 | from . import pure |
|
|||
109 | modinfo = imp.find_module(stem, pure.__path__) |
|
|||
110 | if not modinfo: |
|
|||
111 | raise ImportError('could not find mercurial module %s' % |
|
|||
112 | name) |
|
|||
113 |
|
||||
114 | mod = imp.load_module(name, *modinfo) |
|
|||
115 | sys.modules[name] = mod |
|
|||
116 | return mod |
|
|||
117 |
|
||||
118 | # Python 3 uses a custom module loader that transforms source code between |
|
14 | # Python 3 uses a custom module loader that transforms source code between | |
119 | # source file reading and compilation. This is done by registering a custom |
|
15 | # source file reading and compilation. This is done by registering a custom | |
120 | # finder that changes the spec for Mercurial modules to use a custom loader. |
|
16 | # finder that changes the spec for Mercurial modules to use a custom loader. | |
121 | if sys.version_info[0] >= 3: |
|
17 | if sys.version_info[0] >= 3: | |
122 | from . import pure |
|
|||
123 | import importlib |
|
18 | import importlib | |
|
19 | import importlib.abc | |||
124 | import io |
|
20 | import io | |
125 | import token |
|
21 | import token | |
126 | import tokenize |
|
22 | import tokenize | |
127 |
|
23 | |||
128 | class hgpathentryfinder(importlib.abc.MetaPathFinder): |
|
24 | class hgpathentryfinder(importlib.abc.MetaPathFinder): | |
129 | """A sys.meta_path finder that uses a custom module loader.""" |
|
25 | """A sys.meta_path finder that uses a custom module loader.""" | |
130 | def find_spec(self, fullname, path, target=None): |
|
26 | def find_spec(self, fullname, path, target=None): | |
131 | # Only handle Mercurial-related modules. |
|
27 | # Only handle Mercurial-related modules. | |
132 | if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')): |
|
28 | if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')): | |
133 | return None |
|
29 | return None | |
134 | # zstd is already dual-version clean, don't try and mangle it |
|
30 | # zstd is already dual-version clean, don't try and mangle it | |
135 | if fullname.startswith('mercurial.zstd'): |
|
31 | if fullname.startswith('mercurial.zstd'): | |
136 | return None |
|
32 | return None | |
137 |
|
33 | |||
138 | # This assumes Python 3 doesn't support loading C modules. |
|
|||
139 | if fullname in _dualmodules: |
|
|||
140 | stem = fullname.split('.')[-1] |
|
|||
141 | fullname = 'mercurial.pure.%s' % stem |
|
|||
142 | target = pure |
|
|||
143 | assert len(path) == 1 |
|
|||
144 | path = [os.path.join(path[0], 'pure')] |
|
|||
145 |
|
||||
146 | # Try to find the module using other registered finders. |
|
34 | # Try to find the module using other registered finders. | |
147 | spec = None |
|
35 | spec = None | |
148 | for finder in sys.meta_path: |
|
36 | for finder in sys.meta_path: | |
149 | if finder == self: |
|
37 | if finder == self: | |
150 | continue |
|
38 | continue | |
151 |
|
39 | |||
152 | spec = finder.find_spec(fullname, path, target=target) |
|
40 | spec = finder.find_spec(fullname, path, target=target) | |
153 | if spec: |
|
41 | if spec: | |
154 | break |
|
42 | break | |
155 |
|
43 | |||
156 | # This is a Mercurial-related module but we couldn't find it |
|
44 | # This is a Mercurial-related module but we couldn't find it | |
157 | # using the previously-registered finders. This likely means |
|
45 | # using the previously-registered finders. This likely means | |
158 | # the module doesn't exist. |
|
46 | # the module doesn't exist. | |
159 | if not spec: |
|
47 | if not spec: | |
160 | return None |
|
48 | return None | |
161 |
|
49 | |||
162 | if (fullname.startswith('mercurial.pure.') |
|
|||
163 | and fullname.replace('.pure.', '.') in _dualmodules): |
|
|||
164 | spec.name = spec.name.replace('.pure.', '.') |
|
|||
165 |
|
||||
166 | # TODO need to support loaders from alternate specs, like zip |
|
50 | # TODO need to support loaders from alternate specs, like zip | |
167 | # loaders. |
|
51 | # loaders. | |
168 | spec.loader = hgloader(spec.name, spec.origin) |
|
52 | spec.loader = hgloader(spec.name, spec.origin) | |
169 | return spec |
|
53 | return spec | |
170 |
|
54 | |||
171 | def replacetokens(tokens, fullname): |
|
55 | def replacetokens(tokens, fullname): | |
172 | """Transform a stream of tokens from raw to Python 3. |
|
56 | """Transform a stream of tokens from raw to Python 3. | |
173 |
|
57 | |||
174 | It is called by the custom module loading machinery to rewrite |
|
58 | It is called by the custom module loading machinery to rewrite | |
175 | source/tokens between source decoding and compilation. |
|
59 | source/tokens between source decoding and compilation. | |
176 |
|
60 | |||
177 | Returns a generator of possibly rewritten tokens. |
|
61 | Returns a generator of possibly rewritten tokens. | |
178 |
|
62 | |||
179 | The input token list may be mutated as part of processing. However, |
|
63 | The input token list may be mutated as part of processing. However, | |
180 | its changes do not necessarily match the output token stream. |
|
64 | its changes do not necessarily match the output token stream. | |
181 |
|
65 | |||
182 | REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION |
|
66 | REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION | |
183 | OR CACHED FILES WON'T GET INVALIDATED PROPERLY. |
|
67 | OR CACHED FILES WON'T GET INVALIDATED PROPERLY. | |
184 | """ |
|
68 | """ | |
185 | futureimpline = False |
|
69 | futureimpline = False | |
186 |
|
70 | |||
187 | # The following utility functions access the tokens list and i index of |
|
71 | # The following utility functions access the tokens list and i index of | |
188 | # the ``for i, t in enumerate(tokens)`` loop below |

72 | # the ``for i, t in enumerate(tokens)`` loop below | |
189 | def _isop(j, *o): |
|
73 | def _isop(j, *o): | |
190 | """Assert that tokens[j] is an OP with one of the given values""" |
|
74 | """Assert that tokens[j] is an OP with one of the given values""" | |
191 | try: |
|
75 | try: | |
192 | return tokens[j].type == token.OP and tokens[j].string in o |
|
76 | return tokens[j].type == token.OP and tokens[j].string in o | |
193 | except IndexError: |
|
77 | except IndexError: | |
194 | return False |
|
78 | return False | |
195 |
|
79 | |||
196 | def _findargnofcall(n): |
|
80 | def _findargnofcall(n): | |
197 | """Find arg n of a call expression (start at 0) |
|
81 | """Find arg n of a call expression (start at 0) | |
198 |
|
82 | |||
199 | Returns index of the first token of that argument, or None if |
|
83 | Returns index of the first token of that argument, or None if | |
200 | there are not that many arguments. |

84 | there are not that many arguments. | |
201 |
|
85 | |||
202 | Assumes that token[i + 1] is '('. |
|
86 | Assumes that token[i + 1] is '('. | |
203 |
|
87 | |||
204 | """ |
|
88 | """ | |
205 | nested = 0 |
|
89 | nested = 0 | |
206 | for j in range(i + 2, len(tokens)): |
|
90 | for j in range(i + 2, len(tokens)): | |
207 | if _isop(j, ')', ']', '}'): |
|
91 | if _isop(j, ')', ']', '}'): | |
208 | # end of call, tuple, subscription or dict / set |
|
92 | # end of call, tuple, subscription or dict / set | |
209 | nested -= 1 |
|
93 | nested -= 1 | |
210 | if nested < 0: |
|
94 | if nested < 0: | |
211 | return None |
|
95 | return None | |
212 | elif n == 0: |
|
96 | elif n == 0: | |
213 | # this is the starting position of arg |
|
97 | # this is the starting position of arg | |
214 | return j |
|
98 | return j | |
215 | elif _isop(j, '(', '[', '{'): |
|
99 | elif _isop(j, '(', '[', '{'): | |
216 | nested += 1 |
|
100 | nested += 1 | |
217 | elif _isop(j, ',') and nested == 0: |
|
101 | elif _isop(j, ',') and nested == 0: | |
218 | n -= 1 |
|
102 | n -= 1 | |
219 |
|
103 | |||
220 | return None |
|
104 | return None | |
221 |
|
105 | |||
222 | def _ensureunicode(j): |
|
106 | def _ensureunicode(j): | |
223 | """Make sure the token at j is a unicode string |
|
107 | """Make sure the token at j is a unicode string | |
224 |
|
108 | |||
225 | This rewrites a string token to include the unicode literal prefix |
|
109 | This rewrites a string token to include the unicode literal prefix | |
226 | so the string transformer won't add the byte prefix. |
|
110 | so the string transformer won't add the byte prefix. | |
227 |
|
111 | |||
228 | Ignores tokens that are not strings. Assumes bounds checking has |
|
112 | Ignores tokens that are not strings. Assumes bounds checking has | |
229 | already been done. |
|
113 | already been done. | |
230 |
|
114 | |||
231 | """ |
|
115 | """ | |
232 | st = tokens[j] |
|
116 | st = tokens[j] | |
233 | if st.type == token.STRING and st.string.startswith(("'", '"')): |
|
117 | if st.type == token.STRING and st.string.startswith(("'", '"')): | |
234 | tokens[j] = st._replace(string='u%s' % st.string) |
|
118 | tokens[j] = st._replace(string='u%s' % st.string) | |
235 |
|
119 | |||
236 | for i, t in enumerate(tokens): |
|
120 | for i, t in enumerate(tokens): | |
237 | # Convert most string literals to byte literals. String literals |
|
121 | # Convert most string literals to byte literals. String literals | |
238 | # in Python 2 are bytes. String literals in Python 3 are unicode. |
|
122 | # in Python 2 are bytes. String literals in Python 3 are unicode. | |
239 | # Most strings in Mercurial are bytes and unicode strings are rare. |
|
123 | # Most strings in Mercurial are bytes and unicode strings are rare. | |
240 | # Rather than rewrite all string literals to use ``b''`` to indicate |
|
124 | # Rather than rewrite all string literals to use ``b''`` to indicate | |
241 | # byte strings, we apply this token transformer to insert the ``b`` |
|
125 | # byte strings, we apply this token transformer to insert the ``b`` | |
242 | # prefix nearly everywhere. |
|
126 | # prefix nearly everywhere. | |
243 | if t.type == token.STRING: |
|
127 | if t.type == token.STRING: | |
244 | s = t.string |
|
128 | s = t.string | |
245 |
|
129 | |||
246 | # Preserve docstrings as string literals. This is inconsistent |
|
130 | # Preserve docstrings as string literals. This is inconsistent | |
247 | # with regular unprefixed strings. However, the |
|
131 | # with regular unprefixed strings. However, the | |
248 | # "from __future__" parsing (which allows a module docstring to |
|
132 | # "from __future__" parsing (which allows a module docstring to | |
249 | # exist before it) doesn't properly handle the docstring if it |
|
133 | # exist before it) doesn't properly handle the docstring if it | |
250 | # is b''' prefixed, leading to a SyntaxError. We leave all |
|
134 | # is b''' prefixed, leading to a SyntaxError. We leave all | |
251 | # docstrings as unprefixed to avoid this. This means Mercurial |
|
135 | # docstrings as unprefixed to avoid this. This means Mercurial | |
252 | # components touching docstrings need to handle unicode, |
|
136 | # components touching docstrings need to handle unicode, | |
253 | # unfortunately. |
|
137 | # unfortunately. | |
254 | if s[0:3] in ("'''", '"""'): |
|
138 | if s[0:3] in ("'''", '"""'): | |
255 | yield t |
|
139 | yield t | |
256 | continue |
|
140 | continue | |
257 |
|
141 | |||
258 | # If the first character isn't a quote, it is likely a string |
|
142 | # If the first character isn't a quote, it is likely a string | |
259 | # prefixing character (such as 'b', 'u', or 'r'). Ignore. |

143 | # prefixing character (such as 'b', 'u', or 'r'). Ignore. | |
260 | if s[0] not in ("'", '"'): |
|
144 | if s[0] not in ("'", '"'): | |
261 | yield t |
|
145 | yield t | |
262 | continue |
|
146 | continue | |
263 |
|
147 | |||
264 | # String literal. Prefix to make a b'' string. |
|
148 | # String literal. Prefix to make a b'' string. | |
265 | yield t._replace(string='b%s' % t.string) |
|
149 | yield t._replace(string='b%s' % t.string) | |
266 | continue |
|
150 | continue | |
267 |
|
151 | |||
268 | # Insert compatibility imports at "from __future__ import" line. |
|
152 | # Insert compatibility imports at "from __future__ import" line. | |
269 | # No '\n' should be added to preserve line numbers. |
|
153 | # No '\n' should be added to preserve line numbers. | |
270 | if (t.type == token.NAME and t.string == 'import' and |
|
154 | if (t.type == token.NAME and t.string == 'import' and | |
271 | all(u.type == token.NAME for u in tokens[i - 2:i]) and |
|
155 | all(u.type == token.NAME for u in tokens[i - 2:i]) and | |
272 | [u.string for u in tokens[i - 2:i]] == ['from', '__future__']): |
|
156 | [u.string for u in tokens[i - 2:i]] == ['from', '__future__']): | |
273 | futureimpline = True |
|
157 | futureimpline = True | |
274 | if t.type == token.NEWLINE and futureimpline: |
|
158 | if t.type == token.NEWLINE and futureimpline: | |
275 | futureimpline = False |
|
159 | futureimpline = False | |
276 | if fullname == 'mercurial.pycompat': |
|
160 | if fullname == 'mercurial.pycompat': | |
277 | yield t |
|
161 | yield t | |
278 | continue |
|
162 | continue | |
279 | r, c = t.start |
|
163 | r, c = t.start | |
280 | l = (b'; from mercurial.pycompat import ' |
|
164 | l = (b'; from mercurial.pycompat import ' | |
281 | b'delattr, getattr, hasattr, setattr, xrange, ' |
|
165 | b'delattr, getattr, hasattr, setattr, xrange, ' | |
282 | b'open, unicode\n') |
|
166 | b'open, unicode\n') | |
283 | for u in tokenize.tokenize(io.BytesIO(l).readline): |
|
167 | for u in tokenize.tokenize(io.BytesIO(l).readline): | |
284 | if u.type in (tokenize.ENCODING, token.ENDMARKER): |
|
168 | if u.type in (tokenize.ENCODING, token.ENDMARKER): | |
285 | continue |
|
169 | continue | |
286 | yield u._replace( |
|
170 | yield u._replace( | |
287 | start=(r, c + u.start[1]), end=(r, c + u.end[1])) |
|
171 | start=(r, c + u.start[1]), end=(r, c + u.end[1])) | |
288 | continue |
|
172 | continue | |
289 |
|
173 | |||
290 | # This looks like a function call. |
|
174 | # This looks like a function call. | |
291 | if t.type == token.NAME and _isop(i + 1, '('): |
|
175 | if t.type == token.NAME and _isop(i + 1, '('): | |
292 | fn = t.string |
|
176 | fn = t.string | |
293 |
|
177 | |||
294 | # *attr() builtins don't accept byte strings to 2nd argument. |
|
178 | # *attr() builtins don't accept byte strings to 2nd argument. | |
295 | if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and |
|
179 | if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and | |
296 | not _isop(i - 1, '.')): |
|
180 | not _isop(i - 1, '.')): | |
297 | arg1idx = _findargnofcall(1) |
|
181 | arg1idx = _findargnofcall(1) | |
298 | if arg1idx is not None: |
|
182 | if arg1idx is not None: | |
299 | _ensureunicode(arg1idx) |
|
183 | _ensureunicode(arg1idx) | |
300 |
|
184 | |||
301 | # .encode() and .decode() on str/bytes/unicode don't accept |
|
185 | # .encode() and .decode() on str/bytes/unicode don't accept | |
302 | # byte strings on Python 3. |
|
186 | # byte strings on Python 3. | |
303 | elif fn in ('encode', 'decode') and _isop(i - 1, '.'): |
|
187 | elif fn in ('encode', 'decode') and _isop(i - 1, '.'): | |
304 | for argn in range(2): |
|
188 | for argn in range(2): | |
305 | argidx = _findargnofcall(argn) |
|
189 | argidx = _findargnofcall(argn) | |
306 | if argidx is not None: |
|
190 | if argidx is not None: | |
307 | _ensureunicode(argidx) |
|
191 | _ensureunicode(argidx) | |
308 |
|
192 | |||
309 | # It changes iteritems/values to items/values as they are not |
|
309 | # Rewrite iteritems/itervalues to items/values, as the iter* forms |

193 | # Rewrite iteritems/itervalues to items/values, as the iter* forms | |
310 | # are not present in Python 3. |

194 | # are not present in Python 3. | |
195 | elif fn in ('iteritems', 'itervalues'): | |
312 | yield t._replace(string=fn[4:]) |
|
196 | yield t._replace(string=fn[4:]) | |
313 | continue |
|
197 | continue | |
314 |
|
198 | |||
315 | # Emit unmodified token. |
|
199 | # Emit unmodified token. | |
316 | yield t |
|
200 | yield t | |
317 |
|
201 | |||
318 | # Header to add to bytecode files. This MUST be changed when |
|
202 | # Header to add to bytecode files. This MUST be changed when | |
319 | # ``replacetokens`` or any mechanism that changes semantics of module |

203 | # ``replacetokens`` or any mechanism that changes semantics of module | |
320 | # loading is changed. Otherwise cached bytecode may get loaded without |
|
204 | # loading is changed. Otherwise cached bytecode may get loaded without | |
321 | # the new transformation mechanisms applied. |
|
205 | # the new transformation mechanisms applied. | |
322 | BYTECODEHEADER = b'HG\x00\x0a' |
|
206 | BYTECODEHEADER = b'HG\x00\x0a' | |
323 |
|
207 | |||
324 | class hgloader(importlib.machinery.SourceFileLoader): |
|
208 | class hgloader(importlib.machinery.SourceFileLoader): | |
325 | """Custom module loader that transforms source code. |
|
209 | """Custom module loader that transforms source code. | |
326 |
|
210 | |||
327 | When the source code is converted to a code object, we transform |
|
211 | When the source code is converted to a code object, we transform | |
328 | certain patterns to be Python 3 compatible. This allows us to write code |
|
212 | certain patterns to be Python 3 compatible. This allows us to write code | |
329 | that is natively Python 2 and compatible with Python 3 without |
|
213 | that is natively Python 2 and compatible with Python 3 without | |
330 | making the code excessively ugly. |
|
214 | making the code excessively ugly. | |
331 |
|
215 | |||
332 | We do this by transforming the token stream between parse and compile. |
|
216 | We do this by transforming the token stream between parse and compile. | |
333 |
|
217 | |||
334 | Implementing transformations invalidates caching assumptions made |
|
218 | Implementing transformations invalidates caching assumptions made | |
335 | by the built-in importer. The built-in importer stores a header on |
|
219 | by the built-in importer. The built-in importer stores a header on | |
336 | saved bytecode files indicating the Python/bytecode version. If the |
|
220 | saved bytecode files indicating the Python/bytecode version. If the | |
337 | version changes, the cached bytecode is ignored. The Mercurial |
|
221 | version changes, the cached bytecode is ignored. The Mercurial | |
338 | transformations could change at any time. This means we need to check |
|
222 | transformations could change at any time. This means we need to check | |
339 | that cached bytecode was generated with the current transformation |
|
223 | that cached bytecode was generated with the current transformation | |
340 | code or there could be a mismatch between cached bytecode and what |
|
224 | code or there could be a mismatch between cached bytecode and what | |
341 | would be generated from this class. |
|
225 | would be generated from this class. | |
342 |
|
226 | |||
343 | We supplement the bytecode caching layer by wrapping ``get_data`` |
|
227 | We supplement the bytecode caching layer by wrapping ``get_data`` | |
344 | and ``set_data``. These functions are called when the |
|
228 | and ``set_data``. These functions are called when the | |
345 | ``SourceFileLoader`` retrieves and saves bytecode cache files, |
|
229 | ``SourceFileLoader`` retrieves and saves bytecode cache files, | |
346 | respectively. We simply add an additional header on the file. As |
|
230 | respectively. We simply add an additional header on the file. As | |
347 | long as the version in this file is changed when semantics change, |
|
231 | long as the version in this file is changed when semantics change, | |
348 | cached bytecode should be invalidated when transformations change. |
|
232 | cached bytecode should be invalidated when transformations change. | |
349 |
|
233 | |||
350 | The added header has the form ``HG<VERSION>``. That is a literal |
|
234 | The added header has the form ``HG<VERSION>``. That is a literal | |
351 | ``HG`` with 2 binary bytes indicating the transformation version. |
|
235 | ``HG`` with 2 binary bytes indicating the transformation version. | |
352 | """ |
|
236 | """ | |
353 | def get_data(self, path): |
|
237 | def get_data(self, path): | |
354 | data = super(hgloader, self).get_data(path) |
|
238 | data = super(hgloader, self).get_data(path) | |
355 |
|
239 | |||
356 | if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): |
|
240 | if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): | |
357 | return data |
|
241 | return data | |
358 |
|
242 | |||
359 | # There should be a header indicating the Mercurial transformation |
|
243 | # There should be a header indicating the Mercurial transformation | |
360 | # version. If it doesn't exist or doesn't match the current version, |
|
244 | # version. If it doesn't exist or doesn't match the current version, | |
361 | # we raise an OSError because that is what |
|
245 | # we raise an OSError because that is what | |
362 | # ``SourceFileLoader.get_code()`` expects when loading bytecode |
|
246 | # ``SourceFileLoader.get_code()`` expects when loading bytecode | |
363 | # paths to indicate the cached file is "bad." |
|
247 | # paths to indicate the cached file is "bad." | |
364 | if data[0:2] != b'HG': |
|
248 | if data[0:2] != b'HG': | |
365 | raise OSError('no hg header') |
|
249 | raise OSError('no hg header') | |
366 | if data[0:4] != BYTECODEHEADER: |
|
250 | if data[0:4] != BYTECODEHEADER: | |
367 | raise OSError('hg header version mismatch') |
|
251 | raise OSError('hg header version mismatch') | |
368 |
|
252 | |||
369 | return data[4:] |
|
253 | return data[4:] | |
370 |
|
254 | |||
371 | def set_data(self, path, data, *args, **kwargs): |
|
255 | def set_data(self, path, data, *args, **kwargs): | |
372 | if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): |
|
256 | if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): | |
373 | data = BYTECODEHEADER + data |
|
257 | data = BYTECODEHEADER + data | |
374 |
|
258 | |||
375 | return super(hgloader, self).set_data(path, data, *args, **kwargs) |
|
259 | return super(hgloader, self).set_data(path, data, *args, **kwargs) | |
376 |
|
260 | |||
377 | def source_to_code(self, data, path): |
|
261 | def source_to_code(self, data, path): | |
378 | """Perform token transformation before compilation.""" |
|
262 | """Perform token transformation before compilation.""" | |
379 | buf = io.BytesIO(data) |
|
263 | buf = io.BytesIO(data) | |
380 | tokens = tokenize.tokenize(buf.readline) |
|
264 | tokens = tokenize.tokenize(buf.readline) | |
381 | data = tokenize.untokenize(replacetokens(list(tokens), self.name)) |
|
265 | data = tokenize.untokenize(replacetokens(list(tokens), self.name)) | |
382 | # Python's built-in importer strips frames from exceptions raised |
|
266 | # Python's built-in importer strips frames from exceptions raised | |
383 | # for this code. Unfortunately, that mechanism isn't extensible |
|
267 | # for this code. Unfortunately, that mechanism isn't extensible | |
384 | # and our frame will be blamed for the import failure. There |
|
268 | # and our frame will be blamed for the import failure. There | |
385 | # are extremely hacky ways to do frame stripping. We haven't |
|
269 | # are extremely hacky ways to do frame stripping. We haven't | |
386 | # implemented them because they are very ugly. |
|
270 | # implemented them because they are very ugly. | |
387 | return super(hgloader, self).source_to_code(data, path) |
|
271 | return super(hgloader, self).source_to_code(data, path) | |
388 |
|
272 | |||
389 | # We automagically register our custom importer as a side-effect of |

273 | # We automagically register our custom importer as a side-effect of | |
390 | # loading. This is necessary to ensure that any entry points are able |

274 | # loading. This is necessary to ensure that any entry points are able | |
391 | # to import mercurial.* modules without having to perform this |
|
275 | # to import mercurial.* modules without having to perform this | |
392 | if sys.version_info[0] >= 3: |
|
276 | # registration themselves. | |
393 | _importercls = hgpathentryfinder |
|
277 | if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path): | |
394 | else: |
|
278 | # meta_path is used before any implicit finders and before sys.path. | |
395 | _importercls = hgimporter |
|
279 | sys.meta_path.insert(0, hgpathentryfinder()) | |
396 | if not any(isinstance(x, _importercls) for x in sys.meta_path): |
|
|||
397 | # meta_path is used before any implicit finders and before sys.path. |
|
|||
398 | sys.meta_path.insert(0, _importercls()) |
|
General Comments 0
You need to be logged in to leave comments.
Login now