Show More
@@ -1,412 +1,406 b'' | |||
|
1 | 1 | # __init__.py - Startup and module loading logic for Mercurial. |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | 10 | import imp |
|
11 | 11 | import os |
|
12 | 12 | import sys |
|
13 | 13 | import zipimport |
|
14 | 14 | |
|
15 | 15 | from . import ( |
|
16 | 16 | policy |
|
17 | 17 | ) |
|
18 | 18 | |
|
# Nothing is exported through ``from mercurial import *``.
__all__ = []

# Module loading policy, as provided by the ``policy`` module. The importer
# compares it against ``policy.policynoc`` and the literal ``'c'`` to decide
# whether C extensions and/or pure Python modules may be loaded.
modulepolicy = policy.policy

# Modules that have both Python and C implementations. See also the
# set of .py files under mercurial/pure/.
# Only names listed here get special treatment from the import hooks
# defined in this file.
_dualmodules = set([
    'mercurial.base85',
    'mercurial.bdiff',
    'mercurial.diffhelpers',
    'mercurial.mpatch',
    'mercurial.osutil',
    'mercurial.parsers',
])
|
33 | 33 | |
|
class hgimporter(object):
    """Object that conforms to import hook interface defined in PEP-302.

    The importer only claims the modules listed in ``_dualmodules`` and
    picks either the C extension or the pure Python implementation
    according to ``modulepolicy``.
    """
    def find_module(self, name, path=None):
        """Return this object as loader for dual modules, None otherwise."""
        # We only care about modules that have both C and pure implementations.
        if name in _dualmodules:
            return self
        return None

    def load_module(self, name):
        """Load and return the module called ``name``.

        An already imported module is returned from ``sys.modules`` as is.
        Otherwise the C version is tried first (unless the policy forbids
        C modules) and the pure Python version is used as a fallback
        (unless the policy is ``'c'``).

        Raises ImportError if no implementation allowed by the policy can
        be found.
        """
        mod = sys.modules.get(name, None)
        if mod:
            return mod

        mercurial = sys.modules['mercurial']

        # The zip importer behaves sufficiently differently from the default
        # importer to warrant its own code path.
        loader = getattr(mercurial, '__loader__', None)
        if isinstance(loader, zipimport.zipimporter):
            def ziploader(*paths):
                """Obtain a zipimporter for a directory under the main zip."""
                path = os.path.join(loader.archive, *paths)
                zl = sys.path_importer_cache.get(path)
                if not zl:
                    zl = zipimport.zipimporter(path)
                return zl

            try:
                if modulepolicy in policy.policynoc:
                    raise ImportError()

                zl = ziploader('mercurial')
                mod = zl.load_module(name)
                # Unlike imp, ziploader doesn't expose module metadata that
                # indicates the type of module. So just assume what we found
                # is OK (even though it could be a pure Python module).
            except ImportError:
                if modulepolicy == 'c':
                    raise
                zl = ziploader('mercurial', 'pure')
                mod = zl.load_module(name)

            sys.modules[name] = mod
            return mod

        def closefile(info):
            """Close the file object returned by imp.find_module, if any."""
            if info[0] is not None:
                info[0].close()

        # Unlike the default importer which searches special locations and
        # sys.path, we only look in the directory where "mercurial" was
        # imported from.

        # imp.find_module doesn't support submodules (modules with ".").
        # Instead you have to pass the parent package's __path__ attribute
        # as the path argument.
        stem = name.split('.')[-1]

        try:
            if modulepolicy in policy.policynoc:
                raise ImportError()

            modinfo = imp.find_module(stem, mercurial.__path__)

            # The Mercurial installer used to copy files from
            # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible
            # for some installations to have .py files under mercurial/*.
            # Loading Python modules when we expected C versions could result
            # in a) poor performance b) loading a version from a previous
            # Mercurial version, potentially leading to incompatibility. Either
            # scenario is bad. So we verify that modules loaded from
            # mercurial/* are C extensions. If the current policy allows the
            # loading of .py modules, the module will be re-imported from
            # mercurial/pure/* below.
            if modinfo[2][2] != imp.C_EXTENSION:
                # The rejected candidate was opened by imp.find_module;
                # release it before giving up on it.
                closefile(modinfo)
                raise ImportError('.py version of %s found where C '
                                  'version should exist' % name)

        except ImportError:
            if modulepolicy == 'c':
                raise

            # Could not load the C extension and pure Python is allowed. So
            # try to load them. imp.find_module raises ImportError itself
            # when nothing is found, so no extra check is needed here.
            from . import pure
            modinfo = imp.find_module(stem, pure.__path__)

        try:
            mod = imp.load_module(name, *modinfo)
        finally:
            # imp.load_module never closes the file it is given; that is the
            # caller's job, even when loading fails.
            closefile(modinfo)
        sys.modules[name] = mod
        return mod
|
123 | 123 | |
|
124 | 124 | # Python 3 uses a custom module loader that transforms source code between |
|
125 | 125 | # source file reading and compilation. This is done by registering a custom |
|
126 | 126 | # finder that changes the spec for Mercurial modules to use a custom loader. |
|
127 | 127 | if sys.version_info[0] >= 3: |
|
128 | 128 | from . import pure |
|
129 | 129 | import importlib |
|
130 | 130 | import io |
|
131 | 131 | import token |
|
132 | 132 | import tokenize |
|
133 | 133 | |
|
class hgpathentryfinder(importlib.abc.MetaPathFinder):
    """A sys.meta_path finder that uses a custom module loader."""
    def find_spec(self, fullname, path, target=None):
        """Return a module spec whose loader is ``hgloader``.

        Only names starting with ``mercurial.``, ``hgext.`` or ``hgext3rd.``
        are handled; None is returned for everything else and for modules
        that no other registered finder can locate. Names listed in
        ``_dualmodules`` are looked up under ``mercurial/pure`` but keep
        their public ``mercurial.<name>`` spec name.
        """
        # Only handle Mercurial-related modules.
        if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
            return None

        # This assumes Python 3 doesn't support loading C modules.
        if fullname in _dualmodules:
            stem = fullname.split('.')[-1]
            fullname = 'mercurial.pure.%s' % stem
            target = pure
            assert len(path) == 1
            path = [os.path.join(path[0], 'pure')]

        # Try to find the module using other registered finders.
        spec = None
        for finder in sys.meta_path:
            # Identity, not equality: we only want to skip ourselves and
            # must not depend on how third-party finders implement __eq__.
            if finder is self:
                continue

            # Legacy PEP 302 finders only provide find_module(). They
            # cannot produce a spec, so skip them instead of failing with
            # AttributeError.
            findspec = getattr(finder, 'find_spec', None)
            if findspec is None:
                continue

            spec = findspec(fullname, path, target=target)
            if spec:
                break

        # This is a Mercurial-related module but we couldn't find it
        # using the previously-registered finders. This likely means
        # the module doesn't exist.
        if not spec:
            return None

        if fullname.startswith('mercurial.pure.'):
            spec.name = spec.name.replace('.pure.', '.')

        # TODO need to support loaders from alternate specs, like zip
        # loaders.
        spec.loader = hgloader(spec.name, spec.origin)
        return spec
|
172 | 172 | |
|
def replacetokens(tokens, fullname):
    """Rewrite a list of raw source tokens into Python 3 friendly tokens.

    The custom module loader calls this between tokenizing a source file
    and compiling it.

    ``tokens`` is the list of tokens of the module; it may be modified in
    place while processing, and those modifications do not necessarily
    mirror what is yielded. ``fullname`` is the dotted name of the module
    being loaded.

    Yields the possibly rewritten tokens, in order.

    REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
    OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
    """
    # True from the ``import`` keyword of a ``from __future__ import``
    # statement until the NEWLINE token that ends that statement.
    infutureimport = False

    # The helpers below read the ``tokens`` list and ``idx``, the index of
    # the ``for idx, t in enumerate(tokens)`` loop further down.
    def _isop(pos, *values):
        """Tell whether tokens[pos] is an OP with one of the given values"""
        try:
            candidate = tokens[pos]
        except IndexError:
            return False
        return candidate.type == token.OP and candidate.string in values

    def _findargnofcall(n):
        """Locate argument n (counted from 0) of the call at tokens[idx]

        Returns the index of the first token of that argument, or None if
        the call has fewer arguments.

        Assumes that tokens[idx + 1] is '('.

        """
        depth = 0
        for pos in range(idx + 2, len(tokens)):
            if _isop(pos, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                depth -= 1
                if depth < 0:
                    return None
                continue
            if n == 0:
                # first token of the requested argument
                return pos
            if _isop(pos, '(', '[', '{'):
                depth += 1
            elif depth == 0 and _isop(pos, ','):
                n -= 1

        return None

    def _ensureunicode(pos):
        """Turn the token at pos into a unicode string literal

        An unprefixed string token gets the ``u`` prefix so that the main
        loop will not add the byte prefix to it later.

        Tokens that are not strings are left alone. The caller is
        responsible for bounds checking.

        """
        strtok = tokens[pos]
        if strtok.type != token.STRING:
            return
        if not strtok.string.startswith(("'", '"')):
            return
        tokens[pos] = strtok._replace(string='u%s' % strtok.string)

    for idx, t in enumerate(tokens):
        # String literals are bytes on Python 2 and unicode on Python 3.
        # Mercurial works on bytes nearly everywhere, so instead of
        # spelling ``b''`` throughout the code base, the prefix is
        # inserted here.
        if t.type == token.STRING:
            literal = t.string

            # Two kinds of strings are passed through untouched:
            #
            # - triple-quoted strings, which are taken to be docstrings. A
            #   b''' prefixed module docstring in front of
            #   "from __future__" is a SyntaxError, so docstrings stay
            #   unicode and code reading them must cope with that.
            # - strings that already carry a prefix such as 'b', 'u' or
            #   'r' (i.e. whose first character is not a quote).
            if literal[0:3] in ("'''", '"""') or literal[0] not in ("'", '"'):
                yield t
            else:
                # Plain string literal. Prefix to make a b'' string.
                yield t._replace(string='b%s' % literal)
            continue

        # Compatibility imports are appended to the
        # "from __future__ import" line. No '\n' may be added, so that
        # line numbers are preserved.
        if t.type == token.NAME and t.string == 'import':
            preceding = tokens[idx - 2:idx]
            if ([p.string for p in preceding] == ['from', '__future__'] and
                all(p.type == token.NAME for p in preceding)):
                infutureimport = True
        if infutureimport and t.type == token.NEWLINE:
            infutureimport = False
            if fullname == 'mercurial.pycompat':
                yield t
                continue
            row, col = t.start
            extra = (b'; from mercurial.pycompat import '
                     b'delattr, getattr, hasattr, setattr, xrange\n')
            skipped = (tokenize.ENCODING, token.ENDMARKER)
            for u in tokenize.tokenize(io.BytesIO(extra).readline):
                if u.type not in skipped:
                    # Shift the token onto the end of the current line.
                    yield u._replace(start=(row, col + u.start[1]),
                                     end=(row, col + u.end[1]))
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(idx + 1, '('):
            fn = t.string
            ismethod = _isop(idx - 1, '.')

            # iteritems does not exist in the Python 3 world; use items.
            if fn == 'iteritems':
                yield t._replace(string='items')
                continue

            if ismethod:
                # .encode() and .decode() on str/bytes/unicode don't
                # accept byte strings on Python 3.
                if fn in ('encode', 'decode'):
                    for argn in (0, 1):
                        argidx = _findargnofcall(argn)
                        if argidx is not None:
                            _ensureunicode(argidx)
            elif fn in ('getattr', 'setattr', 'hasattr', 'safehasattr',
                        'open'):
                # The *attr() builtins don't accept byte strings as 2nd
                # argument, and the 2nd argument (mode) of a bare open()
                # call must be a string, not bytes.
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensureunicode(arg1idx)

        # Emit unmodified token.
        yield t
|
331 | 325 | |
|
# Header to add to bytecode files. This MUST be changed when
# ``replacetokens`` or any mechanism that changes semantics of module
# loading is changed. Otherwise cached bytecode may get loaded without
# the new transformation mechanisms applied.
# The value is a literal ``HG`` followed by 2 bytes holding the
# transformation version.
BYTECODEHEADER = b'HG\x00\x06'
|
337 | 331 | |
|
class hgloader(importlib.machinery.SourceFileLoader):
    """Source file loader that rewrites tokens before compiling.

    Mercurial source is written as Python 2 code. To run it on Python 3
    without cluttering the code base, the token stream of every module is
    transformed between parsing and compilation.

    The built-in importer caches bytecode and only invalidates that cache
    when the Python/bytecode version recorded in the file header changes.
    It knows nothing about our transformation, which may change at any
    time, so bytecode produced by an older transformation could be picked
    up unnoticed.

    To prevent that, ``get_data`` and ``set_data`` - which
    ``SourceFileLoader`` uses to read and write bytecode cache files - are
    wrapped to maintain an extra header in front of the cached bytecode.
    Provided the version in this file is bumped whenever the semantics
    change, stale cache files are rejected.

    The extra header has the form ``HG<VERSION>``: a literal ``HG``
    followed by 2 binary bytes holding the transformation version.
    """
    @staticmethod
    def _isbytecodepath(path):
        """Tell whether ``path`` has one of the bytecode file suffixes."""
        return path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES))

    def get_data(self, path):
        """Return the content of ``path``, minus our header for bytecode.

        Raises OSError when a bytecode file lacks the Mercurial header or
        carries another transformation version; that is how
        ``SourceFileLoader.get_code()`` expects a "bad" cached file to be
        reported.
        """
        data = super(hgloader, self).get_data(path)

        if self._isbytecodepath(path):
            if data[0:2] != b'HG':
                raise OSError('no hg header')
            if data[0:4] != BYTECODEHEADER:
                raise OSError('hg header version mismatch')
            data = data[4:]

        return data

    def set_data(self, path, data, *args, **kwargs):
        """Write ``data`` to ``path``, prepending our header to bytecode."""
        if self._isbytecodepath(path):
            data = BYTECODEHEADER + data

        return super(hgloader, self).set_data(path, data, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        stream = tokenize.tokenize(io.BytesIO(data).readline)
        rewritten = replacetokens(list(stream), self.name)
        # Python's built-in importer strips frames from exceptions raised
        # for this code. Unfortunately, that mechanism isn't extensible
        # and our frame will be blamed for the import failure. There
        # are extremely hacky ways to do frame stripping. We haven't
        # implemented them because they are very ugly.
        return super(hgloader, self).source_to_code(
            tokenize.untokenize(rewritten), path)
|
402 | 396 | |
|
# Loading this package registers our custom importer as a side effect, so
# that every entry point can import mercurial.* modules without having to
# do the registration itself.
_importercls = hgpathentryfinder if sys.version_info[0] >= 3 else hgimporter
if all(not isinstance(x, _importercls) for x in sys.meta_path):
    # meta_path is used before any implicit finders and before sys.path.
    sys.meta_path.insert(0, _importercls())
General Comments 0
You need to be logged in to leave comments.
Login now