##// END OF EJS Templates
policy: drop custom importer for pure modules
Yuya Nishihara -
r32373:57008258 default
parent child Browse files
Show More
@@ -1,398 +1,279 b''
1 # __init__.py - Startup and module loading logic for Mercurial.
1 # __init__.py - Startup and module loading logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import imp
11 import os
12 import sys
10 import sys
13 import zipimport
14
15 from . import (
16 policy
17 )
18
11
19 __all__ = []
12 __all__ = []
20
13
21 modulepolicy = policy.policy
22
23 # Modules that have both Python and C implementations. See also the
24 # set of .py files under mercurial/pure/.
25 _dualmodules = {
26 }
27
28 class hgimporter(object):
29 """Object that conforms to import hook interface defined in PEP-302."""
30 def find_module(self, name, path=None):
31 # We only care about modules that have both C and pure implementations.
32 if name in _dualmodules:
33 return self
34 return None
35
36 def load_module(self, name):
37 mod = sys.modules.get(name, None)
38 if mod:
39 return mod
40
41 mercurial = sys.modules['mercurial']
42
43 # The zip importer behaves sufficiently differently from the default
44 # importer to warrant its own code path.
45 loader = getattr(mercurial, '__loader__', None)
46 if isinstance(loader, zipimport.zipimporter):
47 def ziploader(*paths):
48 """Obtain a zipimporter for a directory under the main zip."""
49 path = os.path.join(loader.archive, *paths)
50 zl = sys.path_importer_cache.get(path)
51 if not zl:
52 zl = zipimport.zipimporter(path)
53 return zl
54
55 try:
56 if modulepolicy in policy.policynoc:
57 raise ImportError()
58
59 zl = ziploader('mercurial')
60 mod = zl.load_module(name)
61 # Unlike imp, ziploader doesn't expose module metadata that
62 # indicates the type of module. So just assume what we found
63 # is OK (even though it could be a pure Python module).
64 except ImportError:
65 if modulepolicy == b'c':
66 raise
67 zl = ziploader('mercurial', 'pure')
68 mod = zl.load_module(name)
69
70 sys.modules[name] = mod
71 return mod
72
73 # Unlike the default importer which searches special locations and
74 # sys.path, we only look in the directory where "mercurial" was
75 # imported from.
76
77 # imp.find_module doesn't support submodules (modules with ".").
78 # Instead you have to pass the parent package's __path__ attribute
79 # as the path argument.
80 stem = name.split('.')[-1]
81
82 try:
83 if modulepolicy in policy.policynoc:
84 raise ImportError()
85
86 modinfo = imp.find_module(stem, mercurial.__path__)
87
88 # The Mercurial installer used to copy files from
89 # mercurial/pure/*.py to mercurial/*.py. Therefore, it's possible
90 # for some installations to have .py files under mercurial/*.
91 # Loading Python modules when we expected C versions could result
92 # in a) poor performance b) loading a version from a previous
93 # Mercurial version, potentially leading to incompatibility. Either
94 # scenario is bad. So we verify that modules loaded from
95 # mercurial/* are C extensions. If the current policy allows the
96 # loading of .py modules, the module will be re-imported from
97 # mercurial/pure/* below.
98 if modinfo[2][2] != imp.C_EXTENSION:
99 raise ImportError('.py version of %s found where C '
100 'version should exist' % name)
101
102 except ImportError:
103 if modulepolicy == b'c':
104 raise
105
106 # Could not load the C extension and pure Python is allowed. So
107 # try to load them.
108 from . import pure
109 modinfo = imp.find_module(stem, pure.__path__)
110 if not modinfo:
111 raise ImportError('could not find mercurial module %s' %
112 name)
113
114 mod = imp.load_module(name, *modinfo)
115 sys.modules[name] = mod
116 return mod
117
118 # Python 3 uses a custom module loader that transforms source code between
14 # Python 3 uses a custom module loader that transforms source code between
119 # source file reading and compilation. This is done by registering a custom
15 # source file reading and compilation. This is done by registering a custom
120 # finder that changes the spec for Mercurial modules to use a custom loader.
16 # finder that changes the spec for Mercurial modules to use a custom loader.
121 if sys.version_info[0] >= 3:
17 if sys.version_info[0] >= 3:
122 from . import pure
123 import importlib
18 import importlib
19 import importlib.abc
124 import io
20 import io
125 import token
21 import token
126 import tokenize
22 import tokenize
127
23
128 class hgpathentryfinder(importlib.abc.MetaPathFinder):
24 class hgpathentryfinder(importlib.abc.MetaPathFinder):
129 """A sys.meta_path finder that uses a custom module loader."""
25 """A sys.meta_path finder that uses a custom module loader."""
130 def find_spec(self, fullname, path, target=None):
26 def find_spec(self, fullname, path, target=None):
131 # Only handle Mercurial-related modules.
27 # Only handle Mercurial-related modules.
132 if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
28 if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
133 return None
29 return None
134 # zstd is already dual-version clean, don't try and mangle it
30 # zstd is already dual-version clean, don't try and mangle it
135 if fullname.startswith('mercurial.zstd'):
31 if fullname.startswith('mercurial.zstd'):
136 return None
32 return None
137
33
138 # This assumes Python 3 doesn't support loading C modules.
139 if fullname in _dualmodules:
140 stem = fullname.split('.')[-1]
141 fullname = 'mercurial.pure.%s' % stem
142 target = pure
143 assert len(path) == 1
144 path = [os.path.join(path[0], 'pure')]
145
146 # Try to find the module using other registered finders.
34 # Try to find the module using other registered finders.
147 spec = None
35 spec = None
148 for finder in sys.meta_path:
36 for finder in sys.meta_path:
149 if finder == self:
37 if finder == self:
150 continue
38 continue
151
39
152 spec = finder.find_spec(fullname, path, target=target)
40 spec = finder.find_spec(fullname, path, target=target)
153 if spec:
41 if spec:
154 break
42 break
155
43
156 # This is a Mercurial-related module but we couldn't find it
44 # This is a Mercurial-related module but we couldn't find it
157 # using the previously-registered finders. This likely means
45 # using the previously-registered finders. This likely means
158 # the module doesn't exist.
46 # the module doesn't exist.
159 if not spec:
47 if not spec:
160 return None
48 return None
161
49
162 if (fullname.startswith('mercurial.pure.')
163 and fullname.replace('.pure.', '.') in _dualmodules):
164 spec.name = spec.name.replace('.pure.', '.')
165
166 # TODO need to support loaders from alternate specs, like zip
50 # TODO need to support loaders from alternate specs, like zip
167 # loaders.
51 # loaders.
168 spec.loader = hgloader(spec.name, spec.origin)
52 spec.loader = hgloader(spec.name, spec.origin)
169 return spec
53 return spec
170
54
171 def replacetokens(tokens, fullname):
55 def replacetokens(tokens, fullname):
172 """Transform a stream of tokens from raw to Python 3.
56 """Transform a stream of tokens from raw to Python 3.
173
57
174 It is called by the custom module loading machinery to rewrite
58 It is called by the custom module loading machinery to rewrite
175 source/tokens between source decoding and compilation.
59 source/tokens between source decoding and compilation.
176
60
177 Returns a generator of possibly rewritten tokens.
61 Returns a generator of possibly rewritten tokens.
178
62
179 The input token list may be mutated as part of processing. However,
63 The input token list may be mutated as part of processing. However,
180 its changes do not necessarily match the output token stream.
64 its changes do not necessarily match the output token stream.
181
65
182 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
66 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
183 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
67 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
184 """
68 """
185 futureimpline = False
69 futureimpline = False
186
70
187 # The following utility functions access the tokens list and i index of
71 # The following utility functions access the tokens list and i index of
188 # the ``for i, t in enumerate(tokens)`` loop below.
72 # the ``for i, t in enumerate(tokens)`` loop below.
189 def _isop(j, *o):
73 def _isop(j, *o):
190 """Return True if tokens[j] is an OP with one of the given values"""
74 """Return True if tokens[j] is an OP with one of the given values"""
191 try:
75 try:
192 return tokens[j].type == token.OP and tokens[j].string in o
76 return tokens[j].type == token.OP and tokens[j].string in o
193 except IndexError:
77 except IndexError:
194 return False
78 return False
195
79
196 def _findargnofcall(n):
80 def _findargnofcall(n):
197 """Find arg n of a call expression (start at 0)
81 """Find arg n of a call expression (start at 0)
198
82
199 Returns index of the first token of that argument, or None if
83 Returns index of the first token of that argument, or None if
200 there are not that many arguments.
84 there are not that many arguments.
201
85
202 Assumes that tokens[i + 1] is '('.
86 Assumes that tokens[i + 1] is '('.
203
87
204 """
88 """
205 nested = 0
89 nested = 0
206 for j in range(i + 2, len(tokens)):
90 for j in range(i + 2, len(tokens)):
207 if _isop(j, ')', ']', '}'):
91 if _isop(j, ')', ']', '}'):
208 # end of call, tuple, subscription or dict / set
92 # end of call, tuple, subscription or dict / set
209 nested -= 1
93 nested -= 1
210 if nested < 0:
94 if nested < 0:
211 return None
95 return None
212 elif n == 0:
96 elif n == 0:
213 # this is the starting position of arg
97 # this is the starting position of arg
214 return j
98 return j
215 elif _isop(j, '(', '[', '{'):
99 elif _isop(j, '(', '[', '{'):
216 nested += 1
100 nested += 1
217 elif _isop(j, ',') and nested == 0:
101 elif _isop(j, ',') and nested == 0:
218 n -= 1
102 n -= 1
219
103
220 return None
104 return None
221
105
222 def _ensureunicode(j):
106 def _ensureunicode(j):
223 """Make sure the token at j is a unicode string
107 """Make sure the token at j is a unicode string
224
108
225 This rewrites a string token to include the unicode literal prefix
109 This rewrites a string token to include the unicode literal prefix
226 so the string transformer won't add the byte prefix.
110 so the string transformer won't add the byte prefix.
227
111
228 Ignores tokens that are not strings. Assumes bounds checking has
112 Ignores tokens that are not strings. Assumes bounds checking has
229 already been done.
113 already been done.
230
114
231 """
115 """
232 st = tokens[j]
116 st = tokens[j]
233 if st.type == token.STRING and st.string.startswith(("'", '"')):
117 if st.type == token.STRING and st.string.startswith(("'", '"')):
234 tokens[j] = st._replace(string='u%s' % st.string)
118 tokens[j] = st._replace(string='u%s' % st.string)
235
119
236 for i, t in enumerate(tokens):
120 for i, t in enumerate(tokens):
237 # Convert most string literals to byte literals. String literals
121 # Convert most string literals to byte literals. String literals
238 # in Python 2 are bytes. String literals in Python 3 are unicode.
122 # in Python 2 are bytes. String literals in Python 3 are unicode.
239 # Most strings in Mercurial are bytes and unicode strings are rare.
123 # Most strings in Mercurial are bytes and unicode strings are rare.
240 # Rather than rewrite all string literals to use ``b''`` to indicate
124 # Rather than rewrite all string literals to use ``b''`` to indicate
241 # byte strings, we apply this token transformer to insert the ``b``
125 # byte strings, we apply this token transformer to insert the ``b``
242 # prefix nearly everywhere.
126 # prefix nearly everywhere.
243 if t.type == token.STRING:
127 if t.type == token.STRING:
244 s = t.string
128 s = t.string
245
129
246 # Preserve docstrings as string literals. This is inconsistent
130 # Preserve docstrings as string literals. This is inconsistent
247 # with regular unprefixed strings. However, the
131 # with regular unprefixed strings. However, the
248 # "from __future__" parsing (which allows a module docstring to
132 # "from __future__" parsing (which allows a module docstring to
249 # exist before it) doesn't properly handle the docstring if it
133 # exist before it) doesn't properly handle the docstring if it
250 # is b''' prefixed, leading to a SyntaxError. We leave all
134 # is b''' prefixed, leading to a SyntaxError. We leave all
251 # docstrings as unprefixed to avoid this. This means Mercurial
135 # docstrings as unprefixed to avoid this. This means Mercurial
252 # components touching docstrings need to handle unicode,
136 # components touching docstrings need to handle unicode,
253 # unfortunately.
137 # unfortunately.
254 if s[0:3] in ("'''", '"""'):
138 if s[0:3] in ("'''", '"""'):
255 yield t
139 yield t
256 continue
140 continue
257
141
258 # If the first character isn't a quote, it is likely a string
142 # If the first character isn't a quote, it is likely a string
259 # prefixing character (such as 'b', 'u', or 'r'). Ignore.
143 # prefixing character (such as 'b', 'u', or 'r'). Ignore.
260 if s[0] not in ("'", '"'):
144 if s[0] not in ("'", '"'):
261 yield t
145 yield t
262 continue
146 continue
263
147
264 # String literal. Prefix to make a b'' string.
148 # String literal. Prefix to make a b'' string.
265 yield t._replace(string='b%s' % t.string)
149 yield t._replace(string='b%s' % t.string)
266 continue
150 continue
267
151
268 # Insert compatibility imports at "from __future__ import" line.
152 # Insert compatibility imports at "from __future__ import" line.
269 # No '\n' should be added to preserve line numbers.
153 # No '\n' should be added to preserve line numbers.
270 if (t.type == token.NAME and t.string == 'import' and
154 if (t.type == token.NAME and t.string == 'import' and
271 all(u.type == token.NAME for u in tokens[i - 2:i]) and
155 all(u.type == token.NAME for u in tokens[i - 2:i]) and
272 [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
156 [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
273 futureimpline = True
157 futureimpline = True
274 if t.type == token.NEWLINE and futureimpline:
158 if t.type == token.NEWLINE and futureimpline:
275 futureimpline = False
159 futureimpline = False
276 if fullname == 'mercurial.pycompat':
160 if fullname == 'mercurial.pycompat':
277 yield t
161 yield t
278 continue
162 continue
279 r, c = t.start
163 r, c = t.start
280 l = (b'; from mercurial.pycompat import '
164 l = (b'; from mercurial.pycompat import '
281 b'delattr, getattr, hasattr, setattr, xrange, '
165 b'delattr, getattr, hasattr, setattr, xrange, '
282 b'open, unicode\n')
166 b'open, unicode\n')
283 for u in tokenize.tokenize(io.BytesIO(l).readline):
167 for u in tokenize.tokenize(io.BytesIO(l).readline):
284 if u.type in (tokenize.ENCODING, token.ENDMARKER):
168 if u.type in (tokenize.ENCODING, token.ENDMARKER):
285 continue
169 continue
286 yield u._replace(
170 yield u._replace(
287 start=(r, c + u.start[1]), end=(r, c + u.end[1]))
171 start=(r, c + u.start[1]), end=(r, c + u.end[1]))
288 continue
172 continue
289
173
290 # This looks like a function call.
174 # This looks like a function call.
291 if t.type == token.NAME and _isop(i + 1, '('):
175 if t.type == token.NAME and _isop(i + 1, '('):
292 fn = t.string
176 fn = t.string
293
177
294 # *attr() builtins don't accept byte strings to 2nd argument.
178 # *attr() builtins don't accept byte strings to 2nd argument.
295 if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
179 if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
296 not _isop(i - 1, '.')):
180 not _isop(i - 1, '.')):
297 arg1idx = _findargnofcall(1)
181 arg1idx = _findargnofcall(1)
298 if arg1idx is not None:
182 if arg1idx is not None:
299 _ensureunicode(arg1idx)
183 _ensureunicode(arg1idx)
300
184
301 # .encode() and .decode() on str/bytes/unicode don't accept
185 # .encode() and .decode() on str/bytes/unicode don't accept
302 # byte strings on Python 3.
186 # byte strings on Python 3.
303 elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
187 elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
304 for argn in range(2):
188 for argn in range(2):
305 argidx = _findargnofcall(argn)
189 argidx = _findargnofcall(argn)
306 if argidx is not None:
190 if argidx is not None:
307 _ensureunicode(argidx)
191 _ensureunicode(argidx)
308
192
309 # Rewrite iteritems()/itervalues() calls to items()/values(), as the
193 # Rewrite iteritems()/itervalues() calls to items()/values(), as the
310 # iter* variants are not present in the Python 3 world.
194 # iter* variants are not present in the Python 3 world.
311 elif fn in ('iteritems', 'itervalues'):
195 elif fn in ('iteritems', 'itervalues'):
312 yield t._replace(string=fn[4:])
196 yield t._replace(string=fn[4:])
313 continue
197 continue
314
198
315 # Emit unmodified token.
199 # Emit unmodified token.
316 yield t
200 yield t
317
201
318 # Header to add to bytecode files. This MUST be changed when
202 # Header to add to bytecode files. This MUST be changed when
319 # ``replacetokens`` or any mechanism that changes semantics of module
203 # ``replacetokens`` or any mechanism that changes semantics of module
320 # loading is changed. Otherwise cached bytecode may get loaded without
204 # loading is changed. Otherwise cached bytecode may get loaded without
321 # the new transformation mechanisms applied.
205 # the new transformation mechanisms applied.
322 BYTECODEHEADER = b'HG\x00\x0a'
206 BYTECODEHEADER = b'HG\x00\x0a'
323
207
324 class hgloader(importlib.machinery.SourceFileLoader):
208 class hgloader(importlib.machinery.SourceFileLoader):
325 """Custom module loader that transforms source code.
209 """Custom module loader that transforms source code.
326
210
327 When the source code is converted to a code object, we transform
211 When the source code is converted to a code object, we transform
328 certain patterns to be Python 3 compatible. This allows us to write code
212 certain patterns to be Python 3 compatible. This allows us to write code
329 that is natively Python 2 and compatible with Python 3 without
213 that is natively Python 2 and compatible with Python 3 without
330 making the code excessively ugly.
214 making the code excessively ugly.
331
215
332 We do this by transforming the token stream between parse and compile.
216 We do this by transforming the token stream between parse and compile.
333
217
334 Implementing transformations invalidates caching assumptions made
218 Implementing transformations invalidates caching assumptions made
335 by the built-in importer. The built-in importer stores a header on
219 by the built-in importer. The built-in importer stores a header on
336 saved bytecode files indicating the Python/bytecode version. If the
220 saved bytecode files indicating the Python/bytecode version. If the
337 version changes, the cached bytecode is ignored. The Mercurial
221 version changes, the cached bytecode is ignored. The Mercurial
338 transformations could change at any time. This means we need to check
222 transformations could change at any time. This means we need to check
339 that cached bytecode was generated with the current transformation
223 that cached bytecode was generated with the current transformation
340 code or there could be a mismatch between cached bytecode and what
224 code or there could be a mismatch between cached bytecode and what
341 would be generated from this class.
225 would be generated from this class.
342
226
343 We supplement the bytecode caching layer by wrapping ``get_data``
227 We supplement the bytecode caching layer by wrapping ``get_data``
344 and ``set_data``. These functions are called when the
228 and ``set_data``. These functions are called when the
345 ``SourceFileLoader`` retrieves and saves bytecode cache files,
229 ``SourceFileLoader`` retrieves and saves bytecode cache files,
346 respectively. We simply add an additional header on the file. As
230 respectively. We simply add an additional header on the file. As
347 long as the version in this file is changed when semantics change,
231 long as the version in this file is changed when semantics change,
348 cached bytecode should be invalidated when transformations change.
232 cached bytecode should be invalidated when transformations change.
349
233
350 The added header has the form ``HG<VERSION>``. That is a literal
234 The added header has the form ``HG<VERSION>``. That is a literal
351 ``HG`` with 2 binary bytes indicating the transformation version.
235 ``HG`` with 2 binary bytes indicating the transformation version.
352 """
236 """
353 def get_data(self, path):
237 def get_data(self, path):
354 data = super(hgloader, self).get_data(path)
238 data = super(hgloader, self).get_data(path)
355
239
356 if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
240 if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
357 return data
241 return data
358
242
359 # There should be a header indicating the Mercurial transformation
243 # There should be a header indicating the Mercurial transformation
360 # version. If it doesn't exist or doesn't match the current version,
244 # version. If it doesn't exist or doesn't match the current version,
361 # we raise an OSError because that is what
245 # we raise an OSError because that is what
362 # ``SourceFileLoader.get_code()`` expects when loading bytecode
246 # ``SourceFileLoader.get_code()`` expects when loading bytecode
363 # paths to indicate the cached file is "bad."
247 # paths to indicate the cached file is "bad."
364 if data[0:2] != b'HG':
248 if data[0:2] != b'HG':
365 raise OSError('no hg header')
249 raise OSError('no hg header')
366 if data[0:4] != BYTECODEHEADER:
250 if data[0:4] != BYTECODEHEADER:
367 raise OSError('hg header version mismatch')
251 raise OSError('hg header version mismatch')
368
252
369 return data[4:]
253 return data[4:]
370
254
371 def set_data(self, path, data, *args, **kwargs):
255 def set_data(self, path, data, *args, **kwargs):
372 if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
256 if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
373 data = BYTECODEHEADER + data
257 data = BYTECODEHEADER + data
374
258
375 return super(hgloader, self).set_data(path, data, *args, **kwargs)
259 return super(hgloader, self).set_data(path, data, *args, **kwargs)
376
260
377 def source_to_code(self, data, path):
261 def source_to_code(self, data, path):
378 """Perform token transformation before compilation."""
262 """Perform token transformation before compilation."""
379 buf = io.BytesIO(data)
263 buf = io.BytesIO(data)
380 tokens = tokenize.tokenize(buf.readline)
264 tokens = tokenize.tokenize(buf.readline)
381 data = tokenize.untokenize(replacetokens(list(tokens), self.name))
265 data = tokenize.untokenize(replacetokens(list(tokens), self.name))
382 # Python's built-in importer strips frames from exceptions raised
266 # Python's built-in importer strips frames from exceptions raised
383 # for this code. Unfortunately, that mechanism isn't extensible
267 # for this code. Unfortunately, that mechanism isn't extensible
384 # and our frame will be blamed for the import failure. There
268 # and our frame will be blamed for the import failure. There
385 # are extremely hacky ways to do frame stripping. We haven't
269 # are extremely hacky ways to do frame stripping. We haven't
386 # implemented them because they are very ugly.
270 # implemented them because they are very ugly.
387 return super(hgloader, self).source_to_code(data, path)
271 return super(hgloader, self).source_to_code(data, path)
388
272
389 # We automagically register our custom importer as a side-effect of loading.
273 # We automagically register our custom importer as a side-effect of
390 # This is necessary to ensure that any entry points are able to import
274 # loading. This is necessary to ensure that any entry points are able
391 # mercurial.* modules without having to perform this registration themselves.
275 # to import mercurial.* modules without having to perform this
392 if sys.version_info[0] >= 3:
276 # registration themselves.
393 _importercls = hgpathentryfinder
277 if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path):
394 else:
278 # meta_path is used before any implicit finders and before sys.path.
395 _importercls = hgimporter
279 sys.meta_path.insert(0, hgpathentryfinder())
396 if not any(isinstance(x, _importercls) for x in sys.meta_path):
397 # meta_path is used before any implicit finders and before sys.path.
398 sys.meta_path.insert(0, _importercls())
General Comments 0
You need to be logged in to leave comments. Login now