##// END OF EJS Templates
python3: allow hgloader to work with lazy loaders...
Siddharth Agarwal -
r32425:397e3a2e default
parent child Browse files
Show More
@@ -1,283 +1,290 @@
1 # __init__.py - Startup and module loading logic for Mercurial.
1 # __init__.py - Startup and module loading logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import sys
10 import sys
11
11
12 # Allow 'from mercurial import demandimport' to keep working.
12 # Allow 'from mercurial import demandimport' to keep working.
13 import hgdemandimport
13 import hgdemandimport
14 demandimport = hgdemandimport
14 demandimport = hgdemandimport
15
15
16 __all__ = []
16 __all__ = []
17
17
18 # Python 3 uses a custom module loader that transforms source code between
18 # Python 3 uses a custom module loader that transforms source code between
19 # source file reading and compilation. This is done by registering a custom
19 # source file reading and compilation. This is done by registering a custom
20 # finder that changes the spec for Mercurial modules to use a custom loader.
20 # finder that changes the spec for Mercurial modules to use a custom loader.
21 if sys.version_info[0] >= 3:
21 if sys.version_info[0] >= 3:
22 import importlib
22 import importlib
23 import importlib.abc
23 import importlib.abc
24 import io
24 import io
25 import token
25 import token
26 import tokenize
26 import tokenize
27
27
class hgpathentryfinder(importlib.abc.MetaPathFinder):
    """A sys.meta_path finder that uses a custom module loader.

    For Mercurial-related modules, this finder delegates discovery to the
    other registered finders and then swaps in ``hgloader`` so source is
    token-transformed before compilation.
    """
    def find_spec(self, fullname, path, target=None):
        """Return a module spec using ``hgloader``, or None.

        Returns None for non-Mercurial modules, for ``mercurial.zstd``,
        and for modules no other finder can locate.
        """
        # Only handle Mercurial-related modules.
        if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
            return None
        # zstd is already dual-version clean, don't try and mangle it
        if fullname.startswith('mercurial.zstd'):
            return None

        # Try to find the module using other registered finders.
        spec = None
        for finder in sys.meta_path:
            if finder == self:
                continue

            spec = finder.find_spec(fullname, path, target=target)
            if spec:
                break

        # This is a Mercurial-related module but we couldn't find it
        # using the previously-registered finders. This likely means
        # the module doesn't exist.
        if not spec:
            return None

        # TODO need to support loaders from alternate specs, like zip
        # loaders.
        loader = hgloader(spec.name, spec.origin)
        # Can't use util.safehasattr here because that would require
        # importing util, and we're in import code.
        # BUGFIX (per this changeset): the previous code assigned
        # ``spec.loader = hgloader(...)`` unconditionally, which discarded
        # wrapping loaders such as importlib.util.LazyLoader. Instead,
        # replace the inner loader when the spec's loader nests one.
        if hasattr(spec.loader, 'loader'): # hasattr-py3-only
            # This is a nested loader (maybe a lazy loader?)
            spec.loader.loader = loader
        else:
            spec.loader = loader
        return spec
58
65
def replacetokens(tokens, fullname):
    """Transform a stream of tokens from raw to Python 3.

    It is called by the custom module loading machinery to rewrite
    source/tokens between source decoding and compilation.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.

    REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
    OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
    """
    futureimpline = False

    # The helpers below close over ``tokens`` and over the ``i`` index of
    # the ``for i, tok in enumerate(tokens)`` loop further down.
    def _isop(idx, *candidates):
        """True if tokens[idx] is an OP token with one of the given values."""
        try:
            cur = tokens[idx]
            return cur.type == token.OP and cur.string in candidates
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0).

        Returns the index of the first token of that argument, or None if
        the call does not have that many arguments.

        Assumes that tokens[i + 1] is '('.
        """
        nested = 0
        for idx in range(i + 2, len(tokens)):
            if _isop(idx, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    return None
            elif n == 0:
                # this is the starting position of arg
                return idx
            elif _isop(idx, '(', '[', '{'):
                nested += 1
            elif _isop(idx, ',') and nested == 0:
                # a top-level comma separates arguments
                n -= 1

        return None

    def _ensureunicode(idx):
        """Make sure the token at idx is a unicode string.

        Rewrites a bare string token to carry the ``u`` literal prefix so
        the byte-prefixing transform below leaves it alone. Non-string
        tokens are ignored; bounds checking is assumed done by the caller.
        """
        cur = tokens[idx]
        if cur.type == token.STRING and cur.string.startswith(("'", '"')):
            tokens[idx] = cur._replace(string='u%s' % cur.string)

    for i, tok in enumerate(tokens):
        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if tok.type == token.STRING:
            text = tok.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if text[0:3] in ("'''", '"""'):
                yield tok
                continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'. Ignore.
            if text[0] not in ("'", '"'):
                yield tok
                continue

            # String literal. Prefix to make a b'' string.
            yield tok._replace(string='b%s' % text)
            continue

        # Insert compatibility imports at "from __future__ import" line.
        # No '\n' should be added to preserve line numbers.
        if (tok.type == token.NAME and tok.string == 'import' and
            all(prev.type == token.NAME for prev in tokens[i - 2:i]) and
            [prev.string for prev in tokens[i - 2:i]] ==
                ['from', '__future__']):
            futureimpline = True
        if tok.type == token.NEWLINE and futureimpline:
            futureimpline = False
            # pycompat itself obviously cannot import from pycompat.
            if fullname == 'mercurial.pycompat':
                yield tok
                continue
            row, col = tok.start
            src = (b'; from mercurial.pycompat import '
                   b'delattr, getattr, hasattr, setattr, xrange, '
                   b'open, unicode\n')
            for sub in tokenize.tokenize(io.BytesIO(src).readline):
                if sub.type in (tokenize.ENCODING, token.ENDMARKER):
                    continue
                # Shift the synthesized tokens onto the original line so
                # line numbers in tracebacks stay accurate.
                yield sub._replace(
                    start=(row, col + sub.start[1]),
                    end=(row, col + sub.end[1]))
            continue

        # This looks like a function call.
        if tok.type == token.NAME and _isop(i + 1, '('):
            fn = tok.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                    not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensureunicode(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensureunicode(argidx)

            # It changes iteritems/values to items/values as they are not
            # present in Python 3 world.
            elif fn in ('iteritems', 'itervalues'):
                yield tok._replace(string=fn[4:])
                continue

        # Emit unmodified token.
        yield tok
205
212
# Header to add to bytecode files. This MUST be changed when
# ``replacetokens`` or any mechanism that changes semantics of module
# loading is changed. Otherwise cached bytecode may get loaded without
# the new transformation mechanisms applied.
BYTECODEHEADER = b'HG\x00\x0a'
211
218
class hgloader(importlib.machinery.SourceFileLoader):
    """Custom module loader that transforms source code.

    When the source code is converted to a code object, we transform
    certain patterns to be Python 3 compatible. This allows us to write code
    that is natively Python 2 and compatible with Python 3 without
    making the code excessively ugly.

    We do this by transforming the token stream between parse and compile.

    Implementing transformations invalidates caching assumptions made
    by the built-in importer, which stores only the Python/bytecode
    version in its cache header. Because the Mercurial transformations can
    change at any time, we must also verify that cached bytecode was
    produced by the *current* transformation code; otherwise stale
    bytecode could be loaded.

    To do that we wrap ``get_data`` and ``set_data`` (the hooks
    ``SourceFileLoader`` uses to read and write bytecode cache files) and
    add an extra ``HG<VERSION>`` header — a literal ``HG`` followed by 2
    binary bytes identifying the transformation version. As long as the
    version is bumped whenever semantics change, stale caches are
    rejected automatically.
    """
    def get_data(self, path):
        """Read *path*; for bytecode files, validate and strip our header.

        Raises OSError when the header is missing or stale, which is the
        signal ``SourceFileLoader.get_code()`` expects for a "bad" cache
        file (it then falls back to recompiling from source).
        """
        raw = super(hgloader, self).get_data(path)

        # Source files pass through untouched; only cached bytecode
        # carries the extra Mercurial header.
        if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
            return raw

        if raw[0:2] != b'HG':
            raise OSError('no hg header')
        if raw[0:4] != BYTECODEHEADER:
            raise OSError('hg header version mismatch')

        return raw[4:]

    def set_data(self, path, data, *args, **kwargs):
        """Write *data* to *path*, prepending our header on bytecode files."""
        if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
            data = BYTECODEHEADER + data

        return super(hgloader, self).set_data(path, data, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        stream = io.BytesIO(data)
        rewritten = tokenize.untokenize(
            replacetokens(list(tokenize.tokenize(stream.readline)),
                          self.name))
        # Python's built-in importer strips frames from exceptions raised
        # for this code. Unfortunately, that mechanism isn't extensible
        # and our frame will be blamed for the import failure. There
        # are extremely hacky ways to do frame stripping. We haven't
        # implemented them because they are very ugly.
        return super(hgloader, self).source_to_code(rewritten, path)
276
283
# We automagically register our custom importer as a side-effect of
# loading. This is necessary to ensure that any entry points are able
# to import mercurial.* modules without having to perform this
# registration themselves.
for _finder in sys.meta_path:
    if isinstance(_finder, hgpathentryfinder):
        # Already registered (e.g. module reloaded); don't add a duplicate.
        break
else:
    # meta_path is consulted before any implicit finders and before
    # sys.path, so inserting at position 0 guarantees we run first.
    sys.meta_path.insert(0, hgpathentryfinder())
General Comments 0
You need to be logged in to leave comments. Login now