##// END OF EJS Templates
python3: allow hgloader to work with lazy loaders...
Siddharth Agarwal -
r32425:397e3a2e default
parent child Browse files
Show More
@@ -1,283 +1,290
1 1 # __init__.py - Startup and module loading logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import sys
11 11
12 12 # Allow 'from mercurial import demandimport' to keep working.
13 13 import hgdemandimport
14 14 demandimport = hgdemandimport
15 15
16 16 __all__ = []
17 17
18 18 # Python 3 uses a custom module loader that transforms source code between
19 19 # source file reading and compilation. This is done by registering a custom
20 20 # finder that changes the spec for Mercurial modules to use a custom loader.
21 21 if sys.version_info[0] >= 3:
22 22 import importlib
23 23 import importlib.abc
24 24 import io
25 25 import token
26 26 import tokenize
27 27
class hgpathentryfinder(importlib.abc.MetaPathFinder):
    """A sys.meta_path finder that swaps in our source-transforming loader.

    This finder does no path searching of its own: it delegates the
    actual lookup to the other entries on ``sys.meta_path`` and then
    rewrites the resulting spec so the module is loaded through
    ``hgloader``.
    """
    def find_spec(self, fullname, path, target=None):
        # Anything outside the Mercurial namespaces is not our business.
        ours = ('mercurial.', 'hgext.', 'hgext3rd.')
        if not fullname.startswith(ours):
            return None
        # zstd is already dual-version clean, don't try and mangle it
        if fullname.startswith('mercurial.zstd'):
            return None

        # Delegate the real lookup to every other registered finder.
        spec = None
        for delegate in sys.meta_path:
            if delegate == self:
                continue

            spec = delegate.find_spec(fullname, path, target=target)
            if spec:
                break

        if not spec:
            # This is a Mercurial-related module but none of the other
            # finders could locate it, so it most likely doesn't exist.
            return None

        # TODO need to support loaders from alternate specs, like zip
        # loaders.
        loader = hgloader(spec.name, spec.origin)
        # Can't use util.safehasattr here because that would require
        # importing util, and we're in import code.
        if hasattr(spec.loader, 'loader'): # hasattr-py3-only
            # A nested loader (maybe a lazy loader?): replace the wrapped
            # loader rather than the wrapper, so laziness is preserved.
            spec.loader.loader = loader
        else:
            spec.loader = loader
        return spec
58 65
def replacetokens(tokens, fullname):
    """Transform a stream of tokens from raw to Python 3.

    It is called by the custom module loading machinery to rewrite
    source/tokens between source decoding and compilation.

    ``tokens`` is a list of ``tokenize.TokenInfo`` tuples (they are
    accessed via ``.type``/``.string`` and rewritten via ``._replace``);
    ``fullname`` is the dotted name of the module being loaded.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.

    REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
    OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
    """
    # True while we are between a "from __future__ import" NAME token and
    # the NEWLINE that terminates that logical line.
    futureimpline = False

    # The following utility functions access the tokens list and i index of
    # the for i, t enumerate(tokens) loop below
    def _isop(j, *o):
        """Assert that tokens[j] is an OP with one of the given values"""
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            # j may run past the end of the token list; treat as no match.
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0)

        Returns index of the first token of that argument, or None if
        there is not that many arguments.

        Assumes that token[i + 1] is '('.

        """
        # Track bracket depth so commas inside nested calls/containers
        # are not counted as argument separators.
        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    return None
            elif n == 0:
                # this is the starting position of arg
                return j
            elif _isop(j, '(', '[', '{'):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                n -= 1

        return None

    def _ensureunicode(j):
        """Make sure the token at j is a unicode string

        This rewrites a string token to include the unicode literal prefix
        so the string transformer won't add the byte prefix.

        Ignores tokens that are not strings. Assumes bounds checking has
        already been done.

        """
        st = tokens[j]
        if st.type == token.STRING and st.string.startswith(("'", '"')):
            tokens[j] = st._replace(string='u%s' % st.string)

    for i, t in enumerate(tokens):
        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                yield t
                continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'. Ignore.
            if s[0] not in ("'", '"'):
                yield t
                continue

            # String literal. Prefix to make a b'' string.
            yield t._replace(string='b%s' % t.string)
            continue

        # Insert compatibility imports at "from __future__ import" line.
        # No '\n' should be added to preserve line numbers.
        if (t.type == token.NAME and t.string == 'import' and
            all(u.type == token.NAME for u in tokens[i - 2:i]) and
            [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
            futureimpline = True
        if t.type == token.NEWLINE and futureimpline:
            futureimpline = False
            # pycompat itself defines these names; importing from it into
            # itself would be circular, so leave that module untouched.
            if fullname == 'mercurial.pycompat':
                yield t
                continue
            # Splice the import onto the same physical line (';'-joined),
            # shifting only column offsets so line numbers stay intact.
            r, c = t.start
            l = (b'; from mercurial.pycompat import '
                 b'delattr, getattr, hasattr, setattr, xrange, '
                 b'open, unicode\n')
            for u in tokenize.tokenize(io.BytesIO(l).readline):
                if u.type in (tokenize.ENCODING, token.ENDMARKER):
                    continue
                yield u._replace(
                    start=(r, c + u.start[1]), end=(r, c + u.end[1]))
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

        # *attr() builtins don't accept byte strings to 2nd argument.
            if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensureunicode(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensureunicode(argidx)

            # It changes iteritems/values to items/values as they are not
            # present in Python 3 world.
            elif fn in ('iteritems', 'itervalues'):
                yield t._replace(string=fn[4:])
                continue

        # Emit unmodified token.
        yield t
205 212
# Header to add to bytecode files: a literal ``HG`` followed by two binary
# version bytes. This MUST be changed when ``replacetokens`` or any
# mechanism that changes semantics of module loading is changed. Otherwise
# cached bytecode may get loaded without the new transformation mechanisms
# applied.
BYTECODEHEADER = b'HG\x00\x0a'
211 218
class hgloader(importlib.machinery.SourceFileLoader):
    """Custom module loader that transforms source code.

    When the source code is converted to a code object, we transform
    certain patterns to be Python 3 compatible. This allows us to write
    code that is natively Python 2 and compatible with Python 3 without
    making the code excessively ugly.

    We do this by transforming the token stream between parse and compile.

    Transforming tokens invalidates the caching assumptions made by the
    built-in importer: it only versions cached bytecode by the
    Python/bytecode version, while our transformations can change at any
    time. To keep stale caches from being used, we wrap ``get_data`` and
    ``set_data`` (the hooks ``SourceFileLoader`` uses to read and write
    bytecode cache files) and prepend our own ``HG<VERSION>`` header —
    a literal ``HG`` plus 2 binary version bytes. As long as that version
    is bumped whenever semantics change, mismatched caches are rejected
    and regenerated.
    """
    def get_data(self, path):
        data = super(hgloader, self).get_data(path)

        isbytecode = path.endswith(
            tuple(importlib.machinery.BYTECODE_SUFFIXES))
        if not isbytecode:
            return data

        # A cached file must carry our header with the current
        # transformation version. When it doesn't, raise OSError, which
        # is what ``SourceFileLoader.get_code()`` expects for a "bad"
        # bytecode path: it falls back to recompiling from source.
        if not data.startswith(b'HG'):
            raise OSError('no hg header')
        if not data.startswith(BYTECODEHEADER):
            raise OSError('hg header version mismatch')

        # Hand the importer the payload with our 4-byte header stripped.
        return data[4:]

    def set_data(self, path, data, *args, **kwargs):
        # Mirror get_data: stamp our version header onto bytecode files
        # as they are written to the cache.
        if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
            data = BYTECODEHEADER + data

        return super(hgloader, self).set_data(path, data, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        reader = io.BytesIO(data).readline
        rewritten = tokenize.untokenize(
            replacetokens(list(tokenize.tokenize(reader)), self.name))
        # Python's built-in importer strips frames from exceptions raised
        # for this code. Unfortunately, that mechanism isn't extensible
        # and our frame will be blamed for the import failure. There
        # are extremely hacky ways to do frame stripping. We haven't
        # implemented them because they are very ugly.
        return super(hgloader, self).source_to_code(rewritten, path)
276 283
# Registering our custom importer is a deliberate side effect of loading
# this package: any entry point can then import mercurial.* modules
# without having to perform the registration itself. Guard against
# installing the finder twice on repeated imports.
if not any(isinstance(entry, hgpathentryfinder) for entry in sys.meta_path):
    # Insert at the front: meta_path is consulted before any implicit
    # finders and before sys.path.
    sys.meta_path.insert(0, hgpathentryfinder())
General Comments 0
You need to be logged in to leave comments. Login now