##// END OF EJS Templates
loader: pywatchman appears to already be py3 compatible...
Augie Fackler -
r32521:942051a2 default
parent child Browse files
Show More
@@ -1,290 +1,293
1 1 # __init__.py - Startup and module loading logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import sys
11 11
12 12 # Allow 'from mercurial import demandimport' to keep working.
13 13 import hgdemandimport
14 14 demandimport = hgdemandimport
15 15
16 16 __all__ = []
17 17
18 18 # Python 3 uses a custom module loader that transforms source code between
19 19 # source file reading and compilation. This is done by registering a custom
20 20 # finder that changes the spec for Mercurial modules to use a custom loader.
21 21 if sys.version_info[0] >= 3:
22 22 import importlib
23 23 import importlib.abc
24 24 import io
25 25 import token
26 26 import tokenize
27 27
class hgpathentryfinder(importlib.abc.MetaPathFinder):
    """A sys.meta_path finder that uses a custom module loader.

    The finder does not locate modules itself. It asks the other finders
    registered on ``sys.meta_path`` for a spec and then swaps the loader
    on that spec for ``hgloader`` so the module source is transformed
    before compilation.
    """
    def find_spec(self, fullname, path, target=None):
        """Return a spec for ``fullname`` that loads through ``hgloader``.

        ``fullname``, ``path`` and ``target`` have the meaning defined by
        ``importlib.abc.MetaPathFinder.find_spec``.

        Returns None when the module is not a Mercurial-related module,
        is on the list of modules that must not be transformed, or cannot
        be found by any other registered finder.
        """
        # Only handle Mercurial-related modules.
        if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
            return None
        # zstd is already dual-version clean, don't try and mangle it
        if fullname.startswith('mercurial.zstd'):
            return None
        # pywatchman is already dual-version clean, don't try and mangle it
        if fullname.startswith('hgext.fsmonitor.pywatchman'):
            return None

        # Try to find the module using other registered finders.
        spec = None
        for finder in sys.meta_path:
            # Identity, not equality: we only want to skip ourselves and
            # must not depend on a third-party finder's __eq__.
            if finder is self:
                continue

            # Not every finder implements find_spec(): finders written
            # against the older PEP 302 protocol only offer find_module().
            # Calling find_spec() blindly on those raises AttributeError
            # and would break every Mercurial import.
            findspec = getattr(finder, 'find_spec', None)
            if findspec is not None:
                spec = findspec(fullname, path, target=target)
            else:
                findmodule = getattr(finder, 'find_module', None)
                if findmodule is None:
                    continue
                legacyloader = findmodule(fullname, path)
                if legacyloader is None:
                    continue
                # Imported here because only this rarely-taken branch
                # needs it.
                from importlib import util as importlibutil
                spec = importlibutil.spec_from_loader(fullname,
                                                      legacyloader)
            if spec:
                break

        # This is a Mercurial-related module but we couldn't find it
        # using the previously-registered finders. This likely means
        # the module doesn't exist.
        if not spec:
            return None

        # TODO need to support loaders from alternate specs, like zip
        # loaders.
        loader = hgloader(spec.name, spec.origin)
        # Can't use util.safehasattr here because that would require
        # importing util, and we're in import code.
        if hasattr(spec.loader, 'loader'): # hasattr-py3-only
            # This is a nested loader (maybe a lazy loader?)
            spec.loader.loader = loader
        else:
            spec.loader = loader
        return spec
65 68
def replacetokens(tokens, fullname):
    """Transform a stream of tokens from raw to Python 3.

    It is called by the custom module loading machinery to rewrite
    source/tokens between source decoding and compilation.

    ``tokens`` is a list of ``tokenize`` tokens for one module and
    ``fullname`` is the dotted name of that module.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.

    REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
    OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
    """
    futureimpline = False

    # Token types that can sit between the opening parenthesis or a comma
    # and the first real token of an argument when a call is wrapped over
    # several lines. They never start an argument.
    fillertypes = (tokenize.NL, tokenize.COMMENT)

    # The following utility functions access the tokens list and i index of
    # the ``for i, t in enumerate(tokens)`` loop below
    def _isop(j, *o):
        """Assert that tokens[j] is an OP with one of the given values"""
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0)

        Returns index of the first token of that argument, or None if
        there are not that many arguments.

        Line-continuation (NL) and comment tokens in front of the
        argument are skipped, so a call wrapped over several lines is
        handled the same way as a call written on one line.

        Assumes that token[i + 1] is '('.

        """
        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    return None
            elif n == 0:
                if tokens[j].type in fillertypes:
                    # not the argument yet, keep looking
                    continue
                # this is the starting position of arg
                return j
            elif _isop(j, '(', '[', '{'):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                n -= 1

        return None

    def _ensureunicode(j):
        """Make sure the token at j is a unicode string

        This rewrites a string token to include the unicode literal prefix
        so the string transformer won't add the byte prefix.

        Ignores tokens that are not strings. Assumes bounds checking has
        already been done.

        """
        st = tokens[j]
        if st.type == token.STRING and st.string.startswith(("'", '"')):
            tokens[j] = st._replace(string='u%s' % st.string)

    for i, t in enumerate(tokens):
        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                yield t
                continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'). Ignore.
            if s[0] not in ("'", '"'):
                yield t
                continue

            # String literal. Prefix to make a b'' string.
            yield t._replace(string='b%s' % t.string)
            continue

        # Insert compatibility imports at "from __future__ import" line.
        # No '\n' should be added to preserve line numbers.
        if (t.type == token.NAME and t.string == 'import' and
            all(u.type == token.NAME for u in tokens[i - 2:i]) and
            [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
            futureimpline = True
        if t.type == token.NEWLINE and futureimpline:
            futureimpline = False
            if fullname == 'mercurial.pycompat':
                yield t
                continue
            r, c = t.start
            l = (b'; from mercurial.pycompat import '
                 b'delattr, getattr, hasattr, setattr, xrange, '
                 b'open, unicode\n')
            for u in tokenize.tokenize(io.BytesIO(l).readline):
                if u.type in (tokenize.ENCODING, token.ENDMARKER):
                    continue
                # Shift the injected tokens so they continue the original
                # line instead of starting at column 0.
                yield u._replace(
                    start=(r, c + u.start[1]), end=(r, c + u.end[1]))
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensureunicode(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensureunicode(argidx)

            # Rename iteritems/itervalues to items/values as the iter*
            # variants are not present in the Python 3 world.
            elif fn in ('iteritems', 'itervalues'):
                yield t._replace(string=fn[4:])
                continue

        # Emit unmodified token.
        yield t

# Header to add to bytecode files. This MUST be changed when
# ``replacetokens`` or any mechanism that changes semantics of module
# loading is changed. Otherwise cached bytecode may get loaded without
# the new transformation mechanisms applied.
BYTECODEHEADER = b'HG\x00\x0b'
218 221
class hgloader(importlib.machinery.SourceFileLoader):
    """Source file loader that rewrites module source before compiling it.

    Source is tokenized, passed through ``replacetokens`` and turned back
    into source text before it is compiled. This lets the code base stay
    natively Python 2 while still running on Python 3, without making
    the code excessively ugly.

    Rewriting the source breaks an assumption of the built-in importer's
    bytecode cache. The built-in importer only invalidates cached
    bytecode when the Python/bytecode version recorded in the cache file
    changes, but the Mercurial transformations can change at any time.
    Cached bytecode therefore has to be tied to the transformation code
    that produced it.

    This is done by wrapping ``get_data`` and ``set_data``, which
    ``SourceFileLoader`` calls to read and write bytecode cache files.
    ``set_data`` prepends an extra header to bytecode files and
    ``get_data`` checks and strips it. As long as ``BYTECODEHEADER`` is
    changed whenever transformation semantics change, stale cache files
    are rejected.

    The added header has the form ``HG<VERSION>``. That is a literal
    ``HG`` with 2 binary bytes indicating the transformation version.
    """
    def get_data(self, path):
        """Read ``path``; for bytecode files, validate and strip the header.

        Raises OSError when a bytecode file has no Mercurial header or a
        header for a different transformation version. That is what
        ``SourceFileLoader.get_code()`` expects when loading bytecode
        paths to indicate the cached file is "bad."
        """
        raw = super(hgloader, self).get_data(path)

        bytecodesuffixes = tuple(importlib.machinery.BYTECODE_SUFFIXES)
        if not path.endswith(bytecodesuffixes):
            # Not a bytecode cache file: hand the content back untouched.
            return raw

        if not raw.startswith(b'HG'):
            raise OSError('no hg header')

        header, payload = raw[:4], raw[4:]
        if header != BYTECODEHEADER:
            raise OSError('hg header version mismatch')

        return payload

    def set_data(self, path, data, *args, **kwargs):
        """Write ``data`` to ``path``, tagging bytecode files with the header."""
        bytecodesuffixes = tuple(importlib.machinery.BYTECODE_SUFFIXES)
        payload = data
        if path.endswith(bytecodesuffixes):
            payload = BYTECODEHEADER + data

        return super(hgloader, self).set_data(path, payload, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        stream = io.BytesIO(data)
        rawtokens = list(tokenize.tokenize(stream.readline))
        rewritten = tokenize.untokenize(replacetokens(rawtokens, self.name))
        # Python's built-in importer strips frames from exceptions raised
        # for this code. Unfortunately, that mechanism isn't extensible
        # and our frame will be blamed for the import failure. There
        # are extremely hacky ways to do frame stripping. We haven't
        # implemented them because they are very ugly.
        return super(hgloader, self).source_to_code(rewritten, path)
283 286
# Importing this package registers the custom finder as a side effect, so
# every entry point can import mercurial.* modules without having to do
# the registration itself. The check keeps a second import of this code
# from adding a second finder.
if all(not isinstance(f, hgpathentryfinder) for f in sys.meta_path):
    # Position 0: meta_path is used before any implicit finders and
    # before sys.path.
    sys.meta_path.insert(0, hgpathentryfinder())
General Comments 0
You need to be logged in to leave comments. Login now