loader: pywatchman appears to already be py3 compatible...
Augie Fackler
r32521:942051a2 default
@@ -1,290 +1,293 @@
# __init__.py - Startup and module loading logic for Mercurial.
#
# Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import sys

# Allow 'from mercurial import demandimport' to keep working.
import hgdemandimport
demandimport = hgdemandimport

__all__ = []

# Python 3 uses a custom module loader that transforms source code between
# source file reading and compilation. This is done by registering a custom
# finder that changes the spec for Mercurial modules to use a custom loader.
if sys.version_info[0] >= 3:
    import importlib
    import importlib.abc
    import io
    import token
    import tokenize

    class hgpathentryfinder(importlib.abc.MetaPathFinder):
        """A sys.meta_path finder that uses a custom module loader."""
        def find_spec(self, fullname, path, target=None):
            # Only handle Mercurial-related modules.
            if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
                return None
            # zstd is already dual-version clean, don't try and mangle it
            if fullname.startswith('mercurial.zstd'):
                return None
+           # pywatchman is already dual-version clean, don't try and mangle it
+           if fullname.startswith('hgext.fsmonitor.pywatchman'):
+               return None

            # Try to find the module using other registered finders.
            spec = None
            for finder in sys.meta_path:
                if finder == self:
                    continue

                spec = finder.find_spec(fullname, path, target=target)
                if spec:
                    break

            # This is a Mercurial-related module but we couldn't find it
            # using the previously-registered finders. This likely means
            # the module doesn't exist.
            if not spec:
                return None

            # TODO need to support loaders from alternate specs, like zip
            # loaders.
            loader = hgloader(spec.name, spec.origin)
            # Can't use util.safehasattr here because that would require
            # importing util, and we're in import code.
            if hasattr(spec.loader, 'loader'): # hasattr-py3-only
                # This is a nested loader (maybe a lazy loader?)
                spec.loader.loader = loader
            else:
                spec.loader = loader
            return spec
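
The nested-loader branch above exists because some loaders wrap another loader and expose it through a `.loader` attribute; `importlib.util.LazyLoader` is the usual example. A minimal sketch of how such nesting arises, adapted from the standard library's lazy-import recipe (the `lazy_import` helper name is ours, not Mercurial's):

```python
import importlib.util
import sys

def lazy_import(fullname):
    """Return a module that is only actually loaded on first attribute access."""
    spec = importlib.util.find_spec(fullname)
    # LazyLoader wraps the real loader and keeps it on a ``.loader``
    # attribute -- exactly the nesting that find_spec() above reaches through.
    spec.loader = importlib.util.LazyLoader(spec.loader)
    module = importlib.util.module_from_spec(spec)
    sys.modules[fullname] = module
    spec.loader.exec_module(module)
    return module
```
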

    def replacetokens(tokens, fullname):
        """Transform a stream of tokens from raw to Python 3.

        It is called by the custom module loading machinery to rewrite
        source/tokens between source decoding and compilation.

        Returns a generator of possibly rewritten tokens.

        The input token list may be mutated as part of processing. However,
        its changes do not necessarily match the output token stream.

        REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
        OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
        """
        futureimpline = False

        # The following utility functions access the tokens list and the i
        # index of the for i, t in enumerate(tokens) loop below
        def _isop(j, *o):
            """Assert that tokens[j] is an OP with one of the given values"""
            try:
                return tokens[j].type == token.OP and tokens[j].string in o
            except IndexError:
                return False

        def _findargnofcall(n):
            """Find arg n of a call expression (start at 0)

            Returns index of the first token of that argument, or None if
            there are not that many arguments.

            Assumes that token[i + 1] is '('.

            """
            nested = 0
            for j in range(i + 2, len(tokens)):
                if _isop(j, ')', ']', '}'):
                    # end of call, tuple, subscription or dict / set
                    nested -= 1
                    if nested < 0:
                        return None
                elif n == 0:
                    # this is the starting position of arg
                    return j
                elif _isop(j, '(', '[', '{'):
                    nested += 1
                elif _isop(j, ',') and nested == 0:
                    n -= 1

            return None

        def _ensureunicode(j):
            """Make sure the token at j is a unicode string

            This rewrites a string token to include the unicode literal prefix
            so the string transformer won't add the byte prefix.

            Ignores tokens that are not strings. Assumes bounds checking has
            already been done.

            """
            st = tokens[j]
            if st.type == token.STRING and st.string.startswith(("'", '"')):
                tokens[j] = st._replace(string='u%s' % st.string)
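
To see the token stream that `_isop`, `_findargnofcall`, and `_ensureunicode` index into, it helps to dump a small call expression. This throwaway snippet (not part of the file) prints each token's index, type name, and text:

```python
import io
import token
import tokenize

source = b"setattr(obj, 'name', value)\n"
for j, t in enumerate(tokenize.tokenize(io.BytesIO(source).readline)):
    print(j, token.tok_name[t.type], repr(t.string))
```
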

        for i, t in enumerate(tokens):
            # Convert most string literals to byte literals. String literals
            # in Python 2 are bytes. String literals in Python 3 are unicode.
            # Most strings in Mercurial are bytes and unicode strings are rare.
            # Rather than rewrite all string literals to use ``b''`` to indicate
            # byte strings, we apply this token transformer to insert the ``b``
            # prefix nearly everywhere.
            if t.type == token.STRING:
                s = t.string

                # Preserve docstrings as string literals. This is inconsistent
                # with regular unprefixed strings. However, the
                # "from __future__" parsing (which allows a module docstring to
                # exist before it) doesn't properly handle the docstring if it
                # is b''' prefixed, leading to a SyntaxError. We leave all
                # docstrings as unprefixed to avoid this. This means Mercurial
                # components touching docstrings need to handle unicode,
                # unfortunately.
                if s[0:3] in ("'''", '"""'):
                    yield t
                    continue

                # If the first character isn't a quote, it is likely a string
                # prefixing character (such as 'b', 'u', or 'r'). Ignore.
                if s[0] not in ("'", '"'):
                    yield t
                    continue

                # String literal. Prefix to make a b'' string.
                yield t._replace(string='b%s' % t.string)
                continue

            # Insert compatibility imports at "from __future__ import" line.
            # No '\n' should be added to preserve line numbers.
            if (t.type == token.NAME and t.string == 'import' and
                    all(u.type == token.NAME for u in tokens[i - 2:i]) and
                    [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
                futureimpline = True
            if t.type == token.NEWLINE and futureimpline:
                futureimpline = False
                if fullname == 'mercurial.pycompat':
                    yield t
                    continue
                r, c = t.start
                l = (b'; from mercurial.pycompat import '
                     b'delattr, getattr, hasattr, setattr, xrange, '
                     b'open, unicode\n')
                for u in tokenize.tokenize(io.BytesIO(l).readline):
                    if u.type in (tokenize.ENCODING, token.ENDMARKER):
                        continue
                    yield u._replace(
                        start=(r, c + u.start[1]), end=(r, c + u.end[1]))
                continue
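
The same `_replace`-and-reemit pattern can be exercised standalone. A toy version of the byte-prefixing transform above (the `byteify` helper is hypothetical; it skips the docstring handling and everything else the real function does):

```python
import io
import token
import tokenize

def byteify(source):
    """Prefix plain string literals with b, leaving already-prefixed ones alone."""
    result = []
    for t in tokenize.tokenize(io.BytesIO(source).readline):
        if t.type == token.STRING and t.string[0] in ("'", '"'):
            t = t._replace(string='b%s' % t.string)
        result.append(t)
    return tokenize.untokenize(result)

print(byteify(b"x = 'hello'\n"))  # -> b"x = b'hello'\n"
```
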

            # This looks like a function call.
            if t.type == token.NAME and _isop(i + 1, '('):
                fn = t.string

                # *attr() builtins don't accept byte strings as their 2nd
                # argument.
                if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                        not _isop(i - 1, '.')):
                    arg1idx = _findargnofcall(1)
                    if arg1idx is not None:
                        _ensureunicode(arg1idx)

                # .encode() and .decode() on str/bytes/unicode don't accept
                # byte strings on Python 3.
                elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                    for argn in range(2):
                        argidx = _findargnofcall(argn)
                        if argidx is not None:
                            _ensureunicode(argidx)

                # It changes iteritems/itervalues to items/values as they are
                # not present in the Python 3 world.
                elif fn in ('iteritems', 'itervalues'):
                    yield t._replace(string=fn[4:])
                    continue

            # Emit unmodified token.
            yield t
    # Header to add to bytecode files. This MUST be changed when
    # ``replacetokens`` or any mechanism that changes semantics of module
    # loading is changed. Otherwise cached bytecode may get loaded without
    # the new transformation mechanisms applied.
    BYTECODEHEADER = b'HG\x00\x0a'

    class hgloader(importlib.machinery.SourceFileLoader):
        """Custom module loader that transforms source code.

        When the source code is converted to a code object, we transform
        certain patterns to be Python 3 compatible. This allows us to write
        code that is natively Python 2 and compatible with Python 3 without
        making the code excessively ugly.

        We do this by transforming the token stream between parse and compile.

        Implementing transformations invalidates caching assumptions made
        by the built-in importer. The built-in importer stores a header on
        saved bytecode files indicating the Python/bytecode version. If the
        version changes, the cached bytecode is ignored. The Mercurial
        transformations could change at any time. This means we need to check
        that cached bytecode was generated with the current transformation
        code or there could be a mismatch between cached bytecode and what
        would be generated from this class.

        We supplement the bytecode caching layer by wrapping ``get_data``
        and ``set_data``. These functions are called when the
        ``SourceFileLoader`` retrieves and saves bytecode cache files,
        respectively. We simply add an additional header on the file. As
        long as the version in this file is changed when semantics change,
        cached bytecode should be invalidated when transformations change.

        The added header has the form ``HG<VERSION>``. That is a literal
        ``HG`` with 2 binary bytes indicating the transformation version.
        """
        def get_data(self, path):
            data = super(hgloader, self).get_data(path)

            if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
                return data

            # There should be a header indicating the Mercurial transformation
            # version. If it doesn't exist or doesn't match the current
            # version, we raise an OSError because that is what
            # ``SourceFileLoader.get_code()`` expects when loading bytecode
            # paths to indicate the cached file is "bad."
            if data[0:2] != b'HG':
                raise OSError('no hg header')
            if data[0:4] != BYTECODEHEADER:
                raise OSError('hg header version mismatch')

            return data[4:]

        def set_data(self, path, data, *args, **kwargs):
            if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
                data = BYTECODEHEADER + data

            return super(hgloader, self).set_data(path, data, *args, **kwargs)
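
The `get_data`/`set_data` pair amounts to a four-byte header round-trip. A self-contained sketch of the same logic (the `_stamp`/`_check` names are illustrative, not Mercurial API):

```python
BYTECODEHEADER = b'HG\x00\x0a'

def _stamp(bytecode):
    """Mirror set_data(): prepend the transform-version header."""
    return BYTECODEHEADER + bytecode

def _check(data):
    """Mirror get_data() for bytecode paths: validate, then strip the header."""
    if data[0:2] != b'HG':
        raise OSError('no hg header')
    if data[0:4] != BYTECODEHEADER:
        raise OSError('hg header version mismatch')
    return data[4:]

assert _check(_stamp(b'fake bytecode')) == b'fake bytecode'
```
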

        def source_to_code(self, data, path):
            """Perform token transformation before compilation."""
            buf = io.BytesIO(data)
            tokens = tokenize.tokenize(buf.readline)
            data = tokenize.untokenize(replacetokens(list(tokens), self.name))
            # Python's built-in importer strips frames from exceptions raised
            # for this code. Unfortunately, that mechanism isn't extensible
            # and our frame will be blamed for the import failure. There
            # are extremely hacky ways to do frame stripping. We haven't
            # implemented them because they are very ugly.
            return super(hgloader, self).source_to_code(data, path)
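
Stripped of the Mercurial specifics, `source_to_code` is a tokenize/untokenize sandwich around `compile`. A generic sketch (the `compile_with_transform` name is hypothetical; `transform` is any callable with the `replacetokens` signature):

```python
import io
import tokenize

def compile_with_transform(source, transform, fullname, path='<transformed>'):
    """Tokenize source bytes, rewrite the tokens, and compile the result."""
    tokens = tokenize.tokenize(io.BytesIO(source).readline)
    rewritten = tokenize.untokenize(transform(list(tokens), fullname))
    return compile(rewritten, path, 'exec')
```
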

    # We automagically register our custom importer as a side-effect of
    # loading. This is necessary to ensure that any entry points are able
    # to import mercurial.* modules without having to perform this
    # registration themselves.
    if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path):
        # meta_path is used before any implicit finders and before sys.path.
        sys.meta_path.insert(0, hgpathentryfinder())
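
If Mercurial is importable under Python 3, the registration side effect is observable: the finder sits at the front of `sys.meta_path` and Mercurial submodules report `hgloader` as their loader. A quick interactive check (assumes a Python 3 interpreter with Mercurial on `sys.path`, and that nothing else has prepended to `sys.meta_path` since):

```python
import sys
import mercurial  # registers hgpathentryfinder as an import side effect

print(type(sys.meta_path[0]).__name__)     # 'hgpathentryfinder'

from mercurial import pycompat
print(type(pycompat.__loader__).__name__)  # 'hgloader'
```
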