py3: refactor token parsing to handle call args properly...
Martijn Pieters
r30165:42337729 default
@@ -185,6 +185,58 @@ if sys.version_info[0] >= 3:
         OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
         """
         futureimpline = False
+
+        # The following utility functions access the tokens list and i index of
+        # the for i, t enumerate(tokens) loop below
+        def _isop(j, *o):
+            """Assert that tokens[j] is an OP with one of the given values"""
+            try:
+                return tokens[j].type == token.OP and tokens[j].string in o
+            except IndexError:
+                return False
+
+        def _findargnofcall(n):
+            """Find arg n of a call expression (start at 0)
+
+            Returns index of the first token of that argument, or None if
+            there is not that many arguments.
+
+            Assumes that token[i + 1] is '('.
+
+            """
+            nested = 0
+            for j in range(i + 2, len(tokens)):
+                if _isop(j, ')', ']', '}'):
+                    # end of call, tuple, subscription or dict / set
+                    nested -= 1
+                    if nested < 0:
+                        return None
+                elif n == 0:
+                    # this is the starting position of arg
+                    return j
+                elif _isop(j, '(', '[', '{'):
+                    nested += 1
+                elif _isop(j, ',') and nested == 0:
+                    n -= 1
+
+            return None
+
+        def _ensureunicode(j):
+            """Make sure the token at j is a unicode string
+
+            This rewrites a string token to include the unicode literal prefix
+            so the string transformer won't add the byte prefix.
+
+            Ignores tokens that are not strings. Assumes bounds checking has
+            already been done.
+
+            """
+            st = tokens[j]
+            if st.type == token.STRING and st.string.startswith(("'", '"')):
+                rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
+                                        st.start, st.end, st.line)
+                tokens[j] = rt
+
         for i, t in enumerate(tokens):
             # Convert most string literals to byte literals. String literals
             # in Python 2 are bytes. String literals in Python 3 are unicode.
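The helpers above index into the shared tokens list and hand back rewritten tokenize.TokenInfo tuples. Not part of the commit, but as a minimal standalone sketch of the same mechanism on Python 3 (deliberately using the naive fixed tokens[i + 4] offset that this patch replaces, just to show the token-swap and round-trip):

    # Standalone sketch, not Mercurial's code: rewrite getattr()'s second
    # argument to carry a u'' prefix, then round-trip the token stream.
    import io
    import token
    import tokenize

    def rewrite(source):
        tokens = list(tokenize.tokenize(
            io.BytesIO(source.encode('utf-8')).readline))
        for i, t in enumerate(tokens):
            # getattr(x, 'name') -> getattr(x, u'name'); the fixed i + 4
            # offset only works for the simplest argument shapes.
            if (t.type == token.NAME and t.string == 'getattr' and
                    i + 4 < len(tokens) and
                    tokens[i + 1].string == '(' and
                    tokens[i + 4].type == token.STRING):
                st = tokens[i + 4]
                tokens[i + 4] = tokenize.TokenInfo(st.type, 'u%s' % st.string,
                                                   st.start, st.end, st.line)
        return tokenize.untokenize(tokens).decode('utf-8')

    print(rewrite("getattr(obj, 'name')"))   # -> getattr(obj, u'name')

Because untokenize() reuses the recorded start/end positions, only the replaced token changes; the rest of the source survives the round trip byte for byte.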
@@ -241,91 +293,35 @@ if sys.version_info[0] >= 3:
                      '')
                 continue
 
-            try:
-                nexttoken = tokens[i + 1]
-            except IndexError:
-                nexttoken = None
-
-            try:
-                prevtoken = tokens[i - 1]
-            except IndexError:
-                prevtoken = None
-
             # This looks like a function call.
-            if (t.type == token.NAME and nexttoken and
-                nexttoken.type == token.OP and nexttoken.string == '('):
+            if t.type == token.NAME and _isop(i + 1, '('):
                 fn = t.string
 
                 # *attr() builtins don't accept byte strings to 2nd argument.
-                # Rewrite the token to include the unicode literal prefix so
-                # the string transformer above doesn't add the byte prefix.
-                if fn in ('getattr', 'setattr', 'hasattr', 'safehasattr'):
-                    try:
-                        # (NAME, 'getattr')
-                        # (OP, '(')
-                        # (NAME, 'foo')
-                        # (OP, ',')
-                        # (NAME|STRING, foo)
-                        st = tokens[i + 4]
-                        if (st.type == token.STRING and
-                            st.string[0] in ("'", '"')):
-                            rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
-                                                    st.start, st.end, st.line)
-                            tokens[i + 4] = rt
-                    except IndexError:
-                        pass
+                if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
+                        not _isop(i - 1, '.')):
+                    arg1idx = _findargnofcall(1)
+                    if arg1idx is not None:
+                        _ensureunicode(arg1idx)
 
                 # .encode() and .decode() on str/bytes/unicode don't accept
-                # byte strings on Python 3. Rewrite the token to include the
-                # unicode literal prefix so the string transformer above doesn't
-                # add the byte prefix. The loop helps in handling multiple
-                # arguments.
-                if (fn in ('encode', 'decode') and
-                    prevtoken.type == token.OP and prevtoken.string == '.'):
-                    # (OP, '.')
-                    # (NAME, 'encode')
-                    # (OP, '(')
-                    # [(VARIABLE, encoding)]
-                    # [(OP, '.')]
-                    # [(VARIABLE, encoding)]
-                    # [(OP, ',')]
-                    # (STRING, 'utf-8')
-                    # (OP, ')')
-                    j = i
-                    try:
-                        while (tokens[j + 1].string in ('(', ',', '.')):
-                            st = tokens[j + 2]
-                            if (st.type == token.STRING and
-                                st.string[0] in ("'", '"')):
-                                rt = tokenize.TokenInfo(st.type,
-                                                        'u%s' % st.string,
-                                                        st.start, st.end, st.line)
-                                tokens[j + 2] = rt
-                            j = j + 2
-                    except IndexError:
-                        pass
+                # byte strings on Python 3.
+                elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
+                    for argn in range(2):
+                        argidx = _findargnofcall(argn)
+                        if argidx is not None:
+                            _ensureunicode(argidx)
 
-                # Bare open call (not an attribute on something else)
-                if (fn == 'open' and not (prevtoken.type == token.OP and
-                                          prevtoken.string == '.')):
-                    try:
-                        # (NAME, 'open')
-                        # (OP, '(')
-                        # (NAME|STRING, 'filename')
-                        # (OP, ',')
-                        # (NAME|STRING, mode)
-                        st = tokens[i + 4]
-                        if (st.type == token.STRING and
-                            st.string[0] in ("'", '"')):
-                            rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
-                                                    st.start, st.end, st.line)
-                            tokens[i + 4] = rt
-                    except IndexError:
-                        pass
+                # Bare open call (not an attribute on something else), the
+                # second argument (mode) must be a string, not bytes
+                elif fn == 'open' and not _isop(i - 1, '.'):
+                    arg1idx = _findargnofcall(1)
+                    if arg1idx is not None:
+                        _ensureunicode(arg1idx)
 
                 # It changes iteritems to items as iteritems is not
                 # present in Python 3 world.
-                if fn == 'iteritems':
+                elif fn == 'iteritems':
                     yield tokenize.TokenInfo(t.type, 'items',
                                              t.start, t.end, t.line)
                     continue
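For illustration only: the patch's _findargnofcall() closes over the loop's tokens and i, but the same bracket-counting scan can be restated as a standalone function (the wrapper name findargnofcall below is assumed, not from the patch). Unlike the old fixed i + 4 offset, it locates argument starts even when earlier arguments contain nested calls, subscripts, or container literals:

    # Standalone restatement of the argument-finding scan.
    import io
    import token
    import tokenize

    def findargnofcall(tokens, i, n):
        """Index of the first token of argument n of the call at tokens[i].

        Same bracket-counting idea as the patch's _findargnofcall();
        assumes tokens[i + 1] is '('.
        """
        def _isop(j, *o):
            try:
                return tokens[j].type == token.OP and tokens[j].string in o
            except IndexError:
                return False

        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                nested -= 1
                if nested < 0:
                    return None          # ran off the end of the call
            elif n == 0:
                return j                 # start of the argument we want
            elif _isop(j, '(', '[', '{'):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                n -= 1                   # top-level comma: next argument
        return None

    src = b"setattr(obj, d['k'], fallback(x, y))"
    toks = list(tokenize.tokenize(io.BytesIO(src).readline))
    for n in range(3):
        j = findargnofcall(toks, 1, n)   # toks[1] is the NAME 'setattr'
        print(n, toks[j].string)
    # prints: 0 obj / 1 d / 2 fallback

Only commas at nesting depth zero separate arguments, which is exactly what the old fixed-offset code (and the while loop it replaces in the encode/decode branch) got wrong for anything but the simplest call shapes.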
@@ -337,7 +333,7 @@ if sys.version_info[0] >= 3:
     # ``replacetoken`` or any mechanism that changes semantics of module
     # loading is changed. Otherwise cached bytecode may get loaded without
     # the new transformation mechanisms applied.
-    BYTECODEHEADER = b'HG\x00\x05'
+    BYTECODEHEADER = b'HG\x00\x06'
 
     class hgloader(importlib.machinery.SourceFileLoader):
         """Custom module loader that transforms source code.