##// END OF EJS Templates
Switch inputtransformer2 back to stdlib tokenize module
Thomas Kluyver
Show More
@@ -9,9 +9,8 b' This includes the machinery to recognise and transform ``%magic`` commands,'
9
9
10 from codeop import compile_command
10 from codeop import compile_command
11 import re
11 import re
12 import tokenize
12 from typing import List, Tuple
13 from typing import List, Tuple
13 from IPython.utils import tokenize2
14 from IPython.utils.tokenutil import generate_tokens
15
14
16 _indent_re = re.compile(r'^[ \t]+')
15 _indent_re = re.compile(r'^[ \t]+')
17
16
@@ -140,7 +139,7 b' class MagicAssign(TokenTransformBase):'
140 if (assign_ix is not None) \
139 if (assign_ix is not None) \
141 and (len(line) >= assign_ix + 2) \
140 and (len(line) >= assign_ix + 2) \
142 and (line[assign_ix+1].string == '%') \
141 and (line[assign_ix+1].string == '%') \
143 and (line[assign_ix+2].type == tokenize2.NAME):
142 and (line[assign_ix+2].type == tokenize.NAME):
144 return cls(line[assign_ix+1].start)
143 return cls(line[assign_ix+1].start)
145
144
146 def transform(self, lines: List[str]):
145 def transform(self, lines: List[str]):
@@ -172,10 +171,10 b' class SystemAssign(TokenTransformBase):'
172 assign_ix = _find_assign_op(line)
171 assign_ix = _find_assign_op(line)
173 if (assign_ix is not None) \
172 if (assign_ix is not None) \
174 and (len(line) >= assign_ix + 2) \
173 and (len(line) >= assign_ix + 2) \
175 and (line[assign_ix + 1].type == tokenize2.ERRORTOKEN):
174 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
176 ix = assign_ix + 1
175 ix = assign_ix + 1
177
176
178 while ix < len(line) and line[ix].type == tokenize2.ERRORTOKEN:
177 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
179 if line[ix].string == '!':
178 if line[ix].string == '!':
180 return cls(line[ix].start)
179 return cls(line[ix].start)
181 elif not line[ix].string.isspace():
180 elif not line[ix].string.isspace():
@@ -289,7 +288,7 b' class EscapedCommand(TokenTransformBase):'
289 """
288 """
290 for line in tokens_by_line:
289 for line in tokens_by_line:
291 ix = 0
290 ix = 0
292 while line[ix].type in {tokenize2.INDENT, tokenize2.DEDENT}:
291 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
293 ix += 1
292 ix += 1
294 if line[ix].string in ESCAPE_SINGLES:
293 if line[ix].string in ESCAPE_SINGLES:
295 return cls(line[ix].start)
294 return cls(line[ix].start)
@@ -338,7 +337,7 b' class HelpEnd(TokenTransformBase):'
338 if len(line) > 2 and line[-2].string == '?':
337 if len(line) > 2 and line[-2].string == '?':
339 # Find the first token that's not INDENT/DEDENT
338 # Find the first token that's not INDENT/DEDENT
340 ix = 0
339 ix = 0
341 while line[ix].type in {tokenize2.INDENT, tokenize2.DEDENT}:
340 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
342 ix += 1
341 ix += 1
343 return cls(line[ix].start, line[-2].start)
342 return cls(line[ix].start, line[-2].start)
344
343
@@ -365,11 +364,31 b' class HelpEnd(TokenTransformBase):'
365 return lines_before + [new_line] + lines_after
364 return lines_before + [new_line] + lines_after
366
365
367 def make_tokens_by_line(lines):
366 def make_tokens_by_line(lines):
367 """Tokenize a series of lines and group tokens by line.
368
369 The tokens for a multiline Python string or expression are
370 grouped as one line.
371 """
372 # NL tokens are used inside multiline expressions, but also after blank
373 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
374 # We want to group the former case together but split the latter, so we
375 # track parentheses level, similar to the internals of tokenize.
376 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
368 tokens_by_line = [[]]
377 tokens_by_line = [[]]
369 for token in generate_tokens(iter(lines).__next__):
378 parenlev = 0
370 tokens_by_line[-1].append(token)
379 try:
371 if token.type == tokenize2.NEWLINE:
380 for token in tokenize.generate_tokens(iter(lines).__next__):
372 tokens_by_line.append([])
381 tokens_by_line[-1].append(token)
382 if (token.type == NEWLINE) \
383 or ((token.type == NL) and (parenlev <= 0)):
384 tokens_by_line.append([])
385 elif token.string in {'(', '[', '{'}:
386 parenlev += 1
387 elif token.string in {')', ']', '}'}:
388 parenlev -= 1
389 except tokenize.TokenError:
390 # Input ended in a multiline string or expression. That's OK for us.
391 pass
373
392
374 return tokens_by_line
393 return tokens_by_line
375
394
@@ -490,21 +509,21 b' class TransformerManager:'
490 return 'invalid', None
509 return 'invalid', None
491
510
492 tokens_by_line = make_tokens_by_line(lines)
511 tokens_by_line = make_tokens_by_line(lines)
493 if tokens_by_line[-1][-1].type != tokenize2.ENDMARKER:
512 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
494 # We're in a multiline string or expression
513 # We're in a multiline string or expression
495 return 'incomplete', find_last_indent(lines)
514 return 'incomplete', find_last_indent(lines)
496
515
497 # Find the last token on the previous line that's not NEWLINE or COMMENT
516 # Find the last token on the previous line that's not NEWLINE or COMMENT
498 toks_last_line = tokens_by_line[-2]
517 toks_last_line = tokens_by_line[-2]
499 ix = len(toks_last_line) - 1
518 ix = len(toks_last_line) - 1
500 while ix >= 0 and toks_last_line[ix].type in {tokenize2.NEWLINE,
519 while ix >= 0 and toks_last_line[ix].type in {tokenize.NEWLINE,
501 tokenize2.COMMENT}:
520 tokenize.COMMENT}:
502 ix -= 1
521 ix -= 1
503
522
504 if toks_last_line[ix].string == ':':
523 if toks_last_line[ix].string == ':':
505 # The last line starts a block (e.g. 'if foo:')
524 # The last line starts a block (e.g. 'if foo:')
506 ix = 0
525 ix = 0
507 while toks_last_line[ix].type in {tokenize2.INDENT, tokenize2.DEDENT}:
526 while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
508 ix += 1
527 ix += 1
509 indent = toks_last_line[ix].start[1]
528 indent = toks_last_line[ix].start[1]
510 return 'incomplete', indent + 4
529 return 'incomplete', indent + 4
General Comments 0
You need to be logged in to leave comments. Log in now.