Fix issues due to breaking tokenize changes in 3.12 (#14107)...
Matthias Bussonnier
r28328:635815e8 merge
@@ -44,6 +44,7 @@ from IPython.core.inputtransformer import (leading_indent,
     assign_from_system,
     assemble_python_lines,
 )
+from IPython.utils import tokenutil

 # These are available in this module for backwards compatibility.
 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
@@ -128,7 +129,7 @@ def partial_tokens(s):
     readline = io.StringIO(s).readline
     token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
     try:
-        for token in tokenize.generate_tokens(readline):
+        for token in tokenutil.generate_tokens_catch_errors(readline):
             yield token
     except tokenize.TokenError as e:
         # catch EOF error
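For context on what the wrapper shields against: plain tokenize.generate_tokens reports incomplete input by raising TokenError, and the exact message and failure point moved around in Python 3.12. A minimal illustrative snippet (the input string is made up) showing the exception the old code had to catch:

import io
import tokenize

# Unterminated triple-quoted string: tokenize yields what it can, then raises
# TokenError (the message wording differs between 3.11 and 3.12).
try:
    for tok in tokenize.generate_tokens(io.StringIO("s = '''abc\n").readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
except tokenize.TokenError as e:
    print("TokenError:", e)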
@@ -150,9 +151,22 @@ def find_next_indent(code):
         tokens.pop()
     if not tokens:
         return 0
-    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
+
+    while tokens[-1].type in {
+        tokenize.DEDENT,
+        tokenize.NEWLINE,
+        tokenize.COMMENT,
+        tokenize.ERRORTOKEN,
+    }:
         tokens.pop()

+    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
+    # of input. We need to remove those if we're in a multiline statement
+    if tokens[-1].type == IN_MULTILINE_STATEMENT:
+        while tokens[-2].type in {tokenize.NL}:
+            tokens.pop(-2)
+
+
     if tokens[-1].type == INCOMPLETE_STRING:
         # Inside a multiline string
         return 0
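The new pruning exists because, as the comment above notes, the 3.12 tokenizer appends implicit NEWLINE/NL tokens at the end of input. A rough way to inspect the tail of the token stream on your own interpreter (illustrative input, not from the test suite):

import io
import tokenize

toks = []
try:
    # Open bracket, so the statement is still incomplete at end of input.
    for tok in tokenize.generate_tokens(io.StringIO("a = [1,\n2,\n").readline):
        toks.append(tok)
except tokenize.TokenError:
    pass  # incomplete input still ends in a TokenError
for tok in toks[-3:]:
    print(tokenize.tok_name[tok.type], repr(tok.string))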
@@ -9,10 +9,11 @@ import abc
 import functools
 import re
 import tokenize
-from tokenize import generate_tokens, untokenize, TokenError
+from tokenize import untokenize, TokenError
 from io import StringIO

 from IPython.core.splitinput import LineInfo
+from IPython.utils import tokenutil

 #-----------------------------------------------------------------------------
 # Globals
@@ -127,7 +128,7 @@ class TokenInputTransformer(InputTransformer):

     def reset_tokenizer(self):
         it = iter(self.buf)
-        self.tokenizer = generate_tokens(it.__next__)
+        self.tokenizer = tokenutil.generate_tokens_catch_errors(it.__next__)

     def push(self, line):
         self.buf.append(line + '\n')
@@ -295,7 +296,7 @@ def _line_tokens(line):
     readline = StringIO(line).readline
     toktypes = set()
     try:
-        for t in generate_tokens(readline):
+        for t in tokenutil.generate_tokens_catch_errors(readline):
             toktypes.add(t[0])
     except TokenError as e:
         # There are only two cases where a TokenError is raised.
@@ -13,10 +13,13 @@ deprecated in 7.0.
 import ast
 from codeop import CommandCompiler, Compile
 import re
+import sys
 import tokenize
 from typing import List, Tuple, Optional, Any
 import warnings

+from IPython.utils import tokenutil
+
 _indent_re = re.compile(r'^[ \t]+')

 def leading_empty_lines(lines):
@@ -269,9 +272,7 @@ class MagicAssign(TokenTransformBase):
 class SystemAssign(TokenTransformBase):
     """Transformer for assignments from system commands (a = !foo)"""
     @classmethod
-    def find(cls, tokens_by_line):
-        """Find the first system assignment (a = !foo) in the cell.
-        """
+    def find_pre_312(cls, tokens_by_line):
         for line in tokens_by_line:
             assign_ix = _find_assign_op(line)
             if (assign_ix is not None) \
@@ -287,6 +288,26 @@ class SystemAssign(TokenTransformBase):
                     break
                 ix += 1

+    @classmethod
+    def find_post_312(cls, tokens_by_line):
+        for line in tokens_by_line:
+            assign_ix = _find_assign_op(line)
+            if (
+                (assign_ix is not None)
+                and not line[assign_ix].line.strip().startswith("=")
+                and (len(line) >= assign_ix + 2)
+                and (line[assign_ix + 1].type == tokenize.OP)
+                and (line[assign_ix + 1].string == "!")
+            ):
+                return cls(line[assign_ix + 1].start)
+
+    @classmethod
+    def find(cls, tokens_by_line):
+        """Find the first system assignment (a = !foo) in the cell."""
+        if sys.version_info < (3, 12):
+            return cls.find_pre_312(tokens_by_line)
+        return cls.find_post_312(tokens_by_line)
+
     def transform(self, lines: List[str]):
         """Transform a system assignment found by the ``find()`` classmethod.
         """
@@ -511,7 +532,9 @@ def make_tokens_by_line(lines:List[str]):
     )
     parenlev = 0
     try:
-        for token in tokenize.generate_tokens(iter(lines).__next__):
+        for token in tokenutil.generate_tokens_catch_errors(
+            iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
+        ):
             tokens_by_line[-1].append(token)
             if (token.type == NEWLINE) \
                 or ((token.type == NL) and (parenlev <= 0)):
@@ -677,9 +700,13 @@ class TransformerManager:
         if not lines:
             return 'complete', None

-        if lines[-1].endswith('\\'):
-            # Explicit backslash continuation
-            return 'incomplete', find_last_indent(lines)
+        for line in reversed(lines):
+            if not line.strip():
+                continue
+            elif line.strip("\n").endswith("\\"):
+                return "incomplete", find_last_indent(lines)
+            else:
+                break

         try:
             for transform in self.cleanup_transforms:
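The replacement loop walks back over trailing blank lines, so a backslash continuation followed by an empty line is still reported as incomplete rather than only checking the literal last line. A standalone mirror of that scan (the helper name is made up for illustration):

def ends_with_continuation(lines):
    # Skip blank lines from the end; the last non-blank line decides.
    for line in reversed(lines):
        if not line.strip():
            continue
        return line.strip("\n").endswith("\\")
    return False

print(ends_with_continuation(["a = 1 + \\\n", "\n"]))  # True
print(ends_with_continuation(["a = 1\n"]))             # False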
@@ -717,7 +744,10 @@ class TransformerManager:
         if not tokens_by_line:
             return 'incomplete', find_last_indent(lines)

-        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
+        if (
+            tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
+        ):
             # We're in a multiline string or expression
             return 'incomplete', find_last_indent(lines)

@@ -298,13 +298,18 @@ def test_find_assign_op_dedent():
     )


+extra_closing_paren_param = (
+    pytest.param("(\n))", "invalid", None)
+    if sys.version_info >= (3, 12)
+    else pytest.param("(\n))", "incomplete", 0)
+)
 examples = [
     pytest.param("a = 1", "complete", None),
     pytest.param("for a in range(5):", "incomplete", 4),
     pytest.param("for a in range(5):\n if a > 0:", "incomplete", 8),
     pytest.param("raise = 2", "invalid", None),
     pytest.param("a = [1,\n2,", "incomplete", 0),
-    pytest.param("(\n))", "incomplete", 0),
+    extra_closing_paren_param,
     pytest.param("\\\r\n", "incomplete", 0),
     pytest.param("a = '''\n hi", "incomplete", 3),
     pytest.param("def a():\n x=1\n global x", "invalid", None),
@@ -18,6 +18,7 @@ Authors
 #-----------------------------------------------------------------------------

 # our own
+import sys
 from IPython.utils.PyColorize import Parser
 import io
 import pytest
@@ -40,7 +41,7 @@ def function(arg, *args, kwarg=True, **kwargs):
     False == None

     with io.open(ru'unicode', encoding='utf-8'):
-        raise ValueError("\n escape \r sequence")
+        raise ValueError("escape \r sequence")

     print("wěird ünicoðe")

@@ -64,6 +65,6 @@ def test_parse_sample(style):

 def test_parse_error(style):
     p = Parser(style=style)
-    f1 = p.format(")", "str")
+    f1 = p.format(r"\ " if sys.version_info >= (3, 12) else ")", "str")
     if style != "NoColor":
         assert "ERROR" in f1
@@ -21,6 +21,36 @@ def generate_tokens(readline):
         # catch EOF error
         return

+
+def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
+    default_errors_to_catch = [
+        "unterminated string literal",
+        "invalid non-printable character",
+        "after line continuation character",
+    ]
+    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
+    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
+
+    tokens = []
+    try:
+        for token in tokenize.generate_tokens(readline):
+            tokens.append(token)
+            yield token
+    except tokenize.TokenError as exc:
+        if any(error in exc.args[0] for error in errors_to_catch):
+            if tokens:
+                start = tokens[-1].start[0], tokens[-1].end[0]
+                end = start
+                line = tokens[-1].line
+            else:
+                start = end = (1, 0)
+                line = ""
+            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
+        else:
+            # Catch EOF
+            raise
+
+
 def line_at_cursor(cell, cursor_pos=0):
     """Return the line in a cell at a given cursor position

@@ -123,5 +153,3 @@ def token_at_cursor(cell, cursor_pos=0):
         return names[-1]
     else:
         return ''
-
-
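Callers iterate the new generate_tokens_catch_errors helper exactly like tokenize.generate_tokens; a hedged usage sketch (the input string is illustrative):

import io
from IPython.utils import tokenutil

src = "a = 'unterminated\n"
# Yields whatever tokens could be produced; if the tokenizer fails with one of
# the recognised TokenError messages, a single ERRORTOKEN is yielded instead of
# the exception propagating to the caller.
for tok in tokenutil.generate_tokens_catch_errors(io.StringIO(src).readline):
    print(tok.type, repr(tok.string))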