Fix issues due to breaking tokenize changes in 3.12
Lysandros Nikolaou
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -44,6 +44,7 @@ from IPython.core.inputtransformer import (leading_indent,
                                            assign_from_system,
                                            assemble_python_lines,
                                            )
+from IPython.utils import tokenutil
 
 # These are available in this module for backwards compatibility.
 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
@@ -128,7 +129,7 @@ def partial_tokens(s):
     readline = io.StringIO(s).readline
     token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
     try:
-        for token in tokenize.generate_tokens(readline):
+        for token in tokenutil.generate_tokens_catch_errors(readline):
             yield token
     except tokenize.TokenError as e:
         # catch EOF error
@@ -150,9 +151,17 @@ def find_next_indent(code):
         tokens.pop()
     if not tokens:
         return 0
-    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
+
+    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT, tokenize.ERRORTOKEN}):
         tokens.pop()
 
+    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
+    # of input. We need to remove those if we're in a multiline statement
+    if tokens[-1].type == IN_MULTILINE_STATEMENT:
+        while tokens[-2].type in {tokenize.NL}:
+            tokens.pop(-2)
+
+
     if tokens[-1].type == INCOMPLETE_STRING:
         # Inside a multiline string
         return 0
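
A note on the inputsplitter.py hunks above: the rewritten tokenizer in Python 3.12 emits implicit NEWLINE/NL tokens for input that does not end in a newline, even inside an unclosed bracket, which is what the new IN_MULTILINE_STATEMENT cleanup compensates for. A minimal standalone sketch of the difference (not part of the commit):

    import io
    import tokenize

    def dump(source):
        # Print the token stream until tokenize gives up.
        readline = io.StringIO(source).readline
        try:
            for tok in tokenize.generate_tokens(readline):
                print(tokenize.tok_name[tok.type], repr(tok.string))
        except tokenize.TokenError as exc:
            print("TokenError:", exc.args[0])

    # On 3.12 an implicit newline token for the missing trailing newline
    # may appear before the TokenError, which is why find_next_indent()
    # now pops trailing NL tokens when inside a multiline statement.
    dump("a = [1,\n2,")
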
--- a/IPython/core/inputtransformer.py
+++ b/IPython/core/inputtransformer.py
@@ -9,10 +9,11 @@ import abc
 import functools
 import re
 import tokenize
-from tokenize import generate_tokens, untokenize, TokenError
+from tokenize import untokenize, TokenError
 from io import StringIO
 
 from IPython.core.splitinput import LineInfo
+from IPython.utils import tokenutil
 
 #-----------------------------------------------------------------------------
 # Globals
@@ -127,7 +128,7 @@ class TokenInputTransformer(InputTransformer):
 
     def reset_tokenizer(self):
         it = iter(self.buf)
-        self.tokenizer = generate_tokens(it.__next__)
+        self.tokenizer = tokenutil.generate_tokens_catch_errors(it.__next__)
 
     def push(self, line):
         self.buf.append(line + '\n')
@@ -295,7 +296,7 @@ def _line_tokens(line):
     readline = StringIO(line).readline
     toktypes = set()
     try:
-        for t in generate_tokens(readline):
+        for t in tokenutil.generate_tokens_catch_errors(readline):
             toktypes.add(t[0])
     except TokenError as e:
         # There are only two cases where a TokenError is raised.
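
The three inputtransformer.py call sites above are mechanical swaps to the new helper. A sketch of the _line_tokens pattern with the safe generator, assuming a build of IPython that includes this change:

    from io import StringIO
    import tokenize

    from IPython.utils import tokenutil

    # On 3.12 an unterminated string raises TokenError mid-iteration in
    # plain tokenize.generate_tokens; the helper yields a placeholder
    # ERRORTOKEN instead, so collecting token types keeps working.
    toktypes = set()
    for t in tokenutil.generate_tokens_catch_errors(StringIO("s = 'abc").readline):
        toktypes.add(t[0])
    print(tokenize.ERRORTOKEN in toktypes)
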
--- a/IPython/core/inputtransformer2.py
+++ b/IPython/core/inputtransformer2.py
@@ -13,10 +13,13 @@ deprecated in 7.0.
 import ast
 from codeop import CommandCompiler, Compile
 import re
+import sys
 import tokenize
 from typing import List, Tuple, Optional, Any
 import warnings
 
+from IPython.utils import tokenutil
+
 _indent_re = re.compile(r'^[ \t]+')
 
 def leading_empty_lines(lines):
@@ -269,9 +272,7 @@ class MagicAssign(TokenTransformBase):
 class SystemAssign(TokenTransformBase):
     """Transformer for assignments from system commands (a = !foo)"""
     @classmethod
-    def find(cls, tokens_by_line):
-        """Find the first system assignment (a = !foo) in the cell.
-        """
+    def find_pre_312(cls, tokens_by_line):
         for line in tokens_by_line:
             assign_ix = _find_assign_op(line)
             if (assign_ix is not None) \
@@ -287,6 +288,25 @@ class SystemAssign(TokenTransformBase):
                     break
                 ix += 1
 
+    @classmethod
+    def find_post_312(cls, tokens_by_line):
+        for line in tokens_by_line:
+            assign_ix = _find_assign_op(line)
+            if (assign_ix is not None) \
+                    and not line[assign_ix].line.strip().startswith('=') \
+                    and (len(line) >= assign_ix + 2) \
+                    and (line[assign_ix + 1].type == tokenize.OP) \
+                    and (line[assign_ix + 1].string == '!'):
+                return cls(line[assign_ix + 1].start)
+
+    @classmethod
+    def find(cls, tokens_by_line):
+        """Find the first system assignment (a = !foo) in the cell.
+        """
+        if sys.version_info < (3, 12):
+            return cls.find_pre_312(tokens_by_line)
+        return cls.find_post_312(tokens_by_line)
+
     def transform(self, lines: List[str]):
         """Transform a system assignment found by the ``find()`` classmethod.
         """
@@ -511,7 +531,8 @@ def make_tokens_by_line(lines:List[str]):
     )
     parenlev = 0
     try:
-        for token in tokenize.generate_tokens(iter(lines).__next__):
+        for token in tokenutil.generate_tokens_catch_errors(iter(lines).__next__,
+                                                            extra_errors_to_catch=['expected EOF']):
             tokens_by_line[-1].append(token)
             if (token.type == NEWLINE) \
                 or ((token.type == NL) and (parenlev <= 0)):
@@ -677,9 +698,13 @@ class TransformerManager:
         if not lines:
             return 'complete', None
 
-        if lines[-1].endswith('\\'):
-            # Explicit backslash continuation
-            return 'incomplete', find_last_indent(lines)
+        for line in reversed(lines):
+            if not line.strip():
+                continue
+            elif line.strip('\n').endswith('\\'):
+                return 'incomplete', find_last_indent(lines)
+            else:
+                break
 
         try:
             for transform in self.cleanup_transforms:
@@ -717,7 +742,8 @@ class TransformerManager:
         if not tokens_by_line:
             return 'incomplete', find_last_indent(lines)
 
-        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
+        if (tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN):
             # We're in a multiline string or expression
             return 'incomplete', find_last_indent(lines)
 
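
The version split in SystemAssign.find() above exists because the token type of a bare `!` changed: per find_post_312, 3.12 reports it as an OP token, where the older pure-Python tokenizer produced an ERRORTOKEN. A standalone sketch (not part of the commit) to observe this on the running interpreter:

    import io
    import sys
    import tokenize

    # Inspect how "!" in a system assignment tokenizes.
    for tok in tokenize.generate_tokens(io.StringIO("a = !ls\n").readline):
        if tok.string == "!":
            # Expected: ERRORTOKEN before 3.12, OP from 3.12 onwards.
            print(sys.version_info[:2], tokenize.tok_name[tok.type])
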
--- a/IPython/core/tests/test_inputtransformer2.py
+++ b/IPython/core/tests/test_inputtransformer2.py
@@ -297,14 +297,18 @@ def test_find_assign_op_dedent():
         _find_assign_op([Tk(s) for s in ("", "(", "a", "=", "b", ")", "=", "5")]) == 6
     )
 
-
+extra_closing_paren_param = (
+    pytest.param("(\n))", "invalid", None)
+    if sys.version_info >= (3, 12)
+    else pytest.param("(\n))", "incomplete", 0)
+)
 examples = [
     pytest.param("a = 1", "complete", None),
     pytest.param("for a in range(5):", "incomplete", 4),
     pytest.param("for a in range(5):\n    if a > 0:", "incomplete", 8),
     pytest.param("raise = 2", "invalid", None),
     pytest.param("a = [1,\n2,", "incomplete", 0),
-    pytest.param("(\n))", "incomplete", 0),
+    extra_closing_paren_param,
     pytest.param("\\\r\n", "incomplete", 0),
     pytest.param("a = '''\n   hi", "incomplete", 3),
     pytest.param("def a():\n    x=1\n    global x", "invalid", None),
--- a/IPython/utils/tests/test_pycolorize.py
+++ b/IPython/utils/tests/test_pycolorize.py
@@ -18,6 +18,7 @@ Authors
 #-----------------------------------------------------------------------------
 
 # our own
+import sys
 from IPython.utils.PyColorize import Parser
 import io
 import pytest
@@ -40,7 +41,7 @@ def function(arg, *args, kwarg=True, **kwargs):
 False == None
 
 with io.open(ru'unicode', encoding='utf-8'):
-    raise ValueError("\n escape \r sequence")
+    raise ValueError("escape \r sequence")
 
 print("wΔ›ird ΓΌnicoΓ°e")
 
@@ -64,6 +65,6 @@ def test_parse_sample(style):
 
 def test_parse_error(style):
     p = Parser(style=style)
-    f1 = p.format(")", "str")
+    f1 = p.format(r"\ " if sys.version_info >= (3, 12) else ")", "str")
     if style != "NoColor":
         assert "ERROR" in f1
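
The swap from `)` to `r"\ "` keeps test_parse_error checking what it always checked: that the colorizer marks an error token. A bare `)` no longer reliably produces one under the 3.12 tokenizer, while a backslash followed by a space (the 'after line continuation character' error caught in tokenutil below) still does. A sketch of the assertion outside pytest, with the style name "Linux" chosen arbitrarily:

    import sys

    from IPython.utils.PyColorize import Parser

    p = Parser(style="Linux")  # any style other than NoColor marks errors
    bad_input = r"\ " if sys.version_info >= (3, 12) else ")"
    assert "ERROR" in p.format(bad_input, "str")
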
--- a/IPython/utils/tokenutil.py
+++ b/IPython/utils/tokenutil.py
@@ -21,6 +21,31 @@ def generate_tokens(readline):
         # catch EOF error
         return
 
+def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
+    default_errors_to_catch = ['unterminated string literal', 'invalid non-printable character',
+                               'after line continuation character']
+    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
+    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
+
+    tokens = []
+    try:
+        for token in tokenize.generate_tokens(readline):
+            tokens.append(token)
+            yield token
+    except tokenize.TokenError as exc:
+        if any(error in exc.args[0] for error in errors_to_catch):
+            if tokens:
+                start = tokens[-1].start[0], tokens[-1].end[0]
+                end = start
+                line = tokens[-1].line
+            else:
+                start = end = (1, 0)
+                line = ''
+            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, '', start, end, line)
+        else:
+            # Catch EOF
+            raise
+
 def line_at_cursor(cell, cursor_pos=0):
     """Return the line in a cell at a given cursor position
 
@@ -123,5 +148,3 @@ def token_at_cursor(cell, cursor_pos=0):
         return names[-1]
     else:
         return ''
-
-
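
The helper added in tokenutil.py is the heart of the change: it wraps tokenize.generate_tokens, and when the underlying 3.12 tokenizer raises one of the listed TokenErrors it ends the stream with a synthetic, empty-string ERRORTOKEN instead of propagating, so every caller iterates to a well-defined end. A usage sketch, assuming a build of IPython with this commit:

    import io
    import tokenize

    from IPython.utils import tokenutil

    # On 3.12 the unterminated string raises
    # TokenError('unterminated string literal ...'), which the helper
    # converts into a trailing empty ERRORTOKEN.
    readline = io.StringIO("x = 'unterminated").readline
    toks = list(tokenutil.generate_tokens_catch_errors(readline))
    print(tokenize.tok_name[toks[-1].type], repr(toks[-1].string))

Since the check is a substring match on exc.args[0], the 'expected EOF' entry that make_tokens_by_line passes via extra_errors_to_catch also matches messages containing 'unexpected EOF'.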