From 1098b2c170ae0c83fd8878c3e2f2adcefe96a79b 2023-06-28 17:02:48
From: Lysandros Nikolaou
Date: 2023-06-28 17:02:48
Subject: [PATCH] Fix issues due to breaking tokenize changes in 3.12

---
diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py
index 10707d3..d33bd57 100644
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -44,6 +44,7 @@ from IPython.core.inputtransformer import (leading_indent,
                                            assign_from_system,
                                            assemble_python_lines,
                                            )
+from IPython.utils import tokenutil
 
 # These are available in this module for backwards compatibility.
 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
@@ -128,7 +129,7 @@ def partial_tokens(s):
     readline = io.StringIO(s).readline
     token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
     try:
-        for token in tokenize.generate_tokens(readline):
+        for token in tokenutil.generate_tokens_catch_errors(readline):
             yield token
     except tokenize.TokenError as e:
         # catch EOF error
@@ -150,9 +151,17 @@
     tokens.pop()
     if not tokens:
         return 0
-    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
+
+    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT, tokenize.ERRORTOKEN}):
         tokens.pop()
 
+    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
+    # of input. We need to remove those if we're in a multiline statement
+    if tokens[-1].type == IN_MULTILINE_STATEMENT:
+        while tokens[-2].type in {tokenize.NL}:
+            tokens.pop(-2)
+
+
     if tokens[-1].type == INCOMPLETE_STRING:
         # Inside a multiline string
         return 0
diff --git a/IPython/core/inputtransformer.py b/IPython/core/inputtransformer.py
index 77f69f3..81cd1fa 100644
--- a/IPython/core/inputtransformer.py
+++ b/IPython/core/inputtransformer.py
@@ -9,10 +9,11 @@ import abc
 import functools
 import re
 import tokenize
-from tokenize import generate_tokens, untokenize, TokenError
+from tokenize import untokenize, TokenError
 from io import StringIO
 
 from IPython.core.splitinput import LineInfo
+from IPython.utils import tokenutil
 
 #-----------------------------------------------------------------------------
 # Globals
@@ -127,7 +128,7 @@ class TokenInputTransformer(InputTransformer):
 
     def reset_tokenizer(self):
         it = iter(self.buf)
-        self.tokenizer = generate_tokens(it.__next__)
+        self.tokenizer = tokenutil.generate_tokens_catch_errors(it.__next__)
 
     def push(self, line):
         self.buf.append(line + '\n')
@@ -295,7 +296,7 @@ def _line_tokens(line):
     readline = StringIO(line).readline
     toktypes = set()
     try:
-        for t in generate_tokens(readline):
+        for t in tokenutil.generate_tokens_catch_errors(readline):
             toktypes.add(t[0])
     except TokenError as e:
         # There are only two cases where a TokenError is raised.
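
Note on the find_next_indent hunk above: the 3.12 behavior it works around can
be reproduced with the stdlib alone. A minimal sketch (the input string is an
arbitrary example; the exact token stream differs between 3.11 and 3.12):

    import io
    import tokenize

    def dump(source):
        tokens = []
        try:
            for tok in tokenize.generate_tokens(io.StringIO(source).readline):
                tokens.append(tok)
        except tokenize.TokenError as exc:
            print('TokenError:', exc.args[0])
        for tok in tokens:
            print(tokenize.tok_name[tok.type], repr(tok.string))

    # An unfinished multiline statement: the trailing TokenError is expected
    # on both versions, but on 3.12 the tokens collected before it end with
    # the implicit newline token(s) described in the comment added to
    # find_next_indent, which is why that function now pops NL tokens sitting
    # in front of the IN_MULTILINE_STATEMENT marker.
    dump("a = [1,\n2,")
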
- """ + def find_pre_312(cls, tokens_by_line): for line in tokens_by_line: assign_ix = _find_assign_op(line) if (assign_ix is not None) \ @@ -287,6 +288,25 @@ class SystemAssign(TokenTransformBase): break ix += 1 + @classmethod + def find_post_312(cls, tokens_by_line): + for line in tokens_by_line: + assign_ix = _find_assign_op(line) + if (assign_ix is not None) \ + and not line[assign_ix].line.strip().startswith('=') \ + and (len(line) >= assign_ix + 2) \ + and (line[assign_ix + 1].type == tokenize.OP) \ + and (line[assign_ix + 1].string == '!'): + return cls(line[assign_ix + 1].start) + + @classmethod + def find(cls, tokens_by_line): + """Find the first system assignment (a = !foo) in the cell. + """ + if sys.version_info < (3, 12): + return cls.find_pre_312(tokens_by_line) + return cls.find_post_312(tokens_by_line) + def transform(self, lines: List[str]): """Transform a system assignment found by the ``find()`` classmethod. """ @@ -511,7 +531,8 @@ def make_tokens_by_line(lines:List[str]): ) parenlev = 0 try: - for token in tokenize.generate_tokens(iter(lines).__next__): + for token in tokenutil.generate_tokens_catch_errors(iter(lines).__next__, + extra_errors_to_catch=['expected EOF']): tokens_by_line[-1].append(token) if (token.type == NEWLINE) \ or ((token.type == NL) and (parenlev <= 0)): @@ -677,9 +698,13 @@ class TransformerManager: if not lines: return 'complete', None - if lines[-1].endswith('\\'): - # Explicit backslash continuation - return 'incomplete', find_last_indent(lines) + for line in reversed(lines): + if not line.strip(): + continue + elif line.strip('\n').endswith('\\'): + return 'incomplete', find_last_indent(lines) + else: + break try: for transform in self.cleanup_transforms: @@ -717,7 +742,8 @@ class TransformerManager: if not tokens_by_line: return 'incomplete', find_last_indent(lines) - if tokens_by_line[-1][-1].type != tokenize.ENDMARKER: + if (tokens_by_line[-1][-1].type != tokenize.ENDMARKER + and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN): # We're in a multiline string or expression return 'incomplete', find_last_indent(lines) diff --git a/IPython/core/tests/test_inputtransformer2.py b/IPython/core/tests/test_inputtransformer2.py index cddb32f..0792f7c 100644 --- a/IPython/core/tests/test_inputtransformer2.py +++ b/IPython/core/tests/test_inputtransformer2.py @@ -297,14 +297,18 @@ def test_find_assign_op_dedent(): _find_assign_op([Tk(s) for s in ("", "(", "a", "=", "b", ")", "=", "5")]) == 6 ) - +extra_closing_paren_param = ( + pytest.param("(\n))", "invalid", None) + if sys.version_info >= (3, 12) + else pytest.param("(\n))", "incomplete", 0) +) examples = [ pytest.param("a = 1", "complete", None), pytest.param("for a in range(5):", "incomplete", 4), pytest.param("for a in range(5):\n if a > 0:", "incomplete", 8), pytest.param("raise = 2", "invalid", None), pytest.param("a = [1,\n2,", "incomplete", 0), - pytest.param("(\n))", "incomplete", 0), + extra_closing_paren_param, pytest.param("\\\r\n", "incomplete", 0), pytest.param("a = '''\n hi", "incomplete", 3), pytest.param("def a():\n x=1\n global x", "invalid", None), diff --git a/IPython/utils/tests/test_pycolorize.py b/IPython/utils/tests/test_pycolorize.py index 986b917..df2acd0 100644 --- a/IPython/utils/tests/test_pycolorize.py +++ b/IPython/utils/tests/test_pycolorize.py @@ -18,6 +18,7 @@ Authors #----------------------------------------------------------------------------- # our own +import sys from IPython.utils.PyColorize import Parser import io import pytest @@ -40,7 +41,7 @@ def 
diff --git a/IPython/utils/tests/test_pycolorize.py b/IPython/utils/tests/test_pycolorize.py
index 986b917..df2acd0 100644
--- a/IPython/utils/tests/test_pycolorize.py
+++ b/IPython/utils/tests/test_pycolorize.py
@@ -18,6 +18,7 @@ Authors
 #-----------------------------------------------------------------------------
 
 # our own
+import sys
 from IPython.utils.PyColorize import Parser
 import io
 import pytest
@@ -40,7 +41,7 @@ def function(arg, *args, kwarg=True, **kwargs):
     False == None
 
     with io.open(ru'unicode', encoding='utf-8'):
-        raise ValueError("\n escape \r sequence")
+        raise ValueError("escape \r sequence")
 
     print("wěird ünicoðe")
 
@@ -64,6 +65,6 @@ def test_parse_sample(style):
 
 def test_parse_error(style):
     p = Parser(style=style)
-    f1 = p.format(")", "str")
+    f1 = p.format(r"\ " if sys.version_info >= (3, 12) else ")", "str")
     if style != "NoColor":
         assert "ERROR" in f1
diff --git a/IPython/utils/tokenutil.py b/IPython/utils/tokenutil.py
index 697d2b5..c9228dc 100644
--- a/IPython/utils/tokenutil.py
+++ b/IPython/utils/tokenutil.py
@@ -21,6 +21,31 @@
         # catch EOF error
         return
 
+def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
+    default_errors_to_catch = ['unterminated string literal', 'invalid non-printable character',
+                               'after line continuation character']
+    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
+    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
+
+    tokens = []
+    try:
+        for token in tokenize.generate_tokens(readline):
+            tokens.append(token)
+            yield token
+    except tokenize.TokenError as exc:
+        if any(error in exc.args[0] for error in errors_to_catch):
+            if tokens:
+                start = tokens[-1].start[0], tokens[-1].end[0]
+                end = start
+                line = tokens[-1].line
+            else:
+                start = end = (1, 0)
+                line = ''
+            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, '', start, end, line)
+        else:
+            # Catch EOF
+            raise
+
 def line_at_cursor(cell, cursor_pos=0):
     """Return the line in a cell at a given cursor position
 
@@ -123,5 +148,3 @@ def token_at_cursor(cell, cursor_pos=0):
         return names[-1]
     else:
         return ''
-
-
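
The new tokenutil helper is what ties the rest of the patch together: instead
of letting 3.12's mid-stream TokenError escape, it terminates the stream with a
synthetic ERRORTOKEN, which is why check_complete in inputtransformer2.py now
accepts a trailing ERRORTOKEN alongside ENDMARKER. A usage sketch, assuming the
patched IPython is importable:

    import io
    import tokenize

    from IPython.utils import tokenutil

    # On 3.12 this input makes tokenize raise "unterminated string literal",
    # one of the messages the wrapper downgrades to an ERRORTOKEN.
    source = "x = 'unterminated"
    readline = io.StringIO(source).readline
    for tok in tokenutil.generate_tokens_catch_errors(readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
    # Pre-3.12 tokenizers never raise here; they yield their own ERRORTOKEN
    # for the stray quote, so the loop behaves sensibly on either version.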