Fix issues due to breaking tokenize changes in 3.12
Lysandros Nikolaou
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -44,6 +44,7 @@ from IPython.core.inputtransformer import (leading_indent,
                                            assign_from_system,
                                            assemble_python_lines,
                                            )
+from IPython.utils import tokenutil
 
 # These are available in this module for backwards compatibility.
 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
@@ -128,7 +129,7 @@ def partial_tokens(s):
     readline = io.StringIO(s).readline
     token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
     try:
-        for token in tokenize.generate_tokens(readline):
+        for token in tokenutil.generate_tokens_catch_errors(readline):
             yield token
     except tokenize.TokenError as e:
         # catch EOF error
@@ -150,9 +151,17 @@ def find_next_indent(code):
         tokens.pop()
     if not tokens:
         return 0
-    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
+
+    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT, tokenize.ERRORTOKEN}):
         tokens.pop()
 
+    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
+    # of input. We need to remove those if we're in a multiline statement
+    if tokens[-1].type == IN_MULTILINE_STATEMENT:
+        while tokens[-2].type in {tokenize.NL}:
+            tokens.pop(-2)
+
+
     if tokens[-1].type == INCOMPLETE_STRING:
         # Inside a multiline string
         return 0
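
A note on the inputsplitter.py hunks above: the rewritten tokenizer in Python 3.12 emits implicit NEWLINE/NL tokens for input that does not end in a newline, even inside an unclosed bracket, which is what the new IN_MULTILINE_STATEMENT cleanup compensates for. A minimal standalone sketch of the difference (not part of the commit):

    import io
    import tokenize

    def dump(source):
        # Print the token stream until tokenize gives up.
        readline = io.StringIO(source).readline
        try:
            for tok in tokenize.generate_tokens(readline):
                print(tokenize.tok_name[tok.type], repr(tok.string))
        except tokenize.TokenError as exc:
            print("TokenError:", exc.args[0])

    # On 3.12 an implicit newline token for the missing trailing newline
    # may appear before the TokenError, which is why find_next_indent()
    # now pops trailing NL tokens when inside a multiline statement.
    dump("a = [1,\n2,")
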
--- a/IPython/core/inputtransformer.py
+++ b/IPython/core/inputtransformer.py
@@ -9,10 +9,11 @@ import abc
 import functools
 import re
 import tokenize
-from tokenize import generate_tokens, untokenize, TokenError
+from tokenize import untokenize, TokenError
 from io import StringIO
 
 from IPython.core.splitinput import LineInfo
+from IPython.utils import tokenutil
 
 #-----------------------------------------------------------------------------
 # Globals
@@ -127,7 +128,7 @@ class TokenInputTransformer(InputTransformer):
 
     def reset_tokenizer(self):
         it = iter(self.buf)
-        self.tokenizer = generate_tokens(it.__next__)
+        self.tokenizer = tokenutil.generate_tokens_catch_errors(it.__next__)
 
     def push(self, line):
         self.buf.append(line + '\n')
@@ -295,7 +296,7 @@ def _line_tokens(line):
     readline = StringIO(line).readline
     toktypes = set()
     try:
-        for t in generate_tokens(readline):
+        for t in tokenutil.generate_tokens_catch_errors(readline):
             toktypes.add(t[0])
     except TokenError as e:
         # There are only two cases where a TokenError is raised.
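
The three inputtransformer.py call sites above are mechanical swaps to the new helper. A sketch of the _line_tokens pattern with the safe generator, assuming a build of IPython that includes this change:

    from io import StringIO
    import tokenize

    from IPython.utils import tokenutil

    # On 3.12 an unterminated string raises TokenError mid-iteration in
    # plain tokenize.generate_tokens; the helper yields a placeholder
    # ERRORTOKEN instead, so collecting token types keeps working.
    toktypes = set()
    for t in tokenutil.generate_tokens_catch_errors(StringIO("s = 'abc").readline):
        toktypes.add(t[0])
    print(tokenize.ERRORTOKEN in toktypes)
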
--- a/IPython/core/inputtransformer2.py
+++ b/IPython/core/inputtransformer2.py
@@ -13,10 +13,13 @@ deprecated in 7.0.
 import ast
 from codeop import CommandCompiler, Compile
 import re
+import sys
 import tokenize
 from typing import List, Tuple, Optional, Any
 import warnings
 
+from IPython.utils import tokenutil
+
 _indent_re = re.compile(r'^[ \t]+')
 
 def leading_empty_lines(lines):
@@ -269,9 +272,7 @@ class MagicAssign(TokenTransformBase):
 class SystemAssign(TokenTransformBase):
     """Transformer for assignments from system commands (a = !foo)"""
     @classmethod
-    def find(cls, tokens_by_line):
-        """Find the first system assignment (a = !foo) in the cell.
-        """
+    def find_pre_312(cls, tokens_by_line):
         for line in tokens_by_line:
             assign_ix = _find_assign_op(line)
             if (assign_ix is not None) \
@@ -287,6 +288,25 @@ class SystemAssign(TokenTransformBase):
                     break
                 ix += 1
 
+    @classmethod
+    def find_post_312(cls, tokens_by_line):
+        for line in tokens_by_line:
+            assign_ix = _find_assign_op(line)
+            if (assign_ix is not None) \
+                    and not line[assign_ix].line.strip().startswith('=') \
+                    and (len(line) >= assign_ix + 2) \
+                    and (line[assign_ix + 1].type == tokenize.OP) \
+                    and (line[assign_ix + 1].string == '!'):
+                return cls(line[assign_ix + 1].start)
+
+    @classmethod
+    def find(cls, tokens_by_line):
+        """Find the first system assignment (a = !foo) in the cell.
+        """
+        if sys.version_info < (3, 12):
+            return cls.find_pre_312(tokens_by_line)
+        return cls.find_post_312(tokens_by_line)
+
     def transform(self, lines: List[str]):
         """Transform a system assignment found by the ``find()`` classmethod.
         """
@@ -511,7 +531,8 @@ def make_tokens_by_line(lines:List[str]):
     )
     parenlev = 0
     try:
-        for token in tokenize.generate_tokens(iter(lines).__next__):
+        for token in tokenutil.generate_tokens_catch_errors(iter(lines).__next__,
+                                                            extra_errors_to_catch=['expected EOF']):
             tokens_by_line[-1].append(token)
             if (token.type == NEWLINE) \
                 or ((token.type == NL) and (parenlev <= 0)):
@@ -677,9 +698,13 @@ class TransformerManager:
         if not lines:
             return 'complete', None
 
-        if lines[-1].endswith('\\'):
-            # Explicit backslash continuation
-            return 'incomplete', find_last_indent(lines)
+        for line in reversed(lines):
+            if not line.strip():
+                continue
+            elif line.strip('\n').endswith('\\'):
+                return 'incomplete', find_last_indent(lines)
+            else:
+                break
 
         try:
             for transform in self.cleanup_transforms:
@@ -717,7 +742,8 @@ class TransformerManager:
         if not tokens_by_line:
             return 'incomplete', find_last_indent(lines)
 
-        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
+        if (tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN):
             # We're in a multiline string or expression
             return 'incomplete', find_last_indent(lines)
 
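
The version split in SystemAssign.find() above exists because the token type of a bare `!` changed: per find_post_312, 3.12 reports it as an OP token, where the older pure-Python tokenizer produced an ERRORTOKEN. A standalone sketch (not part of the commit) to observe this on the running interpreter:

    import io
    import sys
    import tokenize

    # Inspect how "!" in a system assignment tokenizes.
    for tok in tokenize.generate_tokens(io.StringIO("a = !ls\n").readline):
        if tok.string == "!":
            # Expected: ERRORTOKEN before 3.12, OP from 3.12 onwards.
            print(sys.version_info[:2], tokenize.tok_name[tok.type])
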
--- a/IPython/core/tests/test_inputtransformer2.py
+++ b/IPython/core/tests/test_inputtransformer2.py
@@ -297,14 +297,18 @@ def test_find_assign_op_dedent():
         _find_assign_op([Tk(s) for s in ("", "(", "a", "=", "b", ")", "=", "5")]) == 6
     )
 
-
+extra_closing_paren_param = (
+    pytest.param("(\n))", "invalid", None)
+    if sys.version_info >= (3, 12)
+    else pytest.param("(\n))", "incomplete", 0)
+)
 examples = [
     pytest.param("a = 1", "complete", None),
     pytest.param("for a in range(5):", "incomplete", 4),
     pytest.param("for a in range(5):\n    if a > 0:", "incomplete", 8),
     pytest.param("raise = 2", "invalid", None),
     pytest.param("a = [1,\n2,", "incomplete", 0),
-    pytest.param("(\n))", "incomplete", 0),
+    extra_closing_paren_param,
     pytest.param("\\\r\n", "incomplete", 0),
     pytest.param("a = '''\n   hi", "incomplete", 3),
     pytest.param("def a():\n    x=1\n    global x", "invalid", None),
--- a/IPython/utils/tests/test_pycolorize.py
+++ b/IPython/utils/tests/test_pycolorize.py
@@ -18,6 +18,7 @@ Authors
 #-----------------------------------------------------------------------------
 
 # our own
+import sys
 from IPython.utils.PyColorize import Parser
 import io
 import pytest
@@ -40,7 +41,7 @@ def function(arg, *args, kwarg=True, **kwargs):
 False == None
 
 with io.open(ru'unicode', encoding='utf-8'):
-    raise ValueError("\n escape \r sequence")
+    raise ValueError("escape \r sequence")
 
 print("wΔ›ird ΓΌnicoΓ°e")
 
@@ -64,6 +65,6 @@ def test_parse_sample(style):
 
 def test_parse_error(style):
     p = Parser(style=style)
-    f1 = p.format(")", "str")
+    f1 = p.format(r"\ " if sys.version_info >= (3, 12) else ")", "str")
     if style != "NoColor":
         assert "ERROR" in f1
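
The swap from `)` to `r"\ "` keeps test_parse_error checking what it always checked: that the colorizer marks an error token. A bare `)` no longer reliably produces one under the 3.12 tokenizer, while a backslash followed by a space (the 'after line continuation character' error caught in tokenutil below) still does. A sketch of the assertion outside pytest, with the style name "Linux" chosen arbitrarily:

    import sys

    from IPython.utils.PyColorize import Parser

    p = Parser(style="Linux")  # any style other than NoColor marks errors
    bad_input = r"\ " if sys.version_info >= (3, 12) else ")"
    assert "ERROR" in p.format(bad_input, "str")
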
--- a/IPython/utils/tokenutil.py
+++ b/IPython/utils/tokenutil.py
@@ -21,6 +21,31 @@ def generate_tokens(readline):
         # catch EOF error
         return
 
+def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
+    default_errors_to_catch = ['unterminated string literal', 'invalid non-printable character',
+                               'after line continuation character']
+    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
+    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
+
+    tokens = []
+    try:
+        for token in tokenize.generate_tokens(readline):
+            tokens.append(token)
+            yield token
+    except tokenize.TokenError as exc:
+        if any(error in exc.args[0] for error in errors_to_catch):
+            if tokens:
+                start = tokens[-1].start[0], tokens[-1].end[0]
+                end = start
+                line = tokens[-1].line
+            else:
+                start = end = (1, 0)
+                line = ''
+            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, '', start, end, line)
+        else:
+            # Catch EOF
+            raise
+
 def line_at_cursor(cell, cursor_pos=0):
     """Return the line in a cell at a given cursor position
 
@@ -123,5 +148,3 @@ def token_at_cursor(cell, cursor_pos=0):
         return names[-1]
     else:
         return ''
-
-
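
The helper added in tokenutil.py is the heart of the change: it wraps tokenize.generate_tokens, and when the underlying 3.12 tokenizer raises one of the listed TokenErrors it ends the stream with a synthetic, empty-string ERRORTOKEN instead of propagating, so every caller iterates to a well-defined end. A usage sketch, assuming a build of IPython with this commit:

    import io
    import tokenize

    from IPython.utils import tokenutil

    # On 3.12 the unterminated string raises
    # TokenError('unterminated string literal ...'), which the helper
    # converts into a trailing empty ERRORTOKEN.
    readline = io.StringIO("x = 'unterminated").readline
    toks = list(tokenutil.generate_tokens_catch_errors(readline))
    print(tokenize.tok_name[toks[-1].type], repr(toks[-1].string))

Since the check is a substring match on exc.args[0], the 'expected EOF' entry that make_tokens_by_line passes via extra_errors_to_catch also matches messages containing 'unexpected EOF'.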