Fix issues due to breaking tokenize changes in 3.12 (#14107)...
Matthias Bussonnier
r28328:635815e8 merge
@@ -44,6 +44,7 @@ from IPython.core.inputtransformer import (leading_indent,
     assign_from_system,
     assemble_python_lines,
 )
+from IPython.utils import tokenutil

 # These are available in this module for backwards compatibility.
 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
@@ -128,7 +129,7 @@ def partial_tokens(s):
     readline = io.StringIO(s).readline
     token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
     try:
-        for token in tokenize.generate_tokens(readline):
+        for token in tokenutil.generate_tokens_catch_errors(readline):
             yield token
     except tokenize.TokenError as e:
         # catch EOF error
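For context on what the wrapper shields against: plain tokenize.generate_tokens reports incomplete input by raising TokenError, and the exact message and failure point moved around in Python 3.12. A minimal illustrative snippet (the input string is made up) showing the exception the old code had to catch:

import io
import tokenize

# Unterminated triple-quoted string: tokenize yields what it can, then raises
# TokenError (the message wording differs between 3.11 and 3.12).
try:
    for tok in tokenize.generate_tokens(io.StringIO("s = '''abc\n").readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
except tokenize.TokenError as e:
    print("TokenError:", e)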
@@ -150,9 +151,22 @@ def find_next_indent(code):
         tokens.pop()
     if not tokens:
         return 0
-    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
+
+    while tokens[-1].type in {
+        tokenize.DEDENT,
+        tokenize.NEWLINE,
+        tokenize.COMMENT,
+        tokenize.ERRORTOKEN,
+    }:
         tokens.pop()

+    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
+    # of input. We need to remove those if we're in a multiline statement
+    if tokens[-1].type == IN_MULTILINE_STATEMENT:
+        while tokens[-2].type in {tokenize.NL}:
+            tokens.pop(-2)
+
+
     if tokens[-1].type == INCOMPLETE_STRING:
         # Inside a multiline string
         return 0
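The new pruning exists because, as the comment above notes, the 3.12 tokenizer appends implicit NEWLINE/NL tokens at the end of input. A rough way to inspect the tail of the token stream on your own interpreter (illustrative input, not from the test suite):

import io
import tokenize

toks = []
try:
    # Open bracket, so the statement is still incomplete at end of input.
    for tok in tokenize.generate_tokens(io.StringIO("a = [1,\n2,\n").readline):
        toks.append(tok)
except tokenize.TokenError:
    pass  # incomplete input still ends in a TokenError
for tok in toks[-3:]:
    print(tokenize.tok_name[tok.type], repr(tok.string))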
@@ -9,10 +9,11 @@ import abc
 import functools
 import re
 import tokenize
-from tokenize import generate_tokens, untokenize, TokenError
+from tokenize import untokenize, TokenError
 from io import StringIO

 from IPython.core.splitinput import LineInfo
+from IPython.utils import tokenutil

 #-----------------------------------------------------------------------------
 # Globals
@@ -127,7 +128,7 @@ class TokenInputTransformer(InputTransformer):

     def reset_tokenizer(self):
         it = iter(self.buf)
-        self.tokenizer = generate_tokens(it.__next__)
+        self.tokenizer = tokenutil.generate_tokens_catch_errors(it.__next__)

     def push(self, line):
         self.buf.append(line + '\n')
@@ -295,7 +296,7 @@ def _line_tokens(line):
     readline = StringIO(line).readline
     toktypes = set()
     try:
-        for t in generate_tokens(readline):
+        for t in tokenutil.generate_tokens_catch_errors(readline):
             toktypes.add(t[0])
     except TokenError as e:
         # There are only two cases where a TokenError is raised.
@@ -13,10 +13,13 @@ deprecated in 7.0.
 import ast
 from codeop import CommandCompiler, Compile
 import re
+import sys
 import tokenize
 from typing import List, Tuple, Optional, Any
 import warnings

+from IPython.utils import tokenutil
+
 _indent_re = re.compile(r'^[ \t]+')

 def leading_empty_lines(lines):
@@ -269,9 +272,7 @@ class MagicAssign(TokenTransformBase):
 class SystemAssign(TokenTransformBase):
     """Transformer for assignments from system commands (a = !foo)"""
     @classmethod
-    def find(cls, tokens_by_line):
-        """Find the first system assignment (a = !foo) in the cell.
-        """
+    def find_pre_312(cls, tokens_by_line):
         for line in tokens_by_line:
             assign_ix = _find_assign_op(line)
             if (assign_ix is not None) \
@@ -287,6 +288,26 @@ class SystemAssign(TokenTransformBase):
                     break
                 ix += 1

+    @classmethod
+    def find_post_312(cls, tokens_by_line):
+        for line in tokens_by_line:
+            assign_ix = _find_assign_op(line)
+            if (
+                (assign_ix is not None)
+                and not line[assign_ix].line.strip().startswith("=")
+                and (len(line) >= assign_ix + 2)
+                and (line[assign_ix + 1].type == tokenize.OP)
+                and (line[assign_ix + 1].string == "!")
+            ):
+                return cls(line[assign_ix + 1].start)
+
+    @classmethod
+    def find(cls, tokens_by_line):
+        """Find the first system assignment (a = !foo) in the cell."""
+        if sys.version_info < (3, 12):
+            return cls.find_pre_312(tokens_by_line)
+        return cls.find_post_312(tokens_by_line)
+
     def transform(self, lines: List[str]):
         """Transform a system assignment found by the ``find()`` classmethod.
         """
@@ -511,7 +532,9 @@ def make_tokens_by_line(lines:List[str]):
     )
     parenlev = 0
     try:
-        for token in tokenize.generate_tokens(iter(lines).__next__):
+        for token in tokenutil.generate_tokens_catch_errors(
+            iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
+        ):
             tokens_by_line[-1].append(token)
             if (token.type == NEWLINE) \
                 or ((token.type == NL) and (parenlev <= 0)):
@@ -677,9 +700,13 @@ class TransformerManager:
         if not lines:
             return 'complete', None

-        if lines[-1].endswith('\\'):
-            # Explicit backslash continuation
-            return 'incomplete', find_last_indent(lines)
+        for line in reversed(lines):
+            if not line.strip():
+                continue
+            elif line.strip("\n").endswith("\\"):
+                return "incomplete", find_last_indent(lines)
+            else:
+                break

         try:
             for transform in self.cleanup_transforms:
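The replacement loop walks back over trailing blank lines, so a backslash continuation followed by an empty line is still reported as incomplete rather than only checking the literal last line. A standalone mirror of that scan (the helper name is made up for illustration):

def ends_with_continuation(lines):
    # Skip blank lines from the end; the last non-blank line decides.
    for line in reversed(lines):
        if not line.strip():
            continue
        return line.strip("\n").endswith("\\")
    return False

print(ends_with_continuation(["a = 1 + \\\n", "\n"]))  # True
print(ends_with_continuation(["a = 1\n"]))             # False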
@@ -717,7 +744,10 @@ class TransformerManager:
         if not tokens_by_line:
             return 'incomplete', find_last_indent(lines)

-        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
+        if (
+            tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
+        ):
             # We're in a multiline string or expression
             return 'incomplete', find_last_indent(lines)

@@ -298,13 +298,18 @@ def test_find_assign_op_dedent():
     )


+extra_closing_paren_param = (
+    pytest.param("(\n))", "invalid", None)
+    if sys.version_info >= (3, 12)
+    else pytest.param("(\n))", "incomplete", 0)
+)
 examples = [
     pytest.param("a = 1", "complete", None),
     pytest.param("for a in range(5):", "incomplete", 4),
     pytest.param("for a in range(5):\n if a > 0:", "incomplete", 8),
     pytest.param("raise = 2", "invalid", None),
     pytest.param("a = [1,\n2,", "incomplete", 0),
-    pytest.param("(\n))", "incomplete", 0),
+    extra_closing_paren_param,
     pytest.param("\\\r\n", "incomplete", 0),
     pytest.param("a = '''\n hi", "incomplete", 3),
     pytest.param("def a():\n x=1\n global x", "invalid", None),
@@ -18,6 +18,7 @@ Authors
 #-----------------------------------------------------------------------------

 # our own
+import sys
 from IPython.utils.PyColorize import Parser
 import io
 import pytest
@@ -40,7 +41,7 @@ def function(arg, *args, kwarg=True, **kwargs):
     False == None

     with io.open(ru'unicode', encoding='utf-8'):
-        raise ValueError("\n escape \r sequence")
+        raise ValueError("escape \r sequence")

     print("wěird ünicoðe")

@@ -64,6 +65,6 @@ def test_parse_sample(style):

 def test_parse_error(style):
     p = Parser(style=style)
-    f1 = p.format(")", "str")
+    f1 = p.format(r"\ " if sys.version_info >= (3, 12) else ")", "str")
     if style != "NoColor":
         assert "ERROR" in f1
@@ -21,6 +21,36 @@ def generate_tokens(readline):
         # catch EOF error
         return

+
+def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
+    default_errors_to_catch = [
+        "unterminated string literal",
+        "invalid non-printable character",
+        "after line continuation character",
+    ]
+    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
+    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
+
+    tokens = []
+    try:
+        for token in tokenize.generate_tokens(readline):
+            tokens.append(token)
+            yield token
+    except tokenize.TokenError as exc:
+        if any(error in exc.args[0] for error in errors_to_catch):
+            if tokens:
+                start = tokens[-1].start[0], tokens[-1].end[0]
+                end = start
+                line = tokens[-1].line
+            else:
+                start = end = (1, 0)
+                line = ""
+            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
+        else:
+            # Catch EOF
+            raise
+
+
 def line_at_cursor(cell, cursor_pos=0):
     """Return the line in a cell at a given cursor position

@@ -123,5 +153,3 @@ def token_at_cursor(cell, cursor_pos=0):
         return names[-1]
     else:
         return ''
-
-
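Callers iterate the new generate_tokens_catch_errors helper exactly like tokenize.generate_tokens; a hedged usage sketch (the input string is illustrative):

import io
from IPython.utils import tokenutil

src = "a = 'unterminated\n"
# Yields whatever tokens could be produced; if the tokenizer fails with one of
# the recognised TokenError messages, a single ERRORTOKEN is yielded instead of
# the exception propagating to the caller.
for tok in tokenutil.generate_tokens_catch_errors(io.StringIO(src).readline):
    print(tok.type, repr(tok.string))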