@@ -44,6 +44,7 @@ from IPython.core.inputtransformer import (leading_indent,
                                            assign_from_system,
                                            assemble_python_lines,
                                            )
+from IPython.utils import tokenutil
 
 # These are available in this module for backwards compatibility.
 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
@@ -128,7 +129,7 @@ def partial_tokens(s):
     readline = io.StringIO(s).readline
     token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
     try:
-        for token in tokenize.generate_tokens(readline):
+        for token in tokenutil.generate_tokens_catch_errors(readline):
             yield token
     except tokenize.TokenError as e:
         # catch EOF error
@@ -150,9 +151,22 @@ def find_next_indent(code):
         tokens.pop()
     if not tokens:
         return 0
-    while (tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}):
+
+    while tokens[-1].type in {
+        tokenize.DEDENT,
+        tokenize.NEWLINE,
+        tokenize.COMMENT,
+        tokenize.ERRORTOKEN,
+    }:
         tokens.pop()
 
+    # Starting in Python 3.12, the tokenize module adds implicit newlines at the end
+    # of input. We need to remove those if we're in a multiline statement
+    if tokens[-1].type == IN_MULTILINE_STATEMENT:
+        while tokens[-2].type in {tokenize.NL}:
+            tokens.pop(-2)
+
+
     if tokens[-1].type == INCOMPLETE_STRING:
         # Inside a multiline string
         return 0
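
Note on the hunk above: Python 3.12 reimplemented `tokenize` on top of the C tokenizer, which appends implicit NEWLINE/NL tokens at the end of input; the new block strips those so the indent calculation still sees the last real token (`IN_MULTILINE_STATEMENT` and `INCOMPLETE_STRING` are sentinel token types defined earlier in this module). A scratch illustration of the token tail, stdlib only, not part of the diff:

```python
# Tokenize an unfinished block and inspect the trailing tokens. On 3.12+
# an implicit NEWLINE/NL shows up before the stream is cut off, which
# find_next_indent now has to discard.
import io
import tokenize

src = "if True:\n    x = [1,\n"
tokens = []
try:
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        tokens.append(tok)
except tokenize.TokenError:
    pass  # the '[' is never closed, so tokenization stops early

for tok in tokens[-3:]:
    print(tokenize.tok_name[tok.type], repr(tok.string))
```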
@@ -9,10 +9,11 @@ import abc
 import functools
 import re
 import tokenize
-from tokenize import generate_tokens, untokenize, TokenError
+from tokenize import untokenize, TokenError
 from io import StringIO
 
 from IPython.core.splitinput import LineInfo
+from IPython.utils import tokenutil
 
 #-----------------------------------------------------------------------------
 # Globals
@@ -127,7 +128,7 @@ class TokenInputTransformer(InputTransformer):
 
     def reset_tokenizer(self):
         it = iter(self.buf)
-        self.tokenizer = generate_tokens(it.__next__)
+        self.tokenizer = tokenutil.generate_tokens_catch_errors(it.__next__)
 
     def push(self, line):
         self.buf.append(line + '\n')
@@ -295,7 +296,7 @@ def _line_tokens(line):
     readline = StringIO(line).readline
     toktypes = set()
     try:
-        for t in generate_tokens(readline):
+        for t in tokenutil.generate_tokens_catch_errors(readline):
             toktypes.add(t[0])
     except TokenError as e:
         # There are only two cases where a TokenError is raised.
@@ -13,10 +13,13 @@ deprecated in 7.0.
 import ast
 from codeop import CommandCompiler, Compile
 import re
+import sys
 import tokenize
 from typing import List, Tuple, Optional, Any
 import warnings
 
+from IPython.utils import tokenutil
+
 _indent_re = re.compile(r'^[ \t]+')
 
 def leading_empty_lines(lines):
@@ -269,9 +272,7 @@ class MagicAssign(TokenTransformBase):
 class SystemAssign(TokenTransformBase):
     """Transformer for assignments from system commands (a = !foo)"""
     @classmethod
-    def find(cls, tokens_by_line):
-        """Find the first system assignment (a = !foo) in the cell.
-        """
+    def find_pre_312(cls, tokens_by_line):
         for line in tokens_by_line:
             assign_ix = _find_assign_op(line)
             if (assign_ix is not None) \
@@ -287,6 +288,26 @@ class SystemAssign(TokenTransformBase):
                         break
                     ix += 1
 
+    @classmethod
+    def find_post_312(cls, tokens_by_line):
+        for line in tokens_by_line:
+            assign_ix = _find_assign_op(line)
+            if (
+                (assign_ix is not None)
+                and not line[assign_ix].line.strip().startswith("=")
+                and (len(line) >= assign_ix + 2)
+                and (line[assign_ix + 1].type == tokenize.OP)
+                and (line[assign_ix + 1].string == "!")
+            ):
+                return cls(line[assign_ix + 1].start)
+
+    @classmethod
+    def find(cls, tokens_by_line):
+        """Find the first system assignment (a = !foo) in the cell."""
+        if sys.version_info < (3, 12):
+            return cls.find_pre_312(tokens_by_line)
+        return cls.find_post_312(tokens_by_line)
+
     def transform(self, lines: List[str]):
         """Transform a system assignment found by the ``find()`` classmethod.
         """
@@ -511,7 +532,9 @@ def make_tokens_by_line(lines:List[str]):
     )
     parenlev = 0
     try:
-        for token in tokenize.generate_tokens(iter(lines).__next__):
+        for token in tokenutil.generate_tokens_catch_errors(
+            iter(lines).__next__, extra_errors_to_catch=["expected EOF"]
+        ):
             tokens_by_line[-1].append(token)
             if (token.type == NEWLINE) \
                 or ((token.type == NL) and (parenlev <= 0)):
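
`extra_errors_to_catch=["expected EOF"]` widens the catch list for this call site only: on 3.12 some malformed cells raise a `TokenError` whose message mentions "expected EOF", and `make_tokens_by_line` must still hand back token lists rather than propagate. A minimal sketch of the resulting behaviour (illustrative input, not taken from the PR's tests):

```python
# With the error-catching generator, a cell that 3.12's tokenizer rejects
# still produces per-line token lists, terminated by a synthetic
# ERRORTOKEN instead of an exception.
from IPython.core.inputtransformer2 import make_tokens_by_line

for line_tokens in make_tokens_by_line(['a = "never closed\n']):
    print([(tok.type, tok.string) for tok in line_tokens])
```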
@@ -677,9 +700,13 @@ class TransformerManager:
         if not lines:
             return 'complete', None
 
-        if lines[-1].endswith('\\'):
-            # Explicit backslash continuation
-            return 'incomplete', find_last_indent(lines)
+        for line in reversed(lines):
+            if not line.strip():
+                continue
+            elif line.strip("\n").endswith("\\"):
+                return "incomplete", find_last_indent(lines)
+            else:
+                break
 
         try:
             for transform in self.cleanup_transforms:
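
The old check only looked at `lines[-1]`, but `splitlines(keepends=True)` leaves the `\n` attached and a cell can end in blank lines, so a real backslash continuation could be missed. The loop now skips trailing blanks and strips the newline before testing. Expected behaviour, inferred from the code shown here:

```python
from IPython.core.inputtransformer2 import TransformerManager

tm = TransformerManager()
# Continuation with the newline still attached, and behind a trailing
# blank line: both should be reported as incomplete now.
print(tm.check_complete("x = 1 + \\"))      # expected: ('incomplete', 0)
print(tm.check_complete("x = 1 + \\\n\n"))  # expected: ('incomplete', 0)
```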
@@ -717,7 +744,10 @@ class TransformerManager:
         if not tokens_by_line:
             return 'incomplete', find_last_indent(lines)
 
-        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
+        if (
+            tokens_by_line[-1][-1].type != tokenize.ENDMARKER
+            and tokens_by_line[-1][-1].type != tokenize.ERRORTOKEN
+        ):
             # We're in a multiline string or expression
             return 'incomplete', find_last_indent(lines)
 
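
With the tolerant tokenizer, a stream cut short by a caught `TokenError` ends in the synthetic ERRORTOKEN rather than ENDMARKER; without the extra clause such cells would be misclassified as open multiline constructs and the prompt would keep asking for more input. A small check (the first expected value comes from the test table in this PR; the second call just exercises the new path):

```python
from IPython.core.inputtransformer2 import TransformerManager

tm = TransformerManager()
# A genuinely open triple-quoted string is still incomplete.
print(tm.check_complete("a = '''\n   hi"))  # expected: ('incomplete', 3)
# An unterminated single-quoted string ends in ERRORTOKEN and must not
# be mistaken for an open multiline construct.
print(tm.check_complete('a = "x'))
```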
@@ -298,13 +298,18 @@ def test_find_assign_op_dedent():
     )
 
 
+extra_closing_paren_param = (
+    pytest.param("(\n))", "invalid", None)
+    if sys.version_info >= (3, 12)
+    else pytest.param("(\n))", "incomplete", 0)
+)
 examples = [
     pytest.param("a = 1", "complete", None),
     pytest.param("for a in range(5):", "incomplete", 4),
     pytest.param("for a in range(5):\n    if a > 0:", "incomplete", 8),
     pytest.param("raise = 2", "invalid", None),
     pytest.param("a = [1,\n2,", "incomplete", 0),
-    pytest.param("(\n))", "incomplete", 0),
+    extra_closing_paren_param,
     pytest.param("\\\r\n", "incomplete", 0),
     pytest.param("a = '''\n   hi", "incomplete", 3),
     pytest.param("def a():\n x=1\n global x", "invalid", None),
@@ -18,6 +18,7 @@ Authors
 #-----------------------------------------------------------------------------
 
 # our own
+import sys
 from IPython.utils.PyColorize import Parser
 import io
 import pytest
@@ -40,7 +41,7 @@ def function(arg, *args, kwarg=True, **kwargs):
     False == None
 
     with io.open(ru'unicode', encoding='utf-8'):
-        raise ValueError("\n escape \r sequence")
+        raise ValueError("escape \r sequence")
 
     print("wěird ünicoðe")
 
@@ -64,6 +65,6 @@ def test_parse_sample(style):
 
 def test_parse_error(style):
     p = Parser(style=style)
-    f1 = p.format(")", "str")
+    f1 = p.format(r"\ " if sys.version_info >= (3, 12) else ")", "str")
     if style != "NoColor":
         assert "ERROR" in f1
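
The probe just needs to be something the tokenizer on the interpreter under test rejects, so the colorizer has an error to mark; which input does that changed with 3.12, where `r"\ "` trips the "unexpected character after line continuation character" error. A scratch check of both candidates, not part of the PR:

```python
# See what the local tokenizer does with each candidate probe string.
import io
import tokenize

def probe(src):
    try:
        list(tokenize.generate_tokens(io.StringIO(src).readline))
        return "tokenized cleanly"
    except Exception as exc:  # TokenError / SyntaxError, version-dependent
        return f"error: {exc}"

for candidate in (")", r"\ "):
    print(repr(candidate), "->", probe(candidate))
```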
@@ -21,6 +21,36 @@ def generate_tokens(readline):
         # catch EOF error
         return
 
+
+def generate_tokens_catch_errors(readline, extra_errors_to_catch=None):
+    default_errors_to_catch = [
+        "unterminated string literal",
+        "invalid non-printable character",
+        "after line continuation character",
+    ]
+    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
+    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
+
+    tokens = []
+    try:
+        for token in tokenize.generate_tokens(readline):
+            tokens.append(token)
+            yield token
+    except tokenize.TokenError as exc:
+        if any(error in exc.args[0] for error in errors_to_catch):
+            if tokens:
+                start = tokens[-1].start[0], tokens[-1].end[0]
+                end = start
+                line = tokens[-1].line
+            else:
+                start = end = (1, 0)
+                line = ""
+            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
+        else:
+            # Catch EOF
+            raise
+
+
 def line_at_cursor(cell, cursor_pos=0):
     """Return the line in a cell at a given cursor position
 
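
Usage sketch for the new helper: when the `TokenError` message matches the catch list (e.g. 3.12's "unterminated string literal"), iteration ends with one synthetic ERRORTOKEN instead of an exception; unmatched messages, such as a plain EOF in a multi-line statement, still raise. Note the synthetic token's start/end are built from the last real token's line numbers, a rough placeholder rather than exact (line, column) pairs.

```python
# Illustrative only: iterate a snippet with an unterminated string.
import io
from IPython.utils.tokenutil import generate_tokens_catch_errors

src = 's = "never closed\n'
for tok in generate_tokens_catch_errors(io.StringIO(src).readline):
    print(tok.type, repr(tok.string))
```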
@@ -123,5 +153,3 @@ def token_at_cursor(cell, cursor_pos=0):
         return names[-1]
     else:
         return ''
-
-