From dd15f28f49472efd20a10805cf04d17f2d6154f9 2018-10-20 17:52:15 From: Matthias Bussonnier Date: 2018-10-20 17:52:15 Subject: [PATCH] Fix mis-capturing of assign statements after a dedent. closes #11415 This fixes a bug where assign statements were miscaptured when occurring after a dedent. This was due to the fact that: >>> '' in '({[' True That is to say the empty string is in any string. Add a couple of integration tests and unit tests as well, and also add a warning to a public function when not used properly, in particular, check that lines passed to make_tokens_by_line do end with an endline marker (at least for the first line), otherwise the function does not behave properly. --- diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py index e2dd2d0..b73d701 100644 --- a/IPython/core/inputtransformer2.py +++ b/IPython/core/inputtransformer2.py @@ -13,7 +13,7 @@ deprecated in 7.0. from codeop import compile_command import re import tokenize -from typing import List, Tuple +from typing import List, Tuple, Union import warnings _indent_re = re.compile(r'^[ \t]+') @@ -87,7 +87,7 @@ def cell_magic(lines): % (magic_name, first_line, body)] -def _find_assign_op(token_line): +def _find_assign_op(token_line) -> Union[int, None]: """Get the index of the first assignment in the line ('=' not inside brackets) Note: We don't try to support multiple special assignment (a = b = %foo) @@ -97,9 +97,9 @@ def _find_assign_op(token_line): s = ti.string if s == '=' and paren_level == 0: return i - if s in '([{': + if s in {'(','[','{'}: paren_level += 1 - elif s in ')]}': + elif s in {')', ']', '}'}: if paren_level > 0: paren_level -= 1 @@ -449,11 +449,14 @@ class HelpEnd(TokenTransformBase): return lines_before + [new_line] + lines_after -def make_tokens_by_line(lines): +def make_tokens_by_line(lines:List[str]): """Tokenize a series of lines and group tokens by line. - The tokens for a multiline Python string or expression are - grouped as one line. 
+ The tokens for a multiline Python string or expression are grouped as one + line. All lines except the last should keep their line ending ('\\n', + '\\r\\n') for this to properly work. Use `.splitlines(keepends=True)` + for example when passing a block of text to this function. + """ # NL tokens are used inside multiline expressions, but also after blank # lines or comments. This is intentional - see https://bugs.python.org/issue17061 @@ -461,6 +464,8 @@ def make_tokens_by_line(lines): # track parentheses level, similar to the internals of tokenize. NEWLINE, NL = tokenize.NEWLINE, tokenize.NL tokens_by_line = [[]] + if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')): + warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified") parenlev = 0 try: for token in tokenize.generate_tokens(iter(lines).__next__): diff --git a/IPython/core/tests/test_inputtransformer2.py b/IPython/core/tests/test_inputtransformer2.py index d6c2fa3..9c92c39 100644 --- a/IPython/core/tests/test_inputtransformer2.py +++ b/IPython/core/tests/test_inputtransformer2.py @@ -8,7 +8,7 @@ import nose.tools as nt import string from IPython.core import inputtransformer2 as ipt2 -from IPython.core.inputtransformer2 import make_tokens_by_line +from IPython.core.inputtransformer2 import make_tokens_by_line, _find_assign_op from textwrap import dedent @@ -53,6 +53,22 @@ b = get_ipython().getoutput('foo bar') g() """.splitlines(keepends=True)) +##### + +MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT = ("""\ +def test(): + for i in range(1): + print(i) + res =! ls +""".splitlines(keepends=True), (4, 7), '''\ +def test(): + for i in range(1): + print(i) + res =get_ipython().getoutput(\' ls\') +'''.splitlines(keepends=True)) + +###### + AUTOCALL_QUOTE = ( [",f 1 2 3\n"], (1, 0), ['f("1", "2", "3")\n'] @@ -103,6 +119,7 @@ b) = zip? 
[r"get_ipython().set_next_input('(a,\nb) = zip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"] ) + def null_cleanup_transformer(lines): """ A cleanup transform that returns an empty list. @@ -144,18 +161,21 @@ def test_continued_line(): def test_find_assign_magic(): check_find(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN) check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN, match=False) + check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT, match=False) def test_transform_assign_magic(): check_transform(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN) def test_find_assign_system(): check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN) + check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT) check_find(ipt2.SystemAssign, (["a = !ls\n"], (1, 5), None)) check_find(ipt2.SystemAssign, (["a=!ls\n"], (1, 2), None)) check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False) def test_transform_assign_system(): check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN) + check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT) def test_find_magic_escape(): check_find(ipt2.EscapedCommand, MULTILINE_MAGIC) @@ -203,6 +223,17 @@ def test_transform_help(): tf = ipt2.HelpEnd((1, 0), (2, 8)) nt.assert_equal(tf.transform(HELP_MULTILINE[0]), HELP_MULTILINE[2]) +def test_find_assign_op_dedent(): + """ + be careful that empty tokens like dedent are not counted as parens + """ + class Tk: + def __init__(self, s): + self.string = s + + nt.assert_equal(_find_assign_op([Tk(s) for s in ('','a','=','b')]), 2) + nt.assert_equal(_find_assign_op([Tk(s) for s in ('','(', 'a','=','b', ')', '=' ,'5')]), 6) + def test_check_complete(): cc = ipt2.TransformerManager().check_complete nt.assert_equal(cc("a = 1"), ('complete', None))