From dd15f28f49472efd20a10805cf04d17f2d6154f9 2018-10-20 17:52:15 From: Matthias Bussonnier Date: 2018-10-20 17:52:15 Subject: [PATCH] Fix mis-capturing of assign statements after a dedent. closes #11415 This fixes a bug where assign statements were miscaptured when occurring after a dedent. This was due to the fact that: >>> '' in '({[' True That is to say the empty string is in any string. Add a couple of integration tests and unit tests as well, and also add a warning to a public function when not used properly, in particular, check that lines passed to make_tokens_by_line do end with an endline marker (at least for the first line), otherwise the function does not behave properly. --- diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py index e2dd2d0..b73d701 100644 --- a/IPython/core/inputtransformer2.py +++ b/IPython/core/inputtransformer2.py @@ -13,7 +13,7 @@ deprecated in 7.0. from codeop import compile_command import re import tokenize -from typing import List, Tuple +from typing import List, Tuple, Union import warnings _indent_re = re.compile(r'^[ \t]+') @@ -87,7 +87,7 @@ def cell_magic(lines): % (magic_name, first_line, body)] -def _find_assign_op(token_line): +def _find_assign_op(token_line) -> Union[int, None]: """Get the index of the first assignment in the line ('=' not inside brackets) Note: We don't try to support multiple special assignment (a = b = %foo) @@ -97,9 +97,9 @@ def _find_assign_op(token_line): s = ti.string if s == '=' and paren_level == 0: return i - if s in '([{': + if s in {'(','[','{'}: paren_level += 1 - elif s in ')]}': + elif s in {')', ']', '}'}: if paren_level > 0: paren_level -= 1 @@ -449,11 +449,14 @@ class HelpEnd(TokenTransformBase): return lines_before + [new_line] + lines_after -def make_tokens_by_line(lines): +def make_tokens_by_line(lines:List[str]): """Tokenize a series of lines and group tokens by line. - The tokens for a multiline Python string or expression are - grouped as one line. 
+ The tokens for a multiline Python string or expression are grouped as one + line. All lines except the last should keep their line ending ('\\n', + '\\r\\n') for this to properly work. Use `.splitlines(keepends=True)` + for example when passing a block of text to this function. + """ # NL tokens are used inside multiline expressions, but also after blank # lines or comments. This is intentional - see https://bugs.python.org/issue17061 @@ -461,6 +464,8 @@ def make_tokens_by_line(lines): # track parentheses level, similar to the internals of tokenize. NEWLINE, NL = tokenize.NEWLINE, tokenize.NL tokens_by_line = [[]] + if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')): + warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified") parenlev = 0 try: for token in tokenize.generate_tokens(iter(lines).__next__): diff --git a/IPython/core/tests/test_inputtransformer2.py b/IPython/core/tests/test_inputtransformer2.py index d6c2fa3..9c92c39 100644 --- a/IPython/core/tests/test_inputtransformer2.py +++ b/IPython/core/tests/test_inputtransformer2.py @@ -8,7 +8,7 @@ import nose.tools as nt import string from IPython.core import inputtransformer2 as ipt2 -from IPython.core.inputtransformer2 import make_tokens_by_line +from IPython.core.inputtransformer2 import make_tokens_by_line, _find_assign_op from textwrap import dedent @@ -53,6 +53,22 @@ b = get_ipython().getoutput('foo bar') g() """.splitlines(keepends=True)) +##### + +MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT = ("""\ +def test(): + for i in range(1): + print(i) + res =! ls +""".splitlines(keepends=True), (4, 7), '''\ +def test(): + for i in range(1): + print(i) + res =get_ipython().getoutput(\' ls\') +'''.splitlines(keepends=True)) + +###### + AUTOCALL_QUOTE = ( [",f 1 2 3\n"], (1, 0), ['f("1", "2", "3")\n'] @@ -103,6 +119,7 @@ b) = zip? 
[r"get_ipython().set_next_input('(a,\nb) = zip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"] ) + def null_cleanup_transformer(lines): """ A cleanup transform that returns an empty list. @@ -144,18 +161,21 @@ def test_continued_line(): def test_find_assign_magic(): check_find(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN) check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN, match=False) + check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT, match=False) def test_transform_assign_magic(): check_transform(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN) def test_find_assign_system(): check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN) + check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT) check_find(ipt2.SystemAssign, (["a = !ls\n"], (1, 5), None)) check_find(ipt2.SystemAssign, (["a=!ls\n"], (1, 2), None)) check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False) def test_transform_assign_system(): check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN) + check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT) def test_find_magic_escape(): check_find(ipt2.EscapedCommand, MULTILINE_MAGIC) @@ -203,6 +223,17 @@ def test_transform_help(): tf = ipt2.HelpEnd((1, 0), (2, 8)) nt.assert_equal(tf.transform(HELP_MULTILINE[0]), HELP_MULTILINE[2]) +def test_find_assign_op_dedent(): + """ + be careful that empty tokens like dedent are not counted as parens + """ + class Tk: + def __init__(self, s): + self.string = s + + nt.assert_equal(_find_assign_op([Tk(s) for s in ('','a','=','b')]), 2) + nt.assert_equal(_find_assign_op([Tk(s) for s in ('','(', 'a','=','b', ')', '=' ,'5')]), 6) + def test_check_complete(): cc = ipt2.TransformerManager().check_complete nt.assert_equal(cc("a = 1"), ('complete', None))