From 13bf7e13c426e215af2eb11d38b0699ff860aa99 2018-03-10 11:42:38
From: Thomas Kluyver
Date: 2018-03-10 11:42:38
Subject: [PATCH] Working on new input transformation machinery

---

diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py
new file mode 100644
index 0000000..e7622a0
--- /dev/null
+++ b/IPython/core/inputtransformer2.py
@@ -0,0 +1,203 @@
+import re
+from typing import List, Tuple
+from IPython.utils import tokenize2
+from IPython.utils.tokenutil import generate_tokens
+
+def leading_indent(lines):
+    """Remove leading indentation.
+
+    If the first line starts with spaces or tabs, the same whitespace will be
+    removed from each following line.
+    """
+    m = re.match(r'^[ \t]+', lines[0])
+    if not m:
+        return lines
+    space = m.group(0)
+    n = len(space)
+    return [l[n:] if l.startswith(space) else l
+            for l in lines]
+
+class PromptStripper:
+    """Remove matching input prompts from a block of input.
+
+    Parameters
+    ----------
+    prompt_re : regular expression
+        A regular expression matching any input prompt, including continuation
+        prompts.
+    initial_re : regular expression, optional
+        A regular expression matching only the initial prompt, but not
+        continuation prompts. If no initial expression is given, prompt_re
+        will be used everywhere. Used mainly for plain Python prompts, where
+        the continuation prompt ``...`` is a valid Python expression in
+        Python 3, so it shouldn't be stripped.
+
+    If initial_re and prompt_re differ, only initial_re will be tested against
+    the first line. If any prompt is found on the first two lines, prompts will
+    be stripped from the rest of the block.
+    """
+    def __init__(self, prompt_re, initial_re=None):
+        self.prompt_re = prompt_re
+        self.initial_re = initial_re or prompt_re
+
+    def _strip(self, lines):
+        return [self.prompt_re.sub('', l, count=1) for l in lines]
+
+    def __call__(self, lines):
+        if self.initial_re.match(lines[0]) or \
+                (len(lines) > 1 and self.prompt_re.match(lines[1])):
+            return self._strip(lines)
+        return lines
+
+classic_prompt = PromptStripper(
+    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
+    initial_re=re.compile(r'^>>>( |$)')
+)
+
+ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
+
+def cell_magic(lines):
+    if not lines[0].startswith('%%'):
+        return lines
+    if re.match(r'%%\w+\?', lines[0]):
+        # This case will be handled by help_end
+        return lines
+    # [2:-1] strips the leading '%%' and the trailing newline
+    magic_name, _, first_line = lines[0][2:-1].partition(' ')
+    body = ''.join(lines[1:])  # The lines already end with '\n'
+    return ['get_ipython().run_cell_magic(%r, %r, %r)' % (magic_name, first_line, body)]
+
+line_transforms = [
+    leading_indent,
+    classic_prompt,
+    ipython_prompt,
+    cell_magic,
+]
+
+# -----
+
+def help_end(tokens_by_line):
+    pass
+
+def escaped_command(tokens_by_line):
+    pass
+
+def _find_assign_op(token_line):
+    # Find the first assignment in the line ('=' not inside brackets)
+    # We don't try to support multiple special assignments (a = b = %foo)
+    paren_level = 0
+    for i, ti in enumerate(token_line):
+        s = ti.string
+        if s == '=' and paren_level == 0:
+            return i
+        if s in '([{':
+            paren_level += 1
+        elif s in ')]}':
+            paren_level -= 1
+
+class MagicAssign:
+    @staticmethod
+    def find(tokens_by_line):
+        """Find the first magic assignment (a = %foo) in the cell.
+
+        Returns (line, column) of the % if found, or None.
+        """
+        for line in tokens_by_line:
+            assign_ix = _find_assign_op(line)
+            if (assign_ix is not None) \
+                    and (len(line) > assign_ix + 2) \
+                    and (line[assign_ix+1].string == '%') \
+                    and (line[assign_ix+2].type == tokenize2.NAME):
+                return line[assign_ix+1].start
+
+    @staticmethod
+    def transform(lines: List[str], start: Tuple[int, int]):
+        """Transform a magic assignment found by find
+        """
+        start_line = start[0] - 1   # Shift from 1-index to 0-index
+        start_col = start[1]
+
+        # Debug output while developing this transform
+        print("Start at", start_line, start_col)
+        print("Line", lines[start_line])
+
+        lhs, rhs = lines[start_line][:start_col], lines[start_line][start_col:-1]
+        assert rhs.startswith('%'), rhs
+        magic_name, _, args = rhs[1:].partition(' ')
+        args_parts = [args]
+        end_line = start_line
+        # Follow explicit (backslash) line continuations
+        while end_line < len(lines) and args_parts[-1].endswith('\\'):
+            end_line += 1
+            args_parts[-1] = args_parts[-1][:-1]  # Trim backslash
+            args_parts.append(lines[end_line][:-1])  # Trim newline
+        args = ' '.join(args_parts)
+
+        lines_before = lines[:start_line]
+        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
+        new_line = lhs + call + '\n'
+        lines_after = lines[end_line+1:]
+
+        return lines_before + [new_line] + lines_after
+
+def make_tokens_by_line(lines):
+    tokens_by_line = [[]]
+    for token in generate_tokens(iter(lines).__next__):
+        tokens_by_line[-1].append(token)
+        if token.type == tokenize2.NEWLINE:
+            tokens_by_line.append([])
+
+    return tokens_by_line
+
+class TokenTransformers:
+    def __init__(self):
+        self.transformers = [
+            MagicAssign
+        ]
+
+    def do_one_transform(self, lines):
+        """Find and run the transform earliest in the code.
+
+        Returns (changed, lines).
+
+        This method is called repeatedly until changed is False, indicating
+        that all available transformations are complete.
+
+        The tokens following IPython special syntax might not be valid, so
+        the transformed code is retokenised every time to identify the next
+        piece of special syntax. Hopefully long code cells are mostly valid
+        Python, not using lots of IPython special syntax, so this shouldn't be
+        a performance issue.
+        """
+        tokens_by_line = make_tokens_by_line(lines)
+        candidates = []
+        for transformer in self.transformers:
+            locn = transformer.find(tokens_by_line)
+            if locn is not None:
+                candidates.append((locn, transformer))
+
+        if not candidates:
+            # Nothing to transform
+            return False, lines
+
+        first_locn, transformer = min(candidates)
+        return True, transformer.transform(lines, first_locn)
+
+    def __call__(self, lines):
+        while True:
+            changed, lines = self.do_one_transform(lines)
+            if not changed:
+                return lines
+
+def assign_from_system(tokens_by_line, lines):
+    pass
+
+
+def transform_cell(cell):
+    if not cell.endswith('\n'):
+        cell += '\n'  # Ensure every line has a newline
+    lines = cell.splitlines(keepends=True)
+    for transform in line_transforms:
+        #print(transform, lines)
+        lines = transform(lines)
+
+    lines = TokenTransformers()(lines)
+    for line in lines:
+        print('~~', line)
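
Notes (not part of the patch): a minimal usage sketch of the new machinery,
assuming the diff above is applied so that IPython.core.inputtransformer2 is
importable, and that the IPython.utils.tokenize2 / tokenutil helpers it imports
are available. The results shown in comments are worked through by hand from
the code above and are illustrative only.

    from IPython.core.inputtransformer2 import (
        classic_prompt, cell_magic, TokenTransformers, transform_cell,
    )

    # Line-based transform: strip classic '>>> ' prompts.
    lines = classic_prompt(['>>> a = %who\n'])
    # -> ['a = %who\n']

    # Token-based transform: MagicAssign rewrites the magic assignment into an
    # explicit run_line_magic() call (the debug prints in transform() also fire).
    lines = TokenTransformers()(lines)
    # -> ["a = get_ipython().run_line_magic('who', '')\n"]

    # Cell magics are handled by the cell_magic line transform before tokenising.
    cell_magic(['%%bash\n', 'echo hi\n'])
    # -> a single line calling get_ipython().run_cell_magic('bash', '', 'echo hi\n')

    # transform_cell() chains the line transforms and TokenTransformers; for now
    # it only prints each transformed line prefixed with '~~'.
    transform_cell('>>> a = %who')

The sketch only exercises the transforms implemented so far; help_end,
escaped_command and assign_from_system are still stubs in this patch.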