From 13bf7e13c426e215af2eb11d38b0699ff860aa99 2018-03-10 11:42:38
From: Thomas Kluyver
Date: 2018-03-10 11:42:38
Subject: [PATCH] Working on new input transformation machinery

---

diff --git a/IPython/core/inputtransformer2.py b/IPython/core/inputtransformer2.py
new file mode 100644
index 0000000..e7622a0
--- /dev/null
+++ b/IPython/core/inputtransformer2.py
@@ -0,0 +1,203 @@
+import re
+from typing import List, Tuple
+from IPython.utils import tokenize2
+from IPython.utils.tokenutil import generate_tokens
+
+def leading_indent(lines):
+    """Remove leading indentation.
+
+    If the first line starts with spaces or tabs, the same whitespace will be
+    removed from each following line.
+    """
+    m = re.match(r'^[ \t]+', lines[0])
+    if not m:
+        return lines
+    space = m.group(0)
+    n = len(space)
+    return [l[n:] if l.startswith(space) else l
+            for l in lines]
+
+class PromptStripper:
+    """Remove matching input prompts from a block of input.
+
+    Parameters
+    ----------
+    prompt_re : regular expression
+        A regular expression matching any input prompt, including continuation
+        prompts.
+    initial_re : regular expression, optional
+        A regular expression matching only the initial prompt, but not
+        continuation prompts. If no initial expression is given, prompt_re
+        will be used everywhere. Used mainly for plain Python prompts, where
+        the continuation prompt ``...`` is a valid Python expression in
+        Python 3, so it shouldn't be stripped.
+
+    If initial_re and prompt_re differ, only initial_re will be tested against
+    the first line. If any prompt is found on the first two lines, prompts will
+    be stripped from the rest of the block.
+    """
+    def __init__(self, prompt_re, initial_re=None):
+        self.prompt_re = prompt_re
+        self.initial_re = initial_re or prompt_re
+
+    def _strip(self, lines):
+        return [self.prompt_re.sub('', l, count=1) for l in lines]
+
+    def __call__(self, lines):
+        if self.initial_re.match(lines[0]) or \
+                (len(lines) > 1 and self.prompt_re.match(lines[1])):
+            return self._strip(lines)
+        return lines
+
+classic_prompt = PromptStripper(
+    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
+    initial_re=re.compile(r'^>>>( |$)')
+)
+
+ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
+
+def cell_magic(lines):
+    if not lines[0].startswith('%%'):
+        return lines
+    if re.match(r'%%\w+\?', lines[0]):
+        # This case will be handled by help_end
+        return lines
+    # [2:-1] strips the leading '%%' and the trailing newline
+    magic_name, _, first_line = lines[0][2:-1].partition(' ')
+    body = ''.join(lines[1:])  # The lines already end with '\n'
+    return ['get_ipython().run_cell_magic(%r, %r, %r)' % (magic_name, first_line, body)]
+
+line_transforms = [
+    leading_indent,
+    classic_prompt,
+    ipython_prompt,
+    cell_magic,
+]
+
+# -----
+
+def help_end(tokens_by_line):
+    pass
+
+def escaped_command(tokens_by_line):
+    pass
+
+def _find_assign_op(token_line):
+    # Find the first assignment in the line ('=' not inside brackets)
+    # We don't try to support multiple special assignments (a = b = %foo)
+    paren_level = 0
+    for i, ti in enumerate(token_line):
+        s = ti.string
+        if s == '=' and paren_level == 0:
+            return i
+        if s in '([{':
+            paren_level += 1
+        elif s in ')]}':
+            paren_level -= 1
+
+class MagicAssign:
+    @staticmethod
+    def find(tokens_by_line):
+        """Find the first magic assignment (a = %foo) in the cell.
+
+        Returns (line, column) of the % if found, or None.
+        """
+        for line in tokens_by_line:
+            assign_ix = _find_assign_op(line)
+            if (assign_ix is not None) \
+                    and (len(line) > assign_ix + 2) \
+                    and (line[assign_ix+1].string == '%') \
+                    and (line[assign_ix+2].type == tokenize2.NAME):
+                return line[assign_ix+1].start
+
+    @staticmethod
+    def transform(lines: List[str], start: Tuple[int, int]):
+        """Transform a magic assignment found by find
+        """
+        start_line = start[0] - 1   # Shift from 1-index to 0-index
+        start_col = start[1]
+
+        # Debug output while developing this transform
+        print("Start at", start_line, start_col)
+        print("Line", lines[start_line])
+
+        lhs, rhs = lines[start_line][:start_col], lines[start_line][start_col:-1]
+        assert rhs.startswith('%'), rhs
+        magic_name, _, args = rhs[1:].partition(' ')
+        args_parts = [args]
+        end_line = start_line
+        # Follow explicit (backslash) line continuations
+        while end_line < len(lines) and args_parts[-1].endswith('\\'):
+            end_line += 1
+            args_parts[-1] = args_parts[-1][:-1]  # Trim backslash
+            args_parts.append(lines[end_line][:-1])  # Trim newline
+        args = ' '.join(args_parts)
+
+        lines_before = lines[:start_line]
+        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
+        new_line = lhs + call + '\n'
+        lines_after = lines[end_line+1:]
+
+        return lines_before + [new_line] + lines_after
+
+def make_tokens_by_line(lines):
+    tokens_by_line = [[]]
+    for token in generate_tokens(iter(lines).__next__):
+        tokens_by_line[-1].append(token)
+        if token.type == tokenize2.NEWLINE:
+            tokens_by_line.append([])
+
+    return tokens_by_line
+
+class TokenTransformers:
+    def __init__(self):
+        self.transformers = [
+            MagicAssign
+        ]
+
+    def do_one_transform(self, lines):
+        """Find and run the transform earliest in the code.
+
+        Returns (changed, lines).
+
+        This method is called repeatedly until changed is False, indicating
+        that all available transformations are complete.
+
+        The tokens following IPython special syntax might not be valid, so
+        the transformed code is retokenised every time to identify the next
+        piece of special syntax. Hopefully long code cells are mostly valid
+        Python, not using lots of IPython special syntax, so this shouldn't be
+        a performance issue.
+        """
+        tokens_by_line = make_tokens_by_line(lines)
+        candidates = []
+        for transformer in self.transformers:
+            locn = transformer.find(tokens_by_line)
+            if locn is not None:
+                candidates.append((locn, transformer))
+
+        if not candidates:
+            # Nothing to transform
+            return False, lines
+
+        first_locn, transformer = min(candidates)
+        return True, transformer.transform(lines, first_locn)
+
+    def __call__(self, lines):
+        while True:
+            changed, lines = self.do_one_transform(lines)
+            if not changed:
+                return lines
+
+def assign_from_system(tokens_by_line, lines):
+    pass
+
+
+def transform_cell(cell):
+    if not cell.endswith('\n'):
+        cell += '\n'  # Ensure every line has a newline
+    lines = cell.splitlines(keepends=True)
+    for transform in line_transforms:
+        #print(transform, lines)
+        lines = transform(lines)
+
+    lines = TokenTransformers()(lines)
+    for line in lines:
+        print('~~', line)
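
Notes (not part of the patch): a minimal usage sketch of the new machinery,
assuming the diff above is applied so that IPython.core.inputtransformer2 is
importable, and that the IPython.utils.tokenize2 / tokenutil helpers it imports
are available. The results shown in comments are worked through by hand from
the code above and are illustrative only.

    from IPython.core.inputtransformer2 import (
        classic_prompt, cell_magic, TokenTransformers, transform_cell,
    )

    # Line-based transform: strip classic '>>> ' prompts.
    lines = classic_prompt(['>>> a = %who\n'])
    # -> ['a = %who\n']

    # Token-based transform: MagicAssign rewrites the magic assignment into an
    # explicit run_line_magic() call (the debug prints in transform() also fire).
    lines = TokenTransformers()(lines)
    # -> ["a = get_ipython().run_line_magic('who', '')\n"]

    # Cell magics are handled by the cell_magic line transform before tokenising.
    cell_magic(['%%bash\n', 'echo hi\n'])
    # -> a single line calling get_ipython().run_cell_magic('bash', '', 'echo hi\n')

    # transform_cell() chains the line transforms and TokenTransformers; for now
    # it only prints each transformed line prefixed with '~~'.
    transform_cell('>>> a = %who')

The sketch only exercises the transforms implemented so far; help_end,
escaped_command and assign_from_system are still stubs in this patch.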