diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py
index cfba71a..c27a042 100644
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -77,6 +77,7 @@ from IPython.core.inputtransformer import (leading_indent,
                                            classic_prompt,
                                            ipy_prompt,
                                            cellmagic,
+                                           assemble_logical_lines,
                                            help_end,
                                            escaped_transformer,
                                            assign_from_magic,
@@ -515,6 +516,7 @@ class IPythonInputSplitter(InputSplitter):
                            classic_prompt(),
                            ipy_prompt(),
                            cellmagic(),
+                           assemble_logical_lines(),
                            help_end(),
                            escaped_transformer(),
                            assign_from_magic(),
@@ -639,14 +641,11 @@ class IPythonInputSplitter(InputSplitter):
 
     def push_line(self, line):
         buf = self._buffer
-        not_in_string = self._is_complete or not buf or \
-                        (buf and buf[-1].rstrip().endswith((':', ',')))
         for transformer in self.transforms:
-            if not_in_string or transformer.look_in_string:
-                line = transformer.push(line)
-                if line is None:
-                    self.transformer_accumulating = True
-                    return False
+            line = transformer.push(line)
+            if line is None:
+                self.transformer_accumulating = True
+                return False
 
         self.transformer_accumulating = False
         return super(IPythonInputSplitter, self).push(line)
diff --git a/IPython/core/inputtransformer.py b/IPython/core/inputtransformer.py
index 4440b83..d3bb346 100644
--- a/IPython/core/inputtransformer.py
+++ b/IPython/core/inputtransformer.py
@@ -4,7 +4,15 @@ import re
 from StringIO import StringIO
 import tokenize
 
+try:
+    generate_tokens = tokenize.generate_tokens
+except AttributeError:
+    # Python 3. Note that we use the undocumented _tokenize because it expects
+    # strings, not bytes. See also Python issue #9969.
+    generate_tokens = tokenize._tokenize
+
 from IPython.core.splitinput import split_user_input, LineInfo
+from IPython.utils.untokenize import untokenize
 
 #-----------------------------------------------------------------------------
 # Globals
@@ -119,8 +127,11 @@ class TokenInputTransformer(InputTransformer):
     def __init__(self, func):
         self.func = func
         self.current_line = ""
-        self.tokenizer = tokenize.generate_tokens(self.get_line)
         self.line_used = False
+        self.reset_tokenizer()
+
+    def reset_tokenizer(self):
+        self.tokenizer = generate_tokens(self.get_line)
 
     def get_line(self):
         if self.line_used:
@@ -140,13 +151,12 @@
                 break
             except tokenize.TokenError:
                 # Multi-line statement - stop and try again with the next line
-                self.tokenizer = tokenize.generate_tokens(self.get_line)
+                self.reset_tokenizer()
                 return None
 
         self.current_line = ""
-        # Python bug 8478 - untokenize doesn't work quite correctly with a
-        # generator. We call list() to avoid this.
-        return tokenize.untokenize(list(self.func(tokens))).rstrip('\n')
+        self.reset_tokenizer()
+        return untokenize(self.func(tokens)).rstrip('\n')
 
     def reset(self):
         l = self.current_line
@@ -154,6 +164,9 @@
         if l:
             return l.rstrip('\n')
 
+@TokenInputTransformer.wrap
+def assemble_logical_lines(tokens):
+    return tokens
 
 # Utilities
 def _make_help_call(target, esc, lspace, next_input=None):
diff --git a/IPython/utils/untokenize.py b/IPython/utils/untokenize.py
new file mode 100644
index 0000000..ee87f89
--- /dev/null
+++ b/IPython/utils/untokenize.py
@@ -0,0 +1,125 @@
+"""This is a patched copy of the untokenize machinery from the standard library.
+
+untokenize has a number of major bugs that render it almost useless. We're using
+the patch written by Gareth Rees on Python issue 12691:
+
+http://bugs.python.org/issue12691
+
+We've undone one part of the patch - it encoded the output to bytes, to neatly
+round-trip from tokenize. We want to keep working with text, so we don't encode.
+"""
+
+__author__ = 'Ka-Ping Yee <ping@lfw.org>'
+__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
+               'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
+               'Michael Foord')
+from token import *
+
+
+from tokenize import COMMENT, NL
+
+try:
+    # Python 3
+    from tokenize import ENCODING
+except ImportError:
+    ENCODING = 987654321
+
+class Untokenizer:
+
+    def __init__(self):
+        self.tokens = []
+        self.prev_row = 1
+        self.prev_col = 0
+        self.encoding = 'utf-8'
+
+    def add_whitespace(self, tok_type, start):
+        row, col = start
+        assert row >= self.prev_row
+        col_offset = col - self.prev_col
+        if col_offset > 0:
+            self.tokens.append(" " * col_offset)
+        elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
+            # Line was backslash-continued.
+            self.tokens.append(" ")
+
+    def untokenize(self, tokens):
+        iterable = iter(tokens)
+        for t in iterable:
+            if len(t) == 2:
+                self.compat(t, iterable)
+                break
+            # IPython modification - valid Python 2 syntax
+            tok_type, token, start, end = t[:4]
+            if tok_type == ENCODING:
+                self.encoding = token
+                continue
+            self.add_whitespace(tok_type, start)
+            self.tokens.append(token)
+            self.prev_row, self.prev_col = end
+            if tok_type in (NEWLINE, NL):
+                self.prev_row += 1
+                self.prev_col = 0
+        # IPython modification - don't encode output
+        return "".join(self.tokens)
+
+    def compat(self, token, iterable):
+        # This import is here to avoid problems when the itertools
+        # module is not built yet and tokenize is imported.
+        from itertools import chain
+        startline = False
+        prevstring = False
+        indents = []
+        toks_append = self.tokens.append
+
+        for tok in chain([token], iterable):
+            toknum, tokval = tok[:2]
+            if toknum == ENCODING:
+                self.encoding = tokval
+                continue
+
+            if toknum in (NAME, NUMBER):
+                tokval += ' '
+
+            # Insert a space between two consecutive strings
+            if toknum == STRING:
+                if prevstring:
+                    tokval = ' ' + tokval
+                prevstring = True
+            else:
+                prevstring = False
+
+            if toknum == INDENT:
+                indents.append(tokval)
+                continue
+            elif toknum == DEDENT:
+                indents.pop()
+                continue
+            elif toknum in (NEWLINE, NL):
+                startline = True
+            elif startline and indents:
+                toks_append(indents[-1])
+                startline = False
+            toks_append(tokval)
+
+
+def untokenize(tokens):
+    """
+    Convert ``tokens`` (an iterable) back into Python source code. Return
+    a string. (The upstream patch returned bytes, encoded using the last
+    ENCODING token in ``tokens``; this copy keeps text, as explained in the
+    module docstring.)
+
+    The result is guaranteed to tokenize back to match the input, so the
+    conversion is lossless and round-trips are assured. The guarantee
+    applies only to the token type and token string, as the spacing
+    between tokens (column positions) may change.
+
+    :func:`untokenize` has two modes. If the input tokens are sequences
+    of length 2 (``type``, ``string``) then spaces are added as necessary to
+    preserve the round-trip property.
+
+    If the input tokens are sequences of length 4 or more (``type``,
+    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
+    spaces are added so that each token appears in the result at the
+    position indicated by ``start`` and ``end``, if possible.
+    """
+    return Untokenizer().untokenize(tokens)
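
A minimal sketch (not part of the patch) of the two untokenize modes described in the docstring above. It also passes a plain generator to untokenize(), which is exactly what the removed "Python bug 8478" list() workaround used to forbid. It assumes Python 3, where tokenize.generate_tokens accepts text through readline; the source string is illustrative only.

    import io
    import tokenize
    from IPython.utils.untokenize import untokenize

    source = "a = (1 +\n     2)\n"
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))

    # Mode 1 - full position tuples: add_whitespace() rebuilds spacing from
    # each token's (row, col) start, so this input round-trips exactly.
    assert untokenize(tokens) == source

    # Mode 2 - (type, string) pairs: the compat() path only guarantees that
    # re-tokenizing the output yields the same token types and strings.
    loose = untokenize((tok[0], tok[1]) for tok in tokens)
    retokenized = tokenize.generate_tokens(io.StringIO(loose).readline)
    assert [t[:2] for t in retokenized] == [t[:2] for t in tokens]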
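A second sketch, with behavior inferred from the hunks above rather than taken from the patch: now that the look_in_string gate is gone, push_line() feeds every line to every transformer, and assemble_logical_lines() - an identity TokenInputTransformer - returns None while a statement is syntactically incomplete, then emits the reassembled logical line once it tokenizes. Exact output spacing may differ.

    from IPython.core.inputtransformer import assemble_logical_lines

    logical = assemble_logical_lines()

    # Tokenizing the backslash-continued line raises TokenError, so the
    # transformer keeps accumulating and returns None; push_line() then
    # sets transformer_accumulating and returns False.
    print(logical.push("a = 1 + \\"))  # None

    # Once the continuation arrives, the buffered tokens round-trip through
    # the patched untokenize into a single logical line.
    print(logical.push("2"))  # roughly "a = 1 + 2" (spacing may shift)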