Prototype transformer to assemble logical lines
Thomas Kluyver
@@ -0,0 +1,125 @@
"""This is a patched copy of the untokenize machinery from the standard library.

untokenize has a number of major bugs that render it almost useless. We're using
the patch written by Gareth Rees on Python issue 12691:

http://bugs.python.org/issue12691

We've undone one part of the patch - it encoded the output to bytes, to neatly
round-trip from tokenize. We want to keep working with text, so we don't encode.
"""

__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
               'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
               'Michael Foord')
from token import *


from tokenize import COMMENT, NL

try:
    # Python 3
    from tokenize import ENCODING
except:
    ENCODING = 987654321

class Untokenizer:

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0
        self.encoding = 'utf-8'

    def add_whitespace(self, tok_type, start):
        row, col = start
        assert row >= self.prev_row
        col_offset = col - self.prev_col
        if col_offset > 0:
            self.tokens.append(" " * col_offset)
        elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
            # Line was backslash-continued.
            self.tokens.append(" ")

    def untokenize(self, tokens):
        iterable = iter(tokens)
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            # IPython modification - valid Python 2 syntax
            tok_type, token, start, end = t[:4]
            if tok_type == ENCODING:
                self.encoding = token
                continue
            self.add_whitespace(tok_type, start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        # IPython modification - don't encode output
        return "".join(self.tokens)

    def compat(self, token, iterable):
        # This import is here to avoid problems when the itertools
        # module is not built yet and tokenize is imported.
        from itertools import chain
        startline = False
        prevstring = False
        indents = []
        toks_append = self.tokens.append

        for tok in chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)


def untokenize(tokens):
    """
    Convert ``tokens`` (an iterable) back into Python source code, returned
    as a string. (The upstream patch encoded the output using the last
    ENCODING token in ``tokens``; this copy keeps text, as noted above.)

    The result is guaranteed to tokenize back to match the input so that
    the conversion is lossless and round-trips are assured. The
    guarantee applies only to the token type and token string, as the
    spacing between tokens (column positions) may change.

    :func:`untokenize` has two modes. If the input tokens are sequences
    of length 2 (``type``, ``string``) then spaces are added as necessary to
    preserve the round-trip property.

    If the input tokens are sequences of length 4 or more (``type``,
    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
    spaces are added so that each token appears in the result at the
    position indicated by ``start`` and ``end``, if possible.
    """
    return Untokenizer().untokenize(tokens)
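For readers who want to see the two modes in action, here is a minimal sketch. It assumes this file lands as IPython.utils.untokenize, which is the path the inputtransformer hunk below imports; everything else is the standard library. Full five-field tokens reproduce the original layout, while (type, string) pairs only guarantee an equivalent token stream.

import io
import tokenize

from IPython.utils.untokenize import untokenize   # module path assumed from this commit

src = "x = 1\nif x:\n    y = 2  # note\n"
tokens = list(tokenize.generate_tokens(io.StringIO(src).readline))

# Full tokens carry (type, string, start, end, line); add_whitespace() uses the
# start/end positions, so the original spacing is reproduced exactly.
assert untokenize(tokens) == src

# Length-2 tokens fall back to compat(): the result re-tokenizes to the same
# (type, string) stream, but the exact spacing may differ from the input.
pairs = [tok[:2] for tok in tokens]
print(untokenize(pairs))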
@@ -77,6 +77,7 @@ from IPython.core.inputtransformer import (leading_indent,
                                            classic_prompt,
                                            ipy_prompt,
                                            cellmagic,
+                                           assemble_logical_lines,
                                            help_end,
                                            escaped_transformer,
                                            assign_from_magic,
@@ -515,6 +516,7 @@ class IPythonInputSplitter(InputSplitter):
                       classic_prompt(),
                       ipy_prompt(),
                       cellmagic(),
+                      assemble_logical_lines(),
                       help_end(),
                       escaped_transformer(),
                       assign_from_magic(),
@@ -639,14 +641,11 @@ class IPythonInputSplitter(InputSplitter):
 
     def push_line(self, line):
         buf = self._buffer
-        not_in_string = self._is_complete or not buf or \
-            (buf and buf[-1].rstrip().endswith((':', ',')))
         for transformer in self.transforms:
-            if not_in_string or transformer.look_in_string:
-                line = transformer.push(line)
+            line = transformer.push(line)
             if line is None:
                 self.transformer_accumulating = True
                 return False
 
         self.transformer_accumulating = False
         return super(IPythonInputSplitter, self).push(line)
@@ -4,7 +4,15 @@ import re
 from StringIO import StringIO
 import tokenize
 
+try:
+    generate_tokens = tokenize.generate_tokens
+except AttributeError:
+    # Python 3. Note that we use the undocumented _tokenize because it expects
+    # strings, not bytes. See also Python issue #9969.
+    generate_tokens = tokenize._tokenize
+
 from IPython.core.splitinput import split_user_input, LineInfo
+from IPython.utils.untokenize import untokenize
 
 #-----------------------------------------------------------------------------
 # Globals
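The point of the generate_tokens shim is that the input transformers work on text: generate_tokens() pulls lines from any zero-argument callable returning strings, which is the role get_line() plays below. A small standard-library sketch of that interface:

import io
import tokenize

# generate_tokens() accepts any callable that returns one line of text per
# call; a StringIO readline stands in here for the transformer's get_line().
readline = io.StringIO("total = 1 + \\\n        2\n").readline
for tok in tokenize.generate_tokens(readline):
    print(tokenize.tok_name[tok[0]], repr(tok[1]))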
@@ -119,8 +127,11 @@ class TokenInputTransformer(InputTransformer):
     def __init__(self, func):
         self.func = func
         self.current_line = ""
-        self.tokenizer = tokenize.generate_tokens(self.get_line)
         self.line_used= False
+        self.reset_tokenizer()
+
+    def reset_tokenizer(self):
+        self.tokenizer = generate_tokens(self.get_line)
 
     def get_line(self):
         if self.line_used:
@@ -140,13 +151,12 @@ class TokenInputTransformer(InputTransformer):
                     break
             except tokenize.TokenError:
                 # Multi-line statement - stop and try again with the next line
-                self.tokenizer = tokenize.generate_tokens(self.get_line)
+                self.reset_tokenizer()
                 return None
 
         self.current_line = ""
-        # Python bug 8478 - untokenize doesn't work quite correctly with a
-        # generator. We call list() to avoid this.
-        return tokenize.untokenize(list(self.func(tokens))).rstrip('\n')
+        self.reset_tokenizer()
+        return untokenize(self.func(tokens)).rstrip('\n')
 
     def reset(self):
         l = self.current_line
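A note on reset_tokenizer(): generate_tokens() returns a generator bound to one readline callable, and once it has raised TokenError (or finished a statement) it cannot simply be resumed, so the transformer builds a fresh one for each attempt. A standalone illustration of that behaviour (not IPython code, names are illustrative):

import io
import tokenize

def fresh_tokens(text):
    # Each attempt gets a brand-new generator, mirroring reset_tokenizer().
    return tokenize.generate_tokens(io.StringIO(text).readline)

try:
    list(fresh_tokens("foo(1,\n"))          # unterminated call at end of input
except tokenize.TokenError:
    pass                                    # incomplete - wait for more lines
print([t[1] for t in fresh_tokens("foo(1, 2)\n")])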
@@ -154,6 +164,9 @@ class TokenInputTransformer(InputTransformer):
         if l:
             return l.rstrip('\n')
 
+@TokenInputTransformer.wrap
+def assemble_logical_lines(tokens):
+    return tokens
 
 # Utilities
 def _make_help_call(target, esc, lspace, next_input=None):
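Wrapping an identity function with TokenInputTransformer is what gives assemble_logical_lines its behaviour: push() keeps returning None while the tokenizer reports an incomplete logical line, then emits the reassembled (and re-spaced) line once it is complete. Roughly, and assuming a checkout containing this change:

from IPython.core.inputtransformer import assemble_logical_lines

logical = assemble_logical_lines()      # .wrap turns the function into a factory
print(logical.push("a = 1 + \\"))       # None: the continuation line is buffered
print(logical.push("    2"))            # the assembled line, e.g. 'a = 1 + 2'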