--- /dev/null
+++ b/IPython/utils/untokenize.py
@@ -0,0 +1,125 @@
+"""This is a patched copy of the untokenize machinery from the standard library.
+
+untokenize has a number of major bugs that render it almost useless. We're using
+the patch written by Gareth Rees on Python issue 12691:
+
+http://bugs.python.org/issue12691
+
+We've undone one part of the patch - it encoded the output to bytes, to neatly
+round-trip from tokenize. We want to keep working with text, so we don't encode.
+"""
+
+__author__ = 'Ka-Ping Yee <ping@lfw.org>'
+__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
+               'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
+               'Michael Foord')
+from token import *
+
+
+from tokenize import COMMENT, NL
+
+try:
+    # Python 3
+    from tokenize import ENCODING
+except ImportError:
+    ENCODING = 987654321
+
+class Untokenizer:
+
+    def __init__(self):
+        self.tokens = []
+        self.prev_row = 1
+        self.prev_col = 0
+        self.encoding = 'utf-8'
+
+    def add_whitespace(self, tok_type, start):
+        row, col = start
+        assert row >= self.prev_row
+        col_offset = col - self.prev_col
+        if col_offset > 0:
+            self.tokens.append(" " * col_offset)
+        elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
+            # Line was backslash-continued.
+            self.tokens.append(" ")
+
+    def untokenize(self, tokens):
+        iterable = iter(tokens)
+        for t in iterable:
+            if len(t) == 2:
+                self.compat(t, iterable)
+                break
+            # IPython modification - valid Python 2 syntax
+            tok_type, token, start, end = t[:4]
+            if tok_type == ENCODING:
+                self.encoding = token
+                continue
+            self.add_whitespace(tok_type, start)
+            self.tokens.append(token)
+            self.prev_row, self.prev_col = end
+            if tok_type in (NEWLINE, NL):
+                self.prev_row += 1
+                self.prev_col = 0
+        # IPython modification - don't encode output
+        return "".join(self.tokens)
+
+    def compat(self, token, iterable):
+        # This import is here to avoid problems when the itertools
+        # module is not built yet and tokenize is imported.
+        from itertools import chain
+        startline = False
+        prevstring = False
+        indents = []
+        toks_append = self.tokens.append
+
+        for tok in chain([token], iterable):
+            toknum, tokval = tok[:2]
+            if toknum == ENCODING:
+                self.encoding = tokval
+                continue
+
+            if toknum in (NAME, NUMBER):
+                tokval += ' '
+
+            # Insert a space between two consecutive strings
+            if toknum == STRING:
+                if prevstring:
+                    tokval = ' ' + tokval
+                prevstring = True
+            else:
+                prevstring = False
+
+            if toknum == INDENT:
+                indents.append(tokval)
+                continue
+            elif toknum == DEDENT:
+                indents.pop()
+                continue
+            elif toknum in (NEWLINE, NL):
+                startline = True
+            elif startline and indents:
+                toks_append(indents[-1])
+                startline = False
+            toks_append(tokval)
+
+
+def untokenize(tokens):
+    """
+    Convert ``tokens`` (an iterable) back into Python source code. Return
+    a string (the upstream patch returned bytes, encoded using the last
+    ENCODING token; this IPython copy keeps text, so nothing is encoded).
+
+    The result is guaranteed to tokenize back to match the input so that
+    the conversion is lossless and round-trips are assured. The
+    guarantee applies only to the token type and token string, as the
+    spacing between tokens (column positions) may change.
+
+    :func:`untokenize` has two modes. If the input tokens are sequences
+    of length 2 (``type``, ``string``) then spaces are added as necessary to
+    preserve the round-trip property.
+
+    If the input tokens are sequences of length 4 or more (``type``,
+    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
+    spaces are added so that each token appears in the result at the
+    position indicated by ``start`` and ``end``, if possible.
+    """
+    return Untokenizer().untokenize(tokens)
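
For orientation, a minimal round-trip sketch of how the new module is meant to be used (not part of the commit; the sample source line is invented). With full five-tuples from tokenize, the positional mode of untokenize reproduces the original spacing exactly:

    from StringIO import StringIO
    import tokenize

    from IPython.utils.untokenize import untokenize

    source = "spam(ham[1], {eggs: 2})\n"  # invented sample line
    tokens = tokenize.generate_tokens(StringIO(source).readline)
    assert untokenize(tokens) == source   # start/end positions preserve spacing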
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -77,6 +77,7 @@ from IPython.core.inputtransformer import (leading_indent,
                                            classic_prompt,
                                            ipy_prompt,
                                            cellmagic,
+                                           assemble_logical_lines,
                                            help_end,
                                            escaped_transformer,
                                            assign_from_magic,
@@ -515,6 +516,7 @@ class IPythonInputSplitter(InputSplitter):
                           classic_prompt(),
                           ipy_prompt(),
                           cellmagic(),
+                          assemble_logical_lines(),
                           help_end(),
                           escaped_transformer(),
                           assign_from_magic(),
@@ -639,14 +641,11 @@ class IPythonInputSplitter(InputSplitter):
 
     def push_line(self, line):
         buf = self._buffer
-        not_in_string = self._is_complete or not buf or \
-            (buf and buf[-1].rstrip().endswith((':', ',')))
         for transformer in self.transforms:
-            if not_in_string or transformer.look_in_string:
-                line = transformer.push(line)
-                if line is None:
-                    self.transformer_accumulating = True
-                    return False
+            line = transformer.push(line)
+            if line is None:
+                self.transformer_accumulating = True
+                return False
 
         self.transformer_accumulating = False
         return super(IPythonInputSplitter, self).push(line)
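
Taken together, these three hunks mean an incomplete logical line is now held back by the tokenize-based assemble_logical_lines transformer rather than by the old endswith heuristic. A hypothetical session against this branch (attribute names are from the diff; the printed values are expectations, not captured output):

    from IPython.core.inputsplitter import IPythonInputSplitter

    isp = IPythonInputSplitter()
    isp.push('a = 1 + \\')               # backslash continuation - incomplete
    print(isp.transformer_accumulating)  # expected: True, a transformer returned None
    isp.push('2')
    print(isp.transformer_accumulating)  # expected: False, the logical line completed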
--- a/IPython/core/inputtransformer.py
+++ b/IPython/core/inputtransformer.py
@@ -4,7 +4,15 @@ import re
 from StringIO import StringIO
 import tokenize
 
+try:
+    generate_tokens = tokenize.generate_tokens
+except AttributeError:
+    # Python 3. Note that we use the undocumented _tokenize because it expects
+    # strings, not bytes. See also Python issue #9969.
+    generate_tokens = tokenize._tokenize
+
 from IPython.core.splitinput import split_user_input, LineInfo
+from IPython.utils.untokenize import untokenize
 
 #-----------------------------------------------------------------------------
 # Globals
@@ -119,8 +127,11 @@ class TokenInputTransformer(InputTransformer):
     def __init__(self, func):
         self.func = func
         self.current_line = ""
-        self.tokenizer = tokenize.generate_tokens(self.get_line)
         self.line_used= False
+        self.reset_tokenizer()
+
+    def reset_tokenizer(self):
+        self.tokenizer = generate_tokens(self.get_line)
 
     def get_line(self):
         if self.line_used:
@@ -140,13 +151,12 @@ class TokenInputTransformer(InputTransformer):
                     break
             except tokenize.TokenError:
                 # Multi-line statement - stop and try again with the next line
-                self.tokenizer = tokenize.generate_tokens(self.get_line)
+                self.reset_tokenizer()
                 return None
 
         self.current_line = ""
-        # Python bug 8478 - untokenize doesn't work quite correctly with a
-        # generator. We call list() to avoid this.
-        return tokenize.untokenize(list(self.func(tokens))).rstrip('\n')
+        self.reset_tokenizer()
+        return untokenize(self.func(tokens)).rstrip('\n')
 
     def reset(self):
         l = self.current_line
@@ -154,6 +164,9 @@ class TokenInputTransformer(InputTransformer):
         if l:
             return l.rstrip('\n')
 
+@TokenInputTransformer.wrap
+def assemble_logical_lines(tokens):
+    return tokens
+
 # Utilities
 def _make_help_call(target, esc, lspace, next_input=None):
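
Because it is declared with @TokenInputTransformer.wrap, assemble_logical_lines() can be called to build a transformer instance whose token function is the identity, so its only effect is the tokenize/untokenize round trip that joins continued lines. Expected behaviour, sketched (outputs are what the diff implies, not captured):

    from IPython.core.inputtransformer import assemble_logical_lines

    alf = assemble_logical_lines()
    print(alf.push('a = 1 + \\'))  # expected: None - TokenError, still accumulating
    print(alf.push('2'))           # expected: 'a = 1 + 2', the rejoined logical line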