Show More
@@ -0,0 +1,125 b'' | |||
|
1 | """This is a patched copy of the untokenize machinery from the standard library. | |
|
2 | ||
|
3 | untokenize has a number of major bugs that render it almost useless. We're using | |
|
4 | the patch written by Gareth Rees on Python issue 12691: | |
|
5 | ||
|
6 | http://bugs.python.org/issue12691 | |
|
7 | ||
|
8 | We've undone one part of the patch - it encoded the output to bytes, to neatly | |
|
9 | round-trip from tokenize. We want to keep working with text, so we don't encode. | |
|
10 | """ | |
|
11 | ||
|
12 | __author__ = 'Ka-Ping Yee <ping@lfw.org>' | |
|
13 | __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' | |
|
14 | 'Skip Montanaro, Raymond Hettinger, Trent Nelson, ' | |
|
15 | 'Michael Foord') | |
|
16 | from token import * | |
|
17 | ||
|
18 | ||
|
19 | from tokenize import COMMENT, NL | |
|
20 | ||
|
try:
    # Python 3: tokenize emits an ENCODING token first; we need the
    # constant so Untokenizer can recognise and skip those tokens.
    from tokenize import ENCODING
except ImportError:
    # Python 2's tokenize has no ENCODING token type.  Use a sentinel
    # value that can never equal a real (small, non-negative) token type,
    # so the `== ENCODING` comparisons below are harmless no-ops.
    # NOTE: the original used a bare `except:`, which would also swallow
    # KeyboardInterrupt/SystemExit; only ImportError is expected here.
    ENCODING = 987654321
|
26 | ||
|
class Untokenizer:
    """Reconstruct Python source text from a stream of tokens.

    This is the patched stdlib machinery (see module docstring).  Tokens of
    length >= 4 (``type, string, start, end``) are placed back at their
    recorded positions via :meth:`add_whitespace`; length-2 tokens
    (``type, string``) fall back to :meth:`compat`, which inserts just
    enough spacing to keep the result re-tokenizable.
    """

    def __init__(self):
        # Accumulated output fragments; joined into a str at the end.
        self.tokens = []
        # Position (row 1-based, col 0-based) just past the last token
        # written, used to compute padding for the next token.
        self.prev_row = 1
        self.prev_col = 0
        # Updated if an ENCODING token is seen; unused for output here
        # since the IPython version returns text rather than bytes.
        self.encoding = 'utf-8'

    def add_whitespace(self, tok_type, start):
        """Append spaces so the next token lands at position ``start``.

        ``start`` is a ``(row, col)`` pair; rows must be non-decreasing.
        """
        row, col = start
        assert row >= self.prev_row
        col_offset = col - self.prev_col
        if col_offset > 0:
            # Same line: pad out to the token's recorded column.
            self.tokens.append(" " * col_offset)
        elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
            # Line was backslash-continued.
            self.tokens.append(" ")

    def untokenize(self, tokens):
        """Render ``tokens`` back into source text (a str).

        If a length-2 token is encountered, the remainder of the stream is
        handed to :meth:`compat` and position information is ignored from
        then on.
        """
        iterable = iter(tokens)
        for t in iterable:
            if len(t) == 2:
                # Positionless token: switch to compatibility mode for the
                # rest of the stream.
                self.compat(t, iterable)
                break
            # IPython modification - valid Python 2 syntax
            # (slice to 4 fields instead of extended unpacking).
            tok_type, token, start, end = t[:4]
            if tok_type == ENCODING:
                # Remember the declared encoding but emit nothing for it.
                self.encoding = token
                continue
            self.add_whitespace(tok_type, start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                # The next token starts on a fresh line.
                self.prev_row += 1
                self.prev_col = 0
        # IPython modification - don't encode output
        return "".join(self.tokens)

    def compat(self, token, iterable):
        """Emit ``token`` and the rest of ``iterable`` without positions.

        Spacing rules: NAME/NUMBER tokens get a trailing space, adjacent
        STRING tokens are separated by a space, and INDENT/DEDENT tokens
        are tracked so the current indent is re-emitted at the start of
        each new line.
        """
        # This import is here to avoid problems when the itertools
        # module is not built yet and tokenize is imported.
        from itertools import chain
        startline = False       # True right after a NEWLINE/NL token
        prevstring = False      # True if the previous token was a STRING
        indents = []            # stack of indent strings from INDENT tokens
        toks_append = self.tokens.append

        for tok in chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                # First real token of a line: restore the current indent.
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)
|
103 | ||
|
104 | ||
|
def untokenize(tokens):
    """
    Convert ``tokens`` (an iterable) back into Python source code.  Return
    a string (IPython modification: unlike the stdlib/patched version,
    the result is NOT encoded to bytes, regardless of any ENCODING token
    in ``tokens`` -- see the module docstring).

    The result is guaranteed to tokenize back to match the input so that
    the conversion is lossless and round-trips are assured.  The
    guarantee applies only to the token type and token string as the
    spacing between tokens (column positions) may change.

    :func:`untokenize` has two modes. If the input tokens are sequences
    of length 2 (``type``, ``string``) then spaces are added as necessary to
    preserve the round-trip property.

    If the input tokens are sequences of length 4 or more (``type``,
    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
    spaces are added so that each token appears in the result at the
    position indicated by ``start`` and ``end``, if possible.
    """
    return Untokenizer().untokenize(tokens)
@@ -77,6 +77,7 b' from IPython.core.inputtransformer import (leading_indent,' | |||
|
77 | 77 | classic_prompt, |
|
78 | 78 | ipy_prompt, |
|
79 | 79 | cellmagic, |
|
80 | assemble_logical_lines, | |
|
80 | 81 | help_end, |
|
81 | 82 | escaped_transformer, |
|
82 | 83 | assign_from_magic, |
@@ -515,6 +516,7 b' class IPythonInputSplitter(InputSplitter):' | |||
|
515 | 516 | classic_prompt(), |
|
516 | 517 | ipy_prompt(), |
|
517 | 518 | cellmagic(), |
|
519 | assemble_logical_lines(), | |
|
518 | 520 | help_end(), |
|
519 | 521 | escaped_transformer(), |
|
520 | 522 | assign_from_magic(), |
@@ -639,14 +641,11 b' class IPythonInputSplitter(InputSplitter):' | |||
|
639 | 641 | |
|
640 | 642 | def push_line(self, line): |
|
641 | 643 | buf = self._buffer |
|
642 | not_in_string = self._is_complete or not buf or \ | |
|
643 | (buf and buf[-1].rstrip().endswith((':', ','))) | |
|
644 | 644 | for transformer in self.transforms: |
|
645 |
|
|
|
646 |
|
|
|
647 | if line is None: | |
|
648 | self.transformer_accumulating = True | |
|
649 | return False | |
|
645 | line = transformer.push(line) | |
|
646 | if line is None: | |
|
647 | self.transformer_accumulating = True | |
|
648 | return False | |
|
650 | 649 | |
|
651 | 650 | self.transformer_accumulating = False |
|
652 | 651 | return super(IPythonInputSplitter, self).push(line) |
@@ -4,7 +4,15 b' import re' | |||
|
4 | 4 | from StringIO import StringIO |
|
5 | 5 | import tokenize |
|
6 | 6 | |
|
7 | try: | |
|
8 | generate_tokens = tokenize.generate_tokens | |
|
9 | except AttributeError: | |
|
10 | # Python 3. Note that we use the undocumented _tokenize because it expects | |
|
11 | # strings, not bytes. See also Python issue #9969. | |
|
12 | generate_tokens = tokenize._tokenize | |
|
13 | ||
|
7 | 14 | from IPython.core.splitinput import split_user_input, LineInfo |
|
15 | from IPython.utils.untokenize import untokenize | |
|
8 | 16 | |
|
9 | 17 | #----------------------------------------------------------------------------- |
|
10 | 18 | # Globals |
@@ -119,8 +127,11 b' class TokenInputTransformer(InputTransformer):' | |||
|
119 | 127 | def __init__(self, func): |
|
120 | 128 | self.func = func |
|
121 | 129 | self.current_line = "" |
|
122 | self.tokenizer = tokenize.generate_tokens(self.get_line) | |
|
123 | 130 | self.line_used= False |
|
131 | self.reset_tokenizer() | |
|
132 | ||
|
133 | def reset_tokenizer(self): | |
|
134 | self.tokenizer = generate_tokens(self.get_line) | |
|
124 | 135 | |
|
125 | 136 | def get_line(self): |
|
126 | 137 | if self.line_used: |
@@ -140,13 +151,12 b' class TokenInputTransformer(InputTransformer):' | |||
|
140 | 151 | break |
|
141 | 152 | except tokenize.TokenError: |
|
142 | 153 | # Multi-line statement - stop and try again with the next line |
|
143 | self.tokenizer = tokenize.generate_tokens(self.get_line) | |
|
154 | self.reset_tokenizer() | |
|
144 | 155 | return None |
|
145 | 156 | |
|
146 | 157 | self.current_line = "" |
|
147 | # Python bug 8478 - untokenize doesn't work quite correctly with a | |
|
148 | # generator. We call list() to avoid this. | |
|
149 | return tokenize.untokenize(list(self.func(tokens))).rstrip('\n') | |
|
158 | self.reset_tokenizer() | |
|
159 | return untokenize(self.func(tokens)).rstrip('\n') | |
|
150 | 160 | |
|
151 | 161 | def reset(self): |
|
152 | 162 | l = self.current_line |
@@ -154,6 +164,9 b' class TokenInputTransformer(InputTransformer):' | |||
|
154 | 164 | if l: |
|
155 | 165 | return l.rstrip('\n') |
|
156 | 166 | |
|
@TokenInputTransformer.wrap
def assemble_logical_lines(tokens):
    """Pass the token stream through unchanged.

    Wrapped as a TokenInputTransformer, this has the effect of joining
    physical lines into complete logical lines: the transformer machinery
    accumulates input until it tokenizes cleanly, and this identity
    function leaves the resulting tokens untouched before they are
    untokenized back to source.
    """
    return tokens
|
157 | 170 | |
|
158 | 171 | # Utilities |
|
159 | 172 | def _make_help_call(target, esc, lspace, next_input=None): |
General Comments 0
You need to be logged in to leave comments.
Login now