##// END OF EJS Templates
Prototype transformer to assemble logical lines
Thomas Kluyver -
Show More
@@ -0,0 +1,125 b''
1 """This is a patched copy of the untokenize machinery from the standard library.
2
3 untokenize has a number of major bugs that render it almost useless. We're using
4 the patch written by Gareth Rees on Python issue 12691:
5
6 http://bugs.python.org/issue12691
7
8 We've undone one part of the patch - it encoded the output to bytes, to neatly
9 round-trip from tokenize. We want to keep working with text, so we don't encode.
10 """
11
12 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
13 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
14 'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
15 'Michael Foord')
16 from token import *
17
18
19 from tokenize import COMMENT, NL
20
try:
    # Python 3
    from tokenize import ENCODING
except ImportError:
    # tokenize does not export ENCODING on this interpreter: fall back to an
    # arbitrary placeholder value so the comparisons against ENCODING below
    # still work.  Only ImportError is caught so that unrelated failures
    # (KeyboardInterrupt, SystemExit, ...) are not silently swallowed.
    ENCODING = 987654321
26
class Untokenizer:
    """Rebuild source text from a stream of tokens.

    The instance accumulates output fragments in ``tokens`` and tracks the
    position where the previous token ended (``prev_row``, ``prev_col``) so
    that whitespace can be re-inserted between tokens.  ``encoding`` records
    the string carried by the last ENCODING token seen; it is stored only and
    is not used to encode the result.
    """

    def __init__(self):
        self.encoding = 'utf-8'
        self.prev_row, self.prev_col = 1, 0
        self.tokens = []

    def add_whitespace(self, tok_type, start):
        """Emit the spacing needed before a token beginning at ``start``.

        Parameters
        ----------
        tok_type : int
          Token type of the token about to be emitted.
        start : (row, col) tuple
          Position at which that token starts.
        """
        row, col = start
        assert row >= self.prev_row
        gap = col - self.prev_col
        if gap > 0:
            # Pad with spaces up to the token's starting column.
            self.tokens.append(" " * gap)
            return
        if row <= self.prev_row:
            return
        if tok_type not in (NEWLINE, NL, ENDMARKER):
            # Line was backslash-continued: the token sits on a later row but
            # no newline token was seen, so separate it with a single space.
            self.tokens.append(" ")

    def untokenize(self, tokens):
        """Return the source text (a string, not bytes) for ``tokens``.

        Tokens are expected to be sequences of at least four items
        (type, string, start, end).  As soon as a 2-item token is met, it and
        the rest of the stream are handed to :meth:`compat` instead.
        """
        stream = iter(tokens)
        for entry in stream:
            if len(entry) == 2:
                self.compat(entry, stream)
                break
            # IPython modification - slicing keeps this valid Python 2 syntax
            kind, text, start, end = entry[:4]
            if kind == ENCODING:
                self.encoding = text
                continue
            self.add_whitespace(kind, start)
            self.tokens.append(text)
            end_row, end_col = end
            if kind in (NEWLINE, NL):
                # The next token starts on a fresh line.
                end_row, end_col = end_row + 1, 0
            self.prev_row, self.prev_col = end_row, end_col
        # IPython modification - don't encode output
        return "".join(self.tokens)

    def compat(self, token, iterable):
        """Append text for position-less (type, string) tokens.

        ``token`` is the first 2-item token encountered and ``iterable``
        yields the remaining ones.  Without positions, spacing is synthesised:
        names and numbers get a trailing space, adjacent strings are separated
        by a space, and the innermost INDENT string is re-emitted at the start
        of each line.
        """
        # This import is here to avoid problems when the itertools
        # module is not built yet and tokenize is imported.
        from itertools import chain
        emit = self.tokens.append
        indent_stack = []
        at_line_start = False
        after_string = False

        for item in chain([token], iterable):
            kind, text = item[:2]
            if kind == ENCODING:
                self.encoding = text
                continue

            if kind in (NAME, NUMBER):
                text += ' '

            # Insert a space between two consecutive strings
            is_string = kind == STRING
            if is_string and after_string:
                text = ' ' + text
            after_string = is_string

            if kind == INDENT:
                indent_stack.append(text)
                continue
            if kind == DEDENT:
                indent_stack.pop()
                continue
            if kind in (NEWLINE, NL):
                at_line_start = True
            elif at_line_start and indent_stack:
                emit(indent_stack[-1])
                at_line_start = False
            emit(text)
104
def untokenize(tokens):
    """
    Convert ``tokens`` (an iterable) back into Python source code.

    Return the reconstructed source as text.  Unlike the standard library
    function this copy does *not* encode its output: an ENCODING token in
    ``tokens`` is skipped (its value is only recorded on the temporary
    :class:`Untokenizer`) and has no effect on the returned string.

    The intent is that the result tokenizes back to the same token types and
    token strings as the input; the spacing between tokens (column
    positions) may change.

    :func:`untokenize` has two modes. If the input tokens are sequences
    of length 2 (``type``, ``string``) then spaces are added as necessary to
    keep adjacent tokens apart.

    If the input tokens are sequences of length 4 or more (``type``,
    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
    spaces are added so that each token appears in the result at the
    position indicated by ``start`` and ``end``, if possible.
    """
    # A fresh Untokenizer per call: it accumulates state while running.
    rebuilder = Untokenizer()
    return rebuilder.untokenize(tokens)
@@ -1,652 +1,651 b''
1 """Analysis of text input into executable blocks.
1 """Analysis of text input into executable blocks.
2
2
3 The main class in this module, :class:`InputSplitter`, is designed to break
3 The main class in this module, :class:`InputSplitter`, is designed to break
4 input from either interactive, line-by-line environments or block-based ones,
4 input from either interactive, line-by-line environments or block-based ones,
5 into standalone blocks that can be executed by Python as 'single' statements
5 into standalone blocks that can be executed by Python as 'single' statements
6 (thus triggering sys.displayhook).
6 (thus triggering sys.displayhook).
7
7
8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 with full support for the extended IPython syntax (magics, system calls, etc).
9 with full support for the extended IPython syntax (magics, system calls, etc).
10
10
11 For more details, see the class docstring below.
11 For more details, see the class docstring below.
12
12
13 Syntax Transformations
13 Syntax Transformations
14 ----------------------
14 ----------------------
15
15
16 One of the main jobs of the code in this file is to apply all syntax
16 One of the main jobs of the code in this file is to apply all syntax
17 transformations that make up 'the IPython language', i.e. magics, shell
17 transformations that make up 'the IPython language', i.e. magics, shell
18 escapes, etc. All transformations should be implemented as *fully stateless*
18 escapes, etc. All transformations should be implemented as *fully stateless*
19 entities, that simply take one line as their input and return a line.
19 entities, that simply take one line as their input and return a line.
20 Internally for implementation purposes they may be a normal function or a
20 Internally for implementation purposes they may be a normal function or a
21 callable object, but the only input they receive will be a single line and they
21 callable object, but the only input they receive will be a single line and they
22 should only return a line, without holding any data-dependent state between
22 should only return a line, without holding any data-dependent state between
23 calls.
23 calls.
24
24
25 As an example, the EscapedTransformer is a class so we can more clearly group
25 As an example, the EscapedTransformer is a class so we can more clearly group
26 together the functionality of dispatching to individual functions based on the
26 together the functionality of dispatching to individual functions based on the
27 starting escape character, but the only method for public use is its call
27 starting escape character, but the only method for public use is its call
28 method.
28 method.
29
29
30
30
31 ToDo
31 ToDo
32 ----
32 ----
33
33
34 - Should we make push() actually raise an exception once push_accepts_more()
34 - Should we make push() actually raise an exception once push_accepts_more()
35 returns False?
35 returns False?
36
36
37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 at least just attributes of a class so not really very exposed.
38 at least just attributes of a class so not really very exposed.
39
39
40 - Think about the best way to support dynamic things: automagic, autocall,
40 - Think about the best way to support dynamic things: automagic, autocall,
41 macros, etc.
41 macros, etc.
42
42
43 - Think of a better heuristic for the application of the transforms in
43 - Think of a better heuristic for the application of the transforms in
44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 track indentation change events (indent, dedent, nothing) and apply them only
45 track indentation change events (indent, dedent, nothing) and apply them only
46 if the indentation went up, but not otherwise.
46 if the indentation went up, but not otherwise.
47
47
48 - Think of the cleanest way for supporting user-specified transformations (the
48 - Think of the cleanest way for supporting user-specified transformations (the
49 user prefilters we had before).
49 user prefilters we had before).
50
50
51 Authors
51 Authors
52 -------
52 -------
53
53
54 * Fernando Perez
54 * Fernando Perez
55 * Brian Granger
55 * Brian Granger
56 """
56 """
57 #-----------------------------------------------------------------------------
57 #-----------------------------------------------------------------------------
58 # Copyright (C) 2010 The IPython Development Team
58 # Copyright (C) 2010 The IPython Development Team
59 #
59 #
60 # Distributed under the terms of the BSD License. The full license is in
60 # Distributed under the terms of the BSD License. The full license is in
61 # the file COPYING, distributed as part of this software.
61 # the file COPYING, distributed as part of this software.
62 #-----------------------------------------------------------------------------
62 #-----------------------------------------------------------------------------
63
63
64 #-----------------------------------------------------------------------------
64 #-----------------------------------------------------------------------------
65 # Imports
65 # Imports
66 #-----------------------------------------------------------------------------
66 #-----------------------------------------------------------------------------
67 # stdlib
67 # stdlib
68 import ast
68 import ast
69 import codeop
69 import codeop
70 import re
70 import re
71 import sys
71 import sys
72
72
73 # IPython modules
73 # IPython modules
74 from IPython.core.splitinput import split_user_input, LineInfo
74 from IPython.core.splitinput import split_user_input, LineInfo
75 from IPython.utils.py3compat import cast_unicode
75 from IPython.utils.py3compat import cast_unicode
76 from IPython.core.inputtransformer import (leading_indent,
76 from IPython.core.inputtransformer import (leading_indent,
77 classic_prompt,
77 classic_prompt,
78 ipy_prompt,
78 ipy_prompt,
79 cellmagic,
79 cellmagic,
80 assemble_logical_lines,
80 help_end,
81 help_end,
81 escaped_transformer,
82 escaped_transformer,
82 assign_from_magic,
83 assign_from_magic,
83 assign_from_system,
84 assign_from_system,
84 )
85 )
85
86
86 # Temporary!
87 # Temporary!
87 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
88 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
88 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
89 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
89 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
90 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
90
91
91 #-----------------------------------------------------------------------------
92 #-----------------------------------------------------------------------------
92 # Utilities
93 # Utilities
93 #-----------------------------------------------------------------------------
94 #-----------------------------------------------------------------------------
94
95
95 # FIXME: These are general-purpose utilities that later can be moved to the
96 # FIXME: These are general-purpose utilities that later can be moved to the
96 # general ward. Kept here for now because we're being very strict about test
97 # general ward. Kept here for now because we're being very strict about test
97 # coverage with this code, and this lets us ensure that we keep 100% coverage
98 # coverage with this code, and this lets us ensure that we keep 100% coverage
98 # while developing.
99 # while developing.
99
100
# compiled regexps for autoindent management
dedent_re = re.compile('|'.join([
    r'^\s+raise(\s.*)?$',  # raise statement (+ space + other stuff, maybe)
    r'^\s+raise\([^\)]*\).*$',  # wacky raise with immediate open paren
    r'^\s+return(\s.*)?$',  # normal return (+ space + other stuff, maybe)
    r'^\s+return\([^\)]*\).*$',  # wacky return with immediate open paren
    r'^\s+pass\s*$',  # pass (optionally followed by trailing spaces)
    r'^\s+break\s*$',  # break (optionally followed by trailing spaces)
    r'^\s+continue\s*$',  # continue (optionally followed by trailing spaces)
]))
# Leading run of blanks/tabs (and \r, \f, \v) at the start of a string.
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.  Written as a raw string: '\s' and '\#' are not valid
# string escapes, so the non-raw spelling triggers an invalid-escape warning
# on newer Pythons while producing exactly the same pattern.
comment_line_re = re.compile(r'^\s*\#')
115
116
116
117
def num_ini_spaces(s):
    """Count the leading whitespace characters of a string.

    Every character matched by ``ini_spaces_re`` counts as one, so a tab is
    counted as a single space.  Mixing tabs and spaces in the user's input is
    *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
      Length of the leading whitespace run, or 0 when there is none.
    """
    leading = ini_spaces_re.match(s)
    return leading.end() if leading else 0
137
138
def last_blank(src):
    """Tell whether the last line of the input source is blank.

    A blank line is either empty or made only of whitespace.  Lines are
    obtained with ``str.splitlines``, so a single trailing newline after
    text does not by itself produce a blank last line.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      False for empty input; otherwise whether the final line is blank.
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    if final_line == '':
        return True
    return final_line.isspace()
151
152
152
153
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Tell whether the input source ends in two blank lines.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
      A single or multiline string.

    Returns
    -------
    bool
      False for empty input; otherwise whether either module-level pattern
      matches the probe text built from the last two lines.
    """
    if not src:
        return False
    # A direct regexp over the whole source proved too fragile, so only the
    # last two lines are examined: they are glued behind a '###\n' marker
    # that stands in for whatever body preceded them, and that small probe is
    # what the two patterns are tried against.  Run the whole test suite
    # before altering this logic.
    # NOTE(review): the probe always starts with '###' and re.match anchors at
    # position 0, so the first pattern (which begins with a newline) looks
    # unable to match here -- confirm before relying on or removing it.
    probe_lines = ['###\n']
    probe_lines.extend(src.splitlines()[-2:])
    probe = '\n'.join(probe_lines)
    for pattern in (last_two_blanks_re, last_two_blanks_re2):
        if pattern.match(probe):
            return True
    return False
177
178
178
179
def remove_comments(src):
    """Strip Python comments from the input source.

    Everything from a ``#`` up to the end of its line is deleted; the line
    break itself is kept.

    Note: the substitution is purely textual and knows nothing about string
    literals, so a ``#`` inside a string is cut off as well.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with every ``#``-to-end-of-line span removed.
    """
    to_line_end = re.compile('#.*')
    return to_line_end.sub('', src)
195
196
196
197
def get_input_encoding():
    """Return the encoding reported by standard input.

    Falls back to 'ascii' when ``sys.stdin`` has no ``encoding`` attribute or
    when that attribute is None."""
    # Some environments replace sys.stdin with an object whose encoding is
    # missing or None; always hand back a usable encoding name.
    detected = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if detected is None else detected
207
208
208 #-----------------------------------------------------------------------------
209 #-----------------------------------------------------------------------------
209 # Classes and functions for normal Python syntax handling
210 # Classes and functions for normal Python syntax handling
210 #-----------------------------------------------------------------------------
211 #-----------------------------------------------------------------------------
211
212
212 class InputSplitter(object):
213 class InputSplitter(object):
213 """An object that can accumulate lines of Python source before execution.
214 """An object that can accumulate lines of Python source before execution.
214
215
215 This object is designed to be fed python source line-by-line, using
216 This object is designed to be fed python source line-by-line, using
216 :meth:`push`. It will return on each push whether the currently pushed
217 :meth:`push`. It will return on each push whether the currently pushed
217 code could be executed already. In addition, it provides a method called
218 code could be executed already. In addition, it provides a method called
218 :meth:`push_accepts_more` that can be used to query whether more input
219 :meth:`push_accepts_more` that can be used to query whether more input
219 can be pushed into a single interactive block.
220 can be pushed into a single interactive block.
220
221
221 This is a simple example of how an interactive terminal-based client can use
222 This is a simple example of how an interactive terminal-based client can use
222 this tool::
223 this tool::
223
224
224 isp = InputSplitter()
225 isp = InputSplitter()
225 while isp.push_accepts_more():
226 while isp.push_accepts_more():
226 indent = ' '*isp.indent_spaces
227 indent = ' '*isp.indent_spaces
227 prompt = '>>> ' + indent
228 prompt = '>>> ' + indent
228 line = indent + raw_input(prompt)
229 line = indent + raw_input(prompt)
229 isp.push(line)
230 isp.push(line)
230 print 'Input source was:\n', isp.source_reset(),
231 print 'Input source was:\n', isp.source_reset(),
231 """
232 """
232 # Number of spaces of indentation computed from input that has been pushed
233 # Number of spaces of indentation computed from input that has been pushed
233 # so far. This is the attribute callers should query to get the current
234 # so far. This is the attribute callers should query to get the current
234 # indentation level, in order to provide auto-indent facilities.
235 # indentation level, in order to provide auto-indent facilities.
235 indent_spaces = 0
236 indent_spaces = 0
236 # String, indicating the default input encoding. It is computed by default
237 # String, indicating the default input encoding. It is computed by default
237 # at initialization time via get_input_encoding(), but it can be reset by a
238 # at initialization time via get_input_encoding(), but it can be reset by a
238 # client with specific knowledge of the encoding.
239 # client with specific knowledge of the encoding.
239 encoding = ''
240 encoding = ''
240 # String where the current full source input is stored, properly encoded.
241 # String where the current full source input is stored, properly encoded.
241 # Reading this attribute is the normal way of querying the currently pushed
242 # Reading this attribute is the normal way of querying the currently pushed
242 # source code, that has been properly encoded.
243 # source code, that has been properly encoded.
243 source = ''
244 source = ''
244 # Code object corresponding to the current source. It is automatically
245 # Code object corresponding to the current source. It is automatically
245 # synced to the source, so it can be queried at any time to obtain the code
246 # synced to the source, so it can be queried at any time to obtain the code
246 # object; it will be None if the source doesn't compile to valid Python.
247 # object; it will be None if the source doesn't compile to valid Python.
247 code = None
248 code = None
248 # Input mode
249 # Input mode
249 input_mode = 'line'
250 input_mode = 'line'
250
251
251 # Private attributes
252 # Private attributes
252
253
253 # List with lines of input accumulated so far
254 # List with lines of input accumulated so far
254 _buffer = None
255 _buffer = None
255 # Command compiler
256 # Command compiler
256 _compile = None
257 _compile = None
257 # Mark when input has changed indentation all the way back to flush-left
258 # Mark when input has changed indentation all the way back to flush-left
258 _full_dedent = False
259 _full_dedent = False
259 # Boolean indicating whether the current block is complete
260 # Boolean indicating whether the current block is complete
260 _is_complete = None
261 _is_complete = None
261
262
def __init__(self, input_mode=None):
    """Create a new InputSplitter instance.

    Parameters
    ----------
    input_mode : str, optional

      One of ['line', 'cell'].  When omitted (None), the class-level
      ``InputSplitter.input_mode`` is used, which is 'line'.

      The input_mode parameter controls how new inputs are used when fed via
      the :meth:`push` method:

      - 'line': meant for line-oriented clients, inputs are appended one at a
        time to the internal buffer and the whole buffer is compiled.

      - 'cell': meant for clients that can edit multi-line 'cells' of text at
        a time.  A cell can contain one or more blocks that can be compiled
        in 'single' mode by Python.  In this mode, each new input completely
        replaces all prior inputs.  Cell mode is thus equivalent to
        prepending a full reset() to every push() call.
    """
    if input_mode is None:
        input_mode = InputSplitter.input_mode
    self.input_mode = input_mode
    self.encoding = get_input_encoding()
    self._compile = codeop.CommandCompiler()
    self._buffer = []
288
289
def reset(self):
    """Clear the input buffer and return all tracked state to its defaults.

    The buffer list is emptied in place, so existing references to it stay
    valid.
    """
    self.indent_spaces = 0
    del self._buffer[:]
    self.source, self.code = '', None
    self._full_dedent = False
    self._is_complete = False
297
298
def source_reset(self):
    """Hand back the accumulated source and wipe all state.

    The source is captured first, then :meth:`reset` is called, and the
    captured text is returned.
    """
    pending = self.source
    self.reset()
    return pending
304
305
def push(self, lines):
    """Push one or more lines of input.

    The lines are stored and the accumulated source is compiled to find out
    whether it already forms a complete Python block.

    Compilation errors of the kinds listed below are swallowed; when one
    occurs the block is reported as complete (True) so the caller can still
    send the text on for evaluation.

    Parameters
    ----------
    lines : string
      One or more lines of Python input.

    Returns
    -------
    is_complete : boolean
      True if the current input source (the result of the current input
      plus prior inputs) forms a complete Python execution block.  The same
      value is stored in the private attribute ``_is_complete`` so it can be
      queried at any time.  When the source ends in a backslash-newline the
      method returns False early and leaves ``_is_complete`` as None.
    """
    # Cell mode: every push starts from a clean slate.
    if self.input_mode == 'cell':
        self.reset()

    self._store(lines)
    pending = self.source

    # Clear the previous verdict up front so that code/source can never look
    # consistent when compilation below blows up.
    self.code = None
    self._is_complete = None

    # An explicit line continuation always needs more input.
    if pending.endswith('\\\n'):
        return False

    self._update_indent(lines)
    try:
        compiled = self._compile(pending, symbol="exec")
    except (SyntaxError, OverflowError, ValueError, TypeError,
            MemoryError):
        # Invalid syntax can surface as any of these from inside the
        # compiler.  Such input counts as 'ready' right away so that it can
        # be sent on for evaluation with possible ipython special-syntax
        # conversion.
        complete = True
    else:
        # No exception, though the compiler may still have returned None
        # for an incomplete block.
        self.code = compiled
        complete = compiled is not None

    self._is_complete = complete
    return complete
359
360
360 def push_accepts_more(self):
361 def push_accepts_more(self):
361 """Return whether a block of interactive input can accept more input.
362 """Return whether a block of interactive input can accept more input.
362
363
363 This method is meant to be used by line-oriented frontends, who need to
364 This method is meant to be used by line-oriented frontends, who need to
364 guess whether a block is complete or not based solely on prior and
365 guess whether a block is complete or not based solely on prior and
365 current input lines. The InputSplitter considers it has a complete
366 current input lines. The InputSplitter considers it has a complete
366 interactive block and will not accept more input only when either a
367 interactive block and will not accept more input only when either a
367 SyntaxError is raised, or *all* of the following are true:
368 SyntaxError is raised, or *all* of the following are true:
368
369
369 1. The input compiles to a complete statement.
370 1. The input compiles to a complete statement.
370
371
371 2. The indentation level is flush-left (because if we are indented,
372 2. The indentation level is flush-left (because if we are indented,
372 like inside a function definition or for loop, we need to keep
373 like inside a function definition or for loop, we need to keep
373 reading new input).
374 reading new input).
374
375
375 3. There is one extra line consisting only of whitespace.
376 3. There is one extra line consisting only of whitespace.
376
377
377 Because of condition #3, this method should be used only by
378 Because of condition #3, this method should be used only by
378 *line-oriented* frontends, since it means that intermediate blank lines
379 *line-oriented* frontends, since it means that intermediate blank lines
379 are not allowed in function definitions (or any other indented block).
380 are not allowed in function definitions (or any other indented block).
380
381
381 If the current input produces a syntax error, this method immediately
382 If the current input produces a syntax error, this method immediately
382 returns False but does *not* raise the syntax error exception, as
383 returns False but does *not* raise the syntax error exception, as
383 typically clients will want to send invalid syntax to an execution
384 typically clients will want to send invalid syntax to an execution
384 backend which might convert the invalid syntax into valid Python via
385 backend which might convert the invalid syntax into valid Python via
385 one of the dynamic IPython mechanisms.
386 one of the dynamic IPython mechanisms.
386 """
387 """
387
388
388 # With incomplete input, unconditionally accept more
389 # With incomplete input, unconditionally accept more
389 if not self._is_complete:
390 if not self._is_complete:
390 return True
391 return True
391
392
392 # If we already have complete input and we're flush left, the answer
393 # If we already have complete input and we're flush left, the answer
393 # depends. In line mode, if there hasn't been any indentation,
394 # depends. In line mode, if there hasn't been any indentation,
394 # that's it. If we've come back from some indentation, we need
395 # that's it. If we've come back from some indentation, we need
395 # the blank final line to finish.
396 # the blank final line to finish.
396 # In cell mode, we need to check how many blocks the input so far
397 # In cell mode, we need to check how many blocks the input so far
397 # compiles into, because if there's already more than one full
398 # compiles into, because if there's already more than one full
398 # independent block of input, then the client has entered full
399 # independent block of input, then the client has entered full
399 # 'cell' mode and is feeding lines that each is complete. In this
400 # 'cell' mode and is feeding lines that each is complete. In this
400 # case we should then keep accepting. The Qt terminal-like console
401 # case we should then keep accepting. The Qt terminal-like console
401 # does precisely this, to provide the convenience of terminal-like
402 # does precisely this, to provide the convenience of terminal-like
402 # input of single expressions, but allowing the user (with a
403 # input of single expressions, but allowing the user (with a
403 # separate keystroke) to switch to 'cell' mode and type multiple
404 # separate keystroke) to switch to 'cell' mode and type multiple
404 # expressions in one shot.
405 # expressions in one shot.
405 if self.indent_spaces==0:
406 if self.indent_spaces==0:
406 if self.input_mode=='line':
407 if self.input_mode=='line':
407 if not self._full_dedent:
408 if not self._full_dedent:
408 return False
409 return False
409 else:
410 else:
410 try:
411 try:
411 code_ast = ast.parse(u''.join(self._buffer))
412 code_ast = ast.parse(u''.join(self._buffer))
412 except Exception:
413 except Exception:
413 return False
414 return False
414 else:
415 else:
415 if len(code_ast.body) == 1:
416 if len(code_ast.body) == 1:
416 return False
417 return False
417
418
418 # When input is complete, then termination is marked by an extra blank
419 # When input is complete, then termination is marked by an extra blank
419 # line at the end.
420 # line at the end.
420 last_line = self.source.splitlines()[-1]
421 last_line = self.source.splitlines()[-1]
421 return bool(last_line and not last_line.isspace())
422 return bool(last_line and not last_line.isspace())
422
423
423 #------------------------------------------------------------------------
424 #------------------------------------------------------------------------
424 # Private interface
425 # Private interface
425 #------------------------------------------------------------------------
426 #------------------------------------------------------------------------
426
427
def _find_indent(self, line):
    """Compute the new indentation level for a single line.

    Parameters
    ----------
    line : str
      A single new line of non-whitespace, non-comment Python input.

    Returns
    -------
    indent_spaces : int
      New value for the indent level (it may be equal to
      self.indent_spaces if indentation doesn't change).

    full_dedent : boolean
      Whether the new line causes a full flush-left dedent.
    """
    indent_spaces = self.indent_spaces
    full_dedent = self._full_dedent

    # A line that starts to the left of the current level dedents to it.
    inisp = num_ini_spaces(line)
    if inisp < indent_spaces:
        indent_spaces = inisp
        if indent_spaces <= 0:
            full_dedent = True

    # endswith() instead of indexing [-1], so that an empty or
    # whitespace-only line cannot raise IndexError here.
    if line.rstrip().endswith(':'):
        # Line opens a new block.
        indent_spaces += 4
    elif dedent_re.match(line):
        # Line matches dedent_re, so the next line sits one level out.
        indent_spaces -= 4
        if indent_spaces <= 0:
            full_dedent = True

    # Safety: never report a negative indentation level.
    if indent_spaces < 0:
        indent_spaces = 0

    return indent_spaces, full_dedent
467
468
def _update_indent(self, lines):
    """Update the indentation state from each meaningful line of `lines`.

    Comments are removed first; empty and whitespace-only lines are
    skipped.  Every remaining line is fed to `_find_indent`, whose result
    replaces `self.indent_spaces` and `self._full_dedent`.
    """
    for line in remove_comments(lines).splitlines():
        if line and not line.isspace():
            self.indent_spaces, self._full_dedent = self._find_indent(line)
472
473
def _store(self, lines, buffer=None, store='source'):
    """Store one or more lines of input.

    If input lines are not newline-terminated, a newline is automatically
    appended.

    Parameters
    ----------
    lines : string
      Text to append to the buffer as a single entry.
    buffer : list, optional
      List to append to; defaults to `self._buffer`.
    store : str, optional
      Name of the attribute on self that receives the joined buffer
      contents (the result of `self._set_source(buffer)`).
    """
    if buffer is None:
        buffer = self._buffer

    if lines.endswith('\n'):
        buffer.append(lines)
    else:
        buffer.append(lines+'\n')
    setattr(self, store, self._set_source(buffer))
487
488
def _set_source(self, buffer):
    """Return the entries of `buffer` concatenated into one unicode string."""
    return u''.join(buffer)
490
491
491
492
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    # String with raw, untransformed input.
    source_raw = ''

    # Flag to track when a transformer has stored input that it hasn't given
    # back yet.
    transformer_accumulating = False

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, input_mode=None, transforms=None):
        """Create the splitter.

        Parameters
        ----------
        input_mode : str, optional
          Passed through unchanged to InputSplitter.__init__.
        transforms : list, optional
          Transformer instances applied, in order, to every pushed line.
          When None, the default IPython transformer chain is built.
        """
        super(IPythonInputSplitter, self).__init__(input_mode)
        self._buffer_raw = []
        self._validate = True
        if transforms is not None:
            self.transforms = transforms
        else:
            self.transforms = [leading_indent(),
                               classic_prompt(),
                               ipy_prompt(),
                               cellmagic(),
                               assemble_logical_lines(),
                               help_end(),
                               escaped_transformer(),
                               assign_from_magic(),
                               assign_from_system(),
                               ]

    def reset(self):
        """Reset the input buffer and associated state."""
        super(IPythonInputSplitter, self).reset()
        self._buffer_raw[:] = []
        self.source_raw = ''
        self.transformer_accumulating = False
        for t in self.transforms:
            t.reset()

    def flush_transformers(self):
        """Reset every transformer and store any text one of them held back.

        NOTE(review): only the last non-empty value returned by a
        transformer's reset() is stored; earlier ones are overwritten.
        """
        out = None
        for t in self.transforms:
            tmp = t.reset()
            if tmp:
                out = tmp
        if out:
            self._store(out)

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset.
        """
        self.flush_transformers()
        out = self.source
        out_r = self.source_raw
        self.reset()
        return out, out_r

    def source_reset(self):
        """Flush the transformers, then defer to InputSplitter.source_reset."""
        self.flush_transformers()
        return super(IPythonInputSplitter, self).source_reset()

    def push_accepts_more(self):
        """Return True while a transformer is still accumulating lines;
        otherwise defer to InputSplitter.push_accepts_more."""
        if self.transformer_accumulating:
            return True
        else:
            return super(IPythonInputSplitter, self).push_accepts_more()

    def transform_cell(self, cell):
        """Process and translate a cell of input.
        """
        self.reset()
        self.push(cell)
        return self.source_reset()

    def push(self, lines):
        """Push one or more lines of IPython input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not, after processing
        all input lines for special IPython syntax.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.
        """

        # We must ensure all input is pure unicode
        lines = cast_unicode(lines, self.encoding)

        # ''.splitlines() --> [], but we need to push the empty line to transformers
        lines_list = lines.splitlines()
        if not lines_list:
            lines_list = ['']

        # Transform logic
        #
        # Every line is handed to push_line(), which runs it through each
        # transformer in self.transforms in order.

        # If we were in 'block' mode, since we're going to pump the parent
        # class by hand line by line, we need to temporarily switch out to
        # 'line' mode, do a single manual reset and then feed the lines one
        # by one.  Note that this only matters if the input has more than one
        # line.
        changed_input_mode = False

        if self.input_mode == 'cell':
            self.reset()
            changed_input_mode = True
            saved_input_mode = 'cell'
            self.input_mode = 'line'

        # Store raw source before applying any transformations to it.  Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        try:
            for line in lines_list:
                out = self.push_line(line)
        finally:
            # Restore the caller's input mode even if a transformer raised.
            if changed_input_mode:
                self.input_mode = saved_input_mode

        return out

    def push_line(self, line):
        """Run one line through the transformers and push the result.

        Returns False, with transformer_accumulating set, as soon as a
        transformer returns None (it is holding the line back until it has
        more input).  Otherwise returns whatever InputSplitter.push returns
        for the fully transformed line.
        """
        for transformer in self.transforms:
            line = transformer.push(line)
            if line is None:
                self.transformer_accumulating = True
                return False

        self.transformer_accumulating = False
        return super(IPythonInputSplitter, self).push(line)
@@ -1,441 +1,454 b''
1 import abc
1 import abc
2 import functools
2 import functools
3 import re
3 import re
4 from StringIO import StringIO
4 from StringIO import StringIO
5 import tokenize
5 import tokenize
6
6
7 try:
8 generate_tokens = tokenize.generate_tokens
9 except AttributeError:
10 # Python 3. Note that we use the undocumented _tokenize because it expects
11 # strings, not bytes. See also Python issue #9969.
12 generate_tokens = tokenize._tokenize
13
7 from IPython.core.splitinput import split_user_input, LineInfo
14 from IPython.core.splitinput import split_user_input, LineInfo
15 from IPython.utils.untokenize import untokenize
8
16
9 #-----------------------------------------------------------------------------
17 #-----------------------------------------------------------------------------
10 # Globals
18 # Globals
11 #-----------------------------------------------------------------------------
19 #-----------------------------------------------------------------------------
12
20
13 # The escape sequences that define the syntax transformations IPython will
21 # The escape sequences that define the syntax transformations IPython will
14 # apply to user input. These can NOT be just changed here: many regular
22 # apply to user input. These can NOT be just changed here: many regular
15 # expressions and other parts of the code may use their hardcoded values, and
23 # expressions and other parts of the code may use their hardcoded values, and
16 # for all intents and purposes they constitute the 'IPython syntax', so they
24 # for all intents and purposes they constitute the 'IPython syntax', so they
17 # should be considered fixed.
25 # should be considered fixed.
18
26
ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

# All escape sequences, in the order they are defined above.
ESC_SEQUENCES = [ESC_SHELL, ESC_SH_CAP, ESC_HELP,
                 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
                 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN]
32
40
33
41
class InputTransformer(object):
    """Abstract base class for line-based input transformers."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def push(self, line):
        """Send a line of input to the transformer, returning the transformed
        input or None if the transformer is waiting for more input.

        Must be overridden by subclasses.
        """
        pass

    @abc.abstractmethod
    def reset(self):
        """Return, transformed, any lines that the transformer has
        accumulated, and reset its internal state.

        Must be overridden by subclasses.
        """
        pass

    # Set this to True to allow the transformer to act on lines inside strings.
    look_in_string = False

    @classmethod
    def wrap(cls, func):
        """Can be used by subclasses as a decorator, to return a factory that
        will allow instantiation with the decorated object.

        The returned factory takes no arguments and returns ``cls(func)``.
        Setting a true ``look_in_string`` attribute on the factory makes
        every instance it creates carry ``look_in_string = True``.
        """
        @functools.wraps(func)
        def transformer_factory():
            transformer = cls(func)
            # Looked up at call time, so the flag can be set on the factory
            # after decoration.
            if getattr(transformer_factory, 'look_in_string', False):
                transformer.look_in_string = True
            return transformer

        return transformer_factory
72
80
class StatelessInputTransformer(InputTransformer):
    """Wrapper for a stateless input transformer implemented as a function."""
    def __init__(self, func):
        # func is called with each pushed line and its result is returned.
        self.func = func

    def __repr__(self):
        return "StatelessInputTransformer(func={!r})".format(self.func)

    def push(self, line):
        """Send a line of input to the transformer, returning the
        transformed input."""
        return self.func(line)

    def reset(self):
        """No-op - exists for compatibility."""
        pass
89
97
class CoroutineInputTransformer(InputTransformer):
    """Wrapper for an input transformer implemented as a coroutine."""
    def __init__(self, coro):
        # Prime it: create the generator and advance it to its first yield
        # so that it is ready to receive values through send().
        self.coro = coro()
        next(self.coro)

    def __repr__(self):
        return "CoroutineInputTransformer(coro={!r})".format(self.coro)

    def push(self, line):
        """Send a line of input to the transformer, returning the
        transformed input or None if the transformer is waiting for more
        input.
        """
        return self.coro.send(line)

    def reset(self):
        """Return, transformed, any lines that the transformer has
        accumulated, and reset its internal state.

        This is done by sending None into the coroutine.
        """
        return self.coro.send(None)
112
120
class TokenInputTransformer(InputTransformer):
    """Wrapper for a token-based input transformer.

    func should accept a list of tokens (5-tuples, see tokenize docs), and
    return an iterable which can be passed to untokenize() (the patched copy
    imported from IPython.utils.untokenize).
    """
    def __init__(self, func):
        self.func = func
        # Text accumulated so far for the statement being assembled.
        self.current_line = ""
        # True once the tokenizer has been handed current_line.
        self.line_used = False
        self.reset_tokenizer()

    def reset_tokenizer(self):
        """Start a fresh tokenizer that reads its input from get_line()."""
        self.tokenizer = generate_tokens(self.get_line)

    def get_line(self):
        """readline callback for the tokenizer.

        Hands out current_line once.  A second request means the tokenizer
        wants a line we do not have yet, which is signalled with TokenError.
        """
        if self.line_used:
            raise tokenize.TokenError
        self.line_used = True
        return self.current_line

    def push(self, line):
        """Add a line and try to tokenize everything accumulated so far.

        Returns None when the accumulated text is an unfinished multi-line
        statement; otherwise returns the untokenized result of func, without
        trailing newlines.
        """
        self.current_line += line + "\n"
        self.line_used = False
        tokens = []
        try:
            for intok in self.tokenizer:
                tokens.append(intok)
                if intok[0] in (tokenize.NEWLINE, tokenize.NL):
                    # Stop before we try to pull a line we don't have yet
                    break
        except tokenize.TokenError:
            # Multi-line statement - stop and try again with the next line
            self.reset_tokenizer()
            return None

        self.current_line = ""
        self.reset_tokenizer()
        return untokenize(self.func(tokens)).rstrip('\n')

    def reset(self):
        """Return any accumulated text (without trailing newlines) and clear
        the internal state.  Returns None when nothing was accumulated."""
        l = self.current_line
        self.current_line = ""
        # Also discard the tokenizer: if it was exhausted by an exception
        # other than TokenError, reusing it would yield no tokens at all.
        self.reset_tokenizer()
        if l:
            return l.rstrip('\n')
156
166
@TokenInputTransformer.wrap
def assemble_logical_lines(tokens):
    """Return the tokens unchanged.

    The work is done by the TokenInputTransformer wrapper itself, which
    holds lines back until the tokenizer accepts them.
    """
    return tokens
157
170
158 # Utilities
171 # Utilities
def _make_help_call(target, esc, lspace, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)

    Parameters
    ----------
    target : str
      Name to look up.
    esc : str
      The escape used; '??' selects pinfo2.
    lspace : str
      Leading whitespace to put in front of the generated call.
    next_input : str, optional
      If given, the generated code first passes it to set_next_input().

    Returns
    -------
    str
      Source text of the get_ipython() call(s).
    """
    # '??' wins over a wildcard; a '*' in the target means a search.
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'
    arg = " ".join([method, target])
    if next_input is None:
        return '%sget_ipython().magic(%r)' % (lspace, arg)
    else:
        return '%sget_ipython().set_next_input(%r);get_ipython().magic(%r)' % \
           (lspace, next_input, arg)
171
184
172 @CoroutineInputTransformer.wrap
@CoroutineInputTransformer.wrap
def escaped_transformer():
    """Translate lines beginning with one of IPython's escape characters.

    This is stateful to allow magic commands etc. to be continued over several
    lines using explicit line continuations (a backslash at the end of a
    line).
    """

    # These define the transformations for the different escape characters.
    def _tr_system(line_info):
        "Translate lines escaped with: !"
        cmd = line_info.line.lstrip().lstrip(ESC_SHELL)
        return '%sget_ipython().system(%r)' % (line_info.pre, cmd)

    def _tr_system2(line_info):
        "Translate lines escaped with: !!"
        cmd = line_info.line.lstrip()[2:]
        return '%sget_ipython().getoutput(%r)' % (line_info.pre, cmd)

    def _tr_help(line_info):
        "Translate lines escaped with: ?/??"
        # A naked help line should just fire the intro help screen
        if not line_info.line[1:]:
            return 'get_ipython().show_usage()'

        return _make_help_call(line_info.ifun, line_info.esc, line_info.pre)

    def _tr_magic(line_info):
        "Translate lines escaped with: %"
        tpl = '%sget_ipython().magic(%r)'
        cmd = ' '.join([line_info.ifun, line_info.the_rest]).strip()
        return tpl % (line_info.pre, cmd)

    def _tr_quote(line_info):
        "Translate lines escaped with: ,"
        return '%s%s("%s")' % (line_info.pre, line_info.ifun,
                             '", "'.join(line_info.the_rest.split()) )

    def _tr_quote2(line_info):
        "Translate lines escaped with: ;"
        return '%s%s("%s")' % (line_info.pre, line_info.ifun,
                               line_info.the_rest)

    def _tr_paren(line_info):
        "Translate lines escaped with: /"
        return '%s%s(%s)' % (line_info.pre, line_info.ifun,
                             ", ".join(line_info.the_rest.split()))

    # Dispatch table: escape sequence -> translation function.
    tr = { ESC_SHELL  : _tr_system,
           ESC_SH_CAP : _tr_system2,
           ESC_HELP   : _tr_help,
           ESC_HELP2  : _tr_help,
           ESC_MAGIC  : _tr_magic,
           ESC_QUOTE  : _tr_quote,
           ESC_QUOTE2 : _tr_quote2,
           ESC_PAREN  : _tr_paren }

    line = ''
    while True:
        line = (yield line)
        # Empty, whitespace-only and None input is handed back unchanged.
        if not line or line.isspace():
            continue
        lineinf = LineInfo(line)
        # Lines without a known escape are handed back unchanged too.
        if lineinf.esc not in tr:
            continue

        # Collect the escaped line and any backslash continuations.
        # Yielding None tells the caller that more input is needed; a None
        # sent in ends the collection with whatever has been gathered.
        parts = []
        while line is not None:
            parts.append(line.rstrip('\\'))
            if not line.endswith('\\'):
                break
            line = (yield None)

        # Output
        lineinf = LineInfo(' '.join(parts))
        line = tr[lineinf.esc](lineinf)
248
261
# Matches the (possibly empty) run of whitespace at the start of a line.
_initial_space_re = re.compile(r'\s*')

# Matches a trailing ``name?`` / ``name??`` help request.
#   group(1): the target, optionally prefixed by up to two '%' characters
#   group(3): the '?' or '??' marker itself
_help_end_re = re.compile(
    r"""(%{0,2}
        [a-zA-Z_*][\w*]*        # Variable name
        (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
        )
        (\?\??)$                # ? or ??""",
    re.VERBOSE)
257
270
def has_comment(src):
    """Report whether an input line contains (ends in, or is) a comment.

    The line is run through tokenize, so a ``#`` inside a string literal is
    not mistaken for a comment.

    Parameters
    ----------
    src : string
      A single line input string.

    Returns
    -------
    Boolean: True if source has a comment.
    """
    found_comment = False
    try:
        for tok in tokenize.generate_tokens(StringIO(src).readline):
            if tok[0] == tokenize.COMMENT:
                found_comment = True
    except tokenize.TokenError:
        # Incomplete input: keep whatever was seen before the error.
        pass
    return found_comment
280
293
281
294
@StatelessInputTransformer.wrap
def help_end(line):
    """Translate lines with ?/?? at the end.

    Lines that do not end in a help request, or that contain a comment, are
    returned unchanged. Otherwise the request is handed to
    ``_make_help_call`` together with the line's leading whitespace.
    """
    found = _help_end_re.search(line)
    if found is None or has_comment(line):
        return line

    indent = _initial_space_re.match(line).group(0)

    # If we're mid-command, put it back on the next prompt for the user.
    if line.strip() == found.group(0):
        pending_input = None
    else:
        pending_input = line.rstrip('?')

    return _make_help_call(found.group(1), found.group(3), indent,
                           pending_input)
296
309
297
310
@CoroutineInputTransformer.wrap
def cellmagic():
    """Captures & transforms cell magics.

    After a cell magic is started (a line beginning with ESC_MAGIC2), this
    stores up any lines it gets until it receives an empty or whitespace-only
    line, or is reset (sent None). It then emits a single
    ``get_ipython().run_cell_magic(name, first_line_args, body)`` call.
    """
    tpl = 'get_ipython().run_cell_magic(%r, %r, %r)'
    # Raw string: '\w' and '\?' are invalid escapes in a normal literal.
    cellmagic_help_re = re.compile(r'%%\w+\?')
    line = ''
    while True:
        line = (yield line)
        if (not line) or (not line.startswith(ESC_MAGIC2)):
            continue

        if cellmagic_help_re.match(line):
            # This case will be handled by help_end
            continue

        first = line
        body = []
        # Yield None while accumulating: nothing is emitted until the cell
        # is complete.
        line = (yield None)
        while (line is not None) and (line.strip() != ''):
            body.append(line)
            line = (yield None)

        # Output
        magic_name, _, first = first.partition(' ')
        magic_name = magic_name.lstrip(ESC_MAGIC2)
        line = tpl % (magic_name, first, u'\n'.join(body))
328
341
329
342
def _strip_prompts(prompt1_re, prompt2_re):
    """Remove matching input prompts from a block of input.

    Generator driven with ``send()``: each line sent in is answered with the
    (possibly stripped) line. `prompt1_re` is tried on the first line after a
    reset (a sent None); while prompts keep matching, following lines are
    checked against `prompt2_re`. If the first line carries no prompt, lines
    are passed through untouched until the next reset.
    """
    line = ''
    while True:
        line = (yield line)
        if line is None:
            continue

        found = prompt1_re.match(line)
        if not found:
            # Prompts not in input - wait for reset
            while line is not None:
                line = (yield line)
            continue

        while found:
            # match() anchors at position 0, so end() is the prompt length.
            line = (yield line[found.end():])
            if line is None:
                break
            found = prompt2_re.match(line)
350
363
@CoroutineInputTransformer.wrap
def classic_prompt():
    """Strip the >>>/... prompts of the Python interactive shell."""
    # The first line must carry '>>> '; later lines may carry either prompt.
    return _strip_prompts(re.compile(r'^(>>> )'),
                          re.compile(r'^(>>> |^\.\.\. )'))

# NOTE(review): the flag is only set here; presumably it tells the caller to
# apply this transformer inside multi-line strings too - confirm at the
# consumer.
classic_prompt.look_in_string = True
359
372
@CoroutineInputTransformer.wrap
def ipy_prompt():
    """Strip IPython's In [1]:/...: prompts."""
    # The first line must carry 'In [N]: '; later lines may carry either that
    # or the '   ...: ' continuation prompt.
    return _strip_prompts(re.compile(r'^In \[\d+\]: '),
                          re.compile(r'^(In \[\d+\]: |^\ \ \ \.\.\.+: )'))

# NOTE(review): the flag is only set here; presumably it tells the caller to
# apply this transformer inside multi-line strings too - confirm at the
# consumer.
ipy_prompt.look_in_string = True
368
381
369
382
@CoroutineInputTransformer.wrap
def leading_indent():
    """Remove leading indentation.

    If the first line starts with spaces or tabs, that same whitespace prefix
    is removed from each following line that begins with it, until the
    transformer is reset (sent None).
    """
    space_re = re.compile(r'^[ \t]+')
    line = ''
    while True:
        line = (yield line)
        if line is None:
            continue

        found = space_re.match(line)
        if found is None:
            # No leading spaces - wait for reset
            while line is not None:
                line = (yield line)
            continue

        prefix = found.group(0)
        while line is not None:
            if line.startswith(prefix):
                line = line[len(prefix):]
            line = (yield line)

# NOTE(review): the flag is only set here; presumably it tells the caller to
# apply this transformer inside multi-line strings too - confirm at the
# consumer.
leading_indent.look_in_string = True
398
411
399
412
def _special_assignment(assignment_re, template):
    """Transform assignment from system & magic commands.

    This is stateful so that it can handle magic commands continued on several
    lines: a line ending in a backslash makes the generator yield None and
    wait for the next piece. The pieces are joined with single spaces,
    re-matched against `assignment_re`, and the ``lhs`` and ``cmd`` groups are
    substituted into `template`.
    """
    line = ''
    while True:
        line = (yield line)
        if not line or line.isspace() or not assignment_re.match(line):
            continue

        # Collect the first line plus any backslash-continued lines.
        parts = [line.rstrip('\\')]
        while line.endswith('\\'):
            line = (yield None)
            if line is None:
                # Reset in mid-continuation: emit what has been collected.
                break
            parts.append(line.rstrip('\\'))

        # Output
        whole = assignment_re.match(' '.join(parts))
        line = template % whole.group('lhs', 'cmd')
426
439
@CoroutineInputTransformer.wrap
def assign_from_system():
    """Transform assignment from system commands (e.g. files = !ls)"""
    # One or more comma-separated (dotted) names on the left-hand side ...
    lhs_pattern = r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
    # ... then '= !' followed by the command text.
    cmd_pattern = r'\s*=\s*!\s*(?P<cmd>.*)'
    return _special_assignment(re.compile(lhs_pattern + cmd_pattern),
                               '%s = get_ipython().getoutput(%r)')
434
447
@CoroutineInputTransformer.wrap
def assign_from_magic():
    """Transform assignment from magic commands (e.g. a = %who_ls)"""
    # One or more comma-separated (dotted) names on the left-hand side ...
    lhs_pattern = r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
    # ... then '= %' followed by the magic command text.
    cmd_pattern = r'\s*=\s*%\s*(?P<cmd>.*)'
    return _special_assignment(re.compile(lhs_pattern + cmd_pattern),
                               '%s = get_ipython().magic(%r)')
General Comments 0
You need to be logged in to leave comments. Login now