##// END OF EJS Templates
Expand a bit the documentation about transformers....
Matthias Bussonnier -
Show More
@@ -1,743 +1,741 b''
1 """Input handling and transformation machinery.
1 """Input handling and transformation machinery.
2
2
3 The first class in this module, :class:`InputSplitter`, is designed to tell when
3 The first class in this module, :class:`InputSplitter`, is designed to tell when
4 input from a line-oriented frontend is complete and should be executed, and when
4 input from a line-oriented frontend is complete and should be executed, and when
5 the user should be prompted for another line of code instead. The name 'input
5 the user should be prompted for another line of code instead. The name 'input
6 splitter' is largely for historical reasons.
6 splitter' is largely for historical reasons.
7
7
8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 with full support for the extended IPython syntax (magics, system calls, etc).
9 with full support for the extended IPython syntax (magics, system calls, etc).
10 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
10 The code to actually do these transformations is in :mod:`IPython.core.inputtransformer`.
11 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
11 :class:`IPythonInputSplitter` feeds the raw code to the transformers in order
12 and stores the results.
12 and stores the results.
13
13
14 For more details, see the class docstrings below.
14 For more details, see the class docstrings below.
15 """
15 """
16
16
17 # Copyright (c) IPython Development Team.
17 # Copyright (c) IPython Development Team.
18 # Distributed under the terms of the Modified BSD License.
18 # Distributed under the terms of the Modified BSD License.
19 import ast
19 import ast
20 import codeop
20 import codeop
21 import io
21 import io
22 import re
22 import re
23 import sys
23 import sys
24 import tokenize
24 import tokenize
25 import warnings
25 import warnings
26
26
27 from IPython.utils.py3compat import cast_unicode
28 from IPython.core.inputtransformer import (leading_indent,
27 from IPython.core.inputtransformer import (leading_indent,
29 classic_prompt,
28 classic_prompt,
30 ipy_prompt,
29 ipy_prompt,
31 cellmagic,
30 cellmagic,
32 assemble_logical_lines,
31 assemble_logical_lines,
33 help_end,
32 help_end,
34 escaped_commands,
33 escaped_commands,
35 assign_from_magic,
34 assign_from_magic,
36 assign_from_system,
35 assign_from_system,
37 assemble_python_lines,
36 assemble_python_lines,
38 )
37 )
39
38
40 # These are available in this module for backwards compatibility.
39 # These are available in this module for backwards compatibility.
41 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
40 from IPython.core.inputtransformer import (ESC_SHELL, ESC_SH_CAP, ESC_HELP,
42 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
41 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
43 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
42 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN, ESC_SEQUENCES)
44
43
45 #-----------------------------------------------------------------------------
44 #-----------------------------------------------------------------------------
46 # Utilities
45 # Utilities
47 #-----------------------------------------------------------------------------
46 #-----------------------------------------------------------------------------
48
47
# FIXME: These are general-purpose utilities that could later be moved to a
# general utilities module.  They are kept here for now because we are being
# very strict about test coverage of this code, and keeping them local lets us
# maintain 100% coverage while developing.
53
52
# Compiled regexps used for autoindent management.
#
# ``dedent_re`` recognises indented lines holding a statement that normally
# closes a block (raise / return / pass / break / continue).
dedent_re = re.compile(
    '|'.join(
        (
            r'^\s+raise(\s.*)?$',         # raise statement (+ space + other stuff, maybe)
            r'^\s+raise\([^\)]*\).*$',    # wacky raise with immediate open paren
            r'^\s+return(\s.*)?$',        # normal return (+ space + other stuff, maybe)
            r'^\s+return\([^\)]*\).*$',   # wacky return with immediate open paren
            r'^\s+pass\s*$',              # pass (optionally followed by trailing spaces)
            r'^\s+break\s*$',             # break (optionally followed by trailing spaces)
            r'^\s+continue\s*$',          # continue (optionally followed by trailing spaces)
        )
    )
)
# ``ini_spaces_re`` captures the run of leading whitespace (newline excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
65
64
# Regexp matching pure comment lines (optional leading whitespace followed by
# '#'), so we don't accidentally insert 'if 1:' before pure comments.
# The pattern is a raw string: '\s' and '\#' are not valid escapes in an
# ordinary string literal and make the interpreter emit a warning.
comment_line_re = re.compile(r'^\s*\#')
69
68
70
69
def num_ini_spaces(s):
    """Count the leading whitespace characters of a string.

    Each tab counts as a single character.  Mixing tabs and spaces in the
    user's input is *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading whitespace run, or 0 when there is none.
    """
    leading = ini_spaces_re.match(s)
    return leading.end() if leading else 0
91
90
# Fake token types for partial_tokens; they are numbered just past the real
# token types defined by the tokenize module.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1


# The 2 classes below have the same API as TokenInfo, but don't try to look up
# a token type name that they won't find.
class IncompleteString:
    """Pseudo-token for a string literal that is still open at end of input.

    Parameters
    ----------
    s : string
        Text of the unfinished string.
    start, end : tuple
        (row, column) positions, as in TokenInfo.
    line : string
        The source line associated with the token.
    """
    type = exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        # ``s`` is the historical attribute name; ``string`` mirrors the
        # TokenInfo field so generic token-handling code can read either.
        self.s = self.string = s
        self.start = start
        self.end = end
        self.line = line


class InMultilineStatement:
    """Pseudo-token marking that input ended inside a multi-line statement.

    Parameters
    ----------
    pos : tuple
        (row, column) position, used as both start and end.
    line : string
        The source line associated with the token.
    """
    type = exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        # Zero-width token: it carries no text of its own.
        self.s = self.string = ''
        self.start = self.end = pos
        self.line = line
112
111
def partial_tokens(s):
    """Yield tokens from a string of code that may be incomplete.

    On top of the regular tokenize tokens, two special token types may be
    produced: INCOMPLETE_STRING and IN_MULTILINE_STATEMENT.  Either one can
    only be the last token yielded; they stand for the two main ways in which
    code can be incomplete.  Any other tokenizer error is re-raised.
    """
    reader = io.StringIO(s).readline
    # Placeholder so that ``last_tok.end`` is defined even when the tokenizer
    # fails before producing anything.
    last_tok = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    try:
        for last_tok in tokenize.generate_tokens(reader):
            yield last_tok
    except tokenize.TokenError as err:
        # The tokenizer ran into EOF: work out where the input stops.
        src_lines = s.splitlines(keepends=True)
        eof = len(src_lines), len(src_lines[-1])
        message = err.args[0]
        if 'multi-line string' in message:
            # Everything after the last good token belongs to the open string
            start = last_tok.end
            row, col = start
            remainder = src_lines[row-1][col:] + ''.join(src_lines[row:])
            yield IncompleteString(remainder, start, eof, src_lines[-1])
        elif 'multi-line statement' in message:
            yield InMultilineStatement(eof, src_lines[-1])
        else:
            raise
137
136
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : string
        The source entered so far; it may be incomplete.

    Returns
    -------
    n : int
        Suggested indentation for the next line.  It is 0 when there is
        nothing to base a decision on or when the input ends inside a
        multiline string.
    """
    tokens = list(partial_tokens(code))
    if tokens and tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()
    if not tokens:
        return 0

    # Trailing dedents, newlines and comments say nothing about the statement
    # being continued, so drop them.
    skippable = {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}
    while tokens[-1].type in skippable:
        tokens.pop()
        if not tokens:
            # Only skippable tokens were present; without this check the
            # next tokens[-1] lookup would raise IndexError.
            return 0

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before
    prev_indents = [0]
    def _add_indent(n):
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # The token following a non-logical newline shows where the
            # continuation line starts.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    # If we've just opened a multiline statement (e.g. 'a = ['), indent more
    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        opening = {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}
        if len(tokens) > 1 and tokens[-2].exact_type in opening:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
197
196
198
197
def last_blank(src):
    """Tell whether the input source ends in a blank.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether the last line of `src` is
        empty or made only of whitespace.
    """
    if not src:
        return False
    final_line = src.splitlines()[-1]
    return not final_line or final_line.isspace()
212
211
213
212
last_two_blanks_re = re.compile(r'\n\s*\n\s*$', re.MULTILINE)
last_two_blanks_re2 = re.compile(r'.+\n\s*\n\s+$', re.MULTILINE)


def last_two_blanks(src):
    """Tell whether the input source ends in two blanks.

    A blank is either a newline or a line consisting of whitespace.

    Parameters
    ----------
    src : string
        A single or multiline string.

    Returns
    -------
    bool
        False for empty input; otherwise whether either regexp matches the
        probe string built from the last two lines.
    """
    if not src:
        return False
    # The logic here is tricky: a single regexp that passes all the tests
    # could not be found.  Instead the last two lines of the source are kept
    # and prefixed with '###\n', standing in for whatever body came before
    # them; that fixed structure is then checked with two regexps.  Not
    # elegant, but it works.  Validate against the whole test suite before
    # changing any of this!
    tail = src.splitlines()[-2:]
    probe = '\n'.join(['###\n', *tail])
    return any(
        pattern.match(probe) is not None
        for pattern in (last_two_blanks_re, last_two_blanks_re2)
    )
238
237
239
238
def remove_comments(src):
    """Strip every comment from the input source.

    Note: the stripping is purely textual - from each '#' to the end of its
    line - so a '#' that sits inside a string literal is removed as well.

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = '#.*'
    stripped = re.sub(comment_pattern, '', src)
    return stripped
256
255
257
256
def get_input_encoding():
    """Return the default standard input encoding.

    Falls back to 'ascii' when sys.stdin has no encoding."""
    # Some unusual environments have sys.stdin.encoding set to None (or no
    # such attribute at all); make sure a valid encoding comes back anyway.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
268
267
269 #-----------------------------------------------------------------------------
268 #-----------------------------------------------------------------------------
270 # Classes and functions for normal Python syntax handling
269 # Classes and functions for normal Python syntax handling
271 #-----------------------------------------------------------------------------
270 #-----------------------------------------------------------------------------
272
271
273 class InputSplitter(object):
272 class InputSplitter(object):
274 r"""An object that can accumulate lines of Python source before execution.
273 r"""An object that can accumulate lines of Python source before execution.
275
274
276 This object is designed to be fed python source line-by-line, using
275 This object is designed to be fed python source line-by-line, using
277 :meth:`push`. It will return on each push whether the currently pushed
276 :meth:`push`. It will return on each push whether the currently pushed
278 code could be executed already. In addition, it provides a method called
277 code could be executed already. In addition, it provides a method called
279 :meth:`push_accepts_more` that can be used to query whether more input
278 :meth:`push_accepts_more` that can be used to query whether more input
280 can be pushed into a single interactive block.
279 can be pushed into a single interactive block.
281
280
282 This is a simple example of how an interactive terminal-based client can use
281 This is a simple example of how an interactive terminal-based client can use
283 this tool::
282 this tool::
284
283
285 isp = InputSplitter()
284 isp = InputSplitter()
286 while isp.push_accepts_more():
285 while isp.push_accepts_more():
287 indent = ' '*isp.indent_spaces
286 indent = ' '*isp.indent_spaces
288 prompt = '>>> ' + indent
287 prompt = '>>> ' + indent
289 line = indent + raw_input(prompt)
288 line = indent + raw_input(prompt)
290 isp.push(line)
289 isp.push(line)
291 print 'Input source was:\n', isp.source_reset(),
290 print 'Input source was:\n', isp.source_reset(),
292 """
291 """
# Number of spaces of indentation computed from the input that has been pushed
# so far.  This is the attribute callers should query to get the current
# indentation level, in order to provide auto-indent facilities.
296 indent_spaces = 0
295 indent_spaces = 0
297 # String, indicating the default input encoding. It is computed by default
296 # String, indicating the default input encoding. It is computed by default
298 # at initialization time via get_input_encoding(), but it can be reset by a
297 # at initialization time via get_input_encoding(), but it can be reset by a
299 # client with specific knowledge of the encoding.
298 # client with specific knowledge of the encoding.
300 encoding = ''
299 encoding = ''
301 # String where the current full source input is stored, properly encoded.
300 # String where the current full source input is stored, properly encoded.
302 # Reading this attribute is the normal way of querying the currently pushed
301 # Reading this attribute is the normal way of querying the currently pushed
303 # source code, that has been properly encoded.
302 # source code, that has been properly encoded.
304 source = ''
303 source = ''
305 # Code object corresponding to the current source. It is automatically
304 # Code object corresponding to the current source. It is automatically
306 # synced to the source, so it can be queried at any time to obtain the code
305 # synced to the source, so it can be queried at any time to obtain the code
307 # object; it will be None if the source doesn't compile to valid Python.
306 # object; it will be None if the source doesn't compile to valid Python.
308 code = None
307 code = None
309
308
310 # Private attributes
309 # Private attributes
311
310
312 # List with lines of input accumulated so far
311 # List with lines of input accumulated so far
313 _buffer = None
312 _buffer = None
314 # Command compiler
313 # Command compiler
315 _compile = None
314 _compile = None
316 # Mark when input has changed indentation all the way back to flush-left
315 # Mark when input has changed indentation all the way back to flush-left
317 _full_dedent = False
316 _full_dedent = False
318 # Boolean indicating whether the current block is complete
317 # Boolean indicating whether the current block is complete
319 _is_complete = None
318 _is_complete = None
320 # Boolean indicating whether the current block has an unrecoverable syntax error
319 # Boolean indicating whether the current block has an unrecoverable syntax error
321 _is_invalid = False
320 _is_invalid = False
322
321
def __init__(self):
    """Create a new InputSplitter instance.

    The instance starts with an empty line buffer, its own
    codeop.CommandCompiler and the encoding reported by
    get_input_encoding().
    """
    self.encoding = get_input_encoding()
    self._compile = codeop.CommandCompiler()
    self._buffer = []
329
328
def reset(self):
    """Reset the input buffer and associated state."""
    # Empty the buffer in place so the list object itself is kept.
    del self._buffer[:]
    self.source = ''
    self.code = None
    self.indent_spaces = 0
    self._full_dedent = False
    self._is_invalid = False
    self._is_complete = False
339
338
def source_reset(self):
    """Return the input source and perform a full reset.

    The source is read before reset() is called, so the caller receives the
    text as it was prior to clearing.
    """
    pending_source = self.source
    self.reset()
    return pending_source
346
345
def check_complete(self, source):
    """Return whether a block of code is ready to execute, or should be continued

    This is a non-stateful API, and will reset the state of this InputSplitter
    both before and after the check.

    Parameters
    ----------
    source : string
        Python input code, which can be multiline.

    Returns
    -------
    status : str
        One of 'complete', 'incomplete', or 'invalid' if source is not a
        prefix of valid code.
    indent_spaces : int or None
        The number of spaces by which to indent the next line of code. If
        status is not 'incomplete', this is None.
    """
    self.reset()
    try:
        try:
            self.push(source)
        except SyntaxError:
            # Transformers in IPythonInputSplitter can raise SyntaxError,
            # which push() will not catch.
            return 'invalid', None

        if self._is_invalid:
            verdict = ('invalid', None)
        elif self.push_accepts_more():
            verdict = ('incomplete', self.indent_spaces)
        else:
            verdict = ('complete', None)
        return verdict
    finally:
        # Whatever the outcome, leave the splitter in a clean state.
        self.reset()
382
381
def push(self, lines):
    """Push one or more lines of input.

    The given lines are stored, and a status is returned telling whether the
    accumulated code forms a complete Python block or not.

    Exceptions raised during compilation are swallowed; when one occurs the
    method returns True.

    Parameters
    ----------
    lines : string
        One or more lines of Python input.

    Returns
    -------
    is_complete : boolean
        True if the current input source (the result of the current input
        plus prior inputs) forms a complete Python execution block.  Note that
        this value is also stored as a private attribute (``_is_complete``), so it
        can be queried at any time.
    """
    self._store(lines)
    pending = self.source

    # Clear the code object before compiling, so that a failed compilation
    # cannot leave code/source attributes that disagree with each other.
    self.code = None
    self._is_complete = None
    self._is_invalid = False

    # Honor termination lines properly: a trailing backslash-newline means
    # the statement continues.
    if pending.endswith('\\\n'):
        return False

    self._update_indent()
    try:
        with warnings.catch_warnings():
            # Turn SyntaxWarning into an exception so it is caught below.
            warnings.simplefilter('error', SyntaxWarning)
            compiled = self._compile(pending, symbol="exec")
    # Invalid syntax can produce any of a number of different errors from
    # inside the compiler, so we have to catch them all.  Syntax errors
    # immediately produce a 'ready' block, so the invalid Python can be
    # sent to the kernel for evaluation with possible ipython
    # special-syntax conversion.
    except (SyntaxError, OverflowError, ValueError, TypeError,
            MemoryError, SyntaxWarning):
        self._is_complete = True
        self._is_invalid = True
    else:
        # Compilation raised nothing, though it may not have produced a
        # complete code object (None means more input is needed).
        self.code = compiled
        self._is_complete = compiled is not None

    return self._is_complete
438
437
def push_accepts_more(self):
    """Return whether a block of interactive input can accept more input.

    This method is meant to be used by line-oriented frontends, who need to
    guess whether a block is complete or not based solely on prior and
    current input lines.  The InputSplitter considers it has a complete
    interactive block and will not accept more input when either:

    * A SyntaxError is raised

    * The code is complete and consists of a single line or a single
      non-compound statement

    * The code is complete and has a blank line at the end

    If the current input produces a syntax error, this method immediately
    returns False but does *not* raise the syntax error exception, as
    typically clients will want to send invalid syntax to an execution
    backend which might convert the invalid syntax into valid Python via
    one of the dynamic IPython mechanisms.
    """
    # With incomplete input, unconditionally accept more.
    # A syntax error also sets _is_complete to True - see push()
    if not self._is_complete:
        return True

    # The user can make any (complete) input execute by leaving a blank line
    source_lines = self.source.splitlines()
    final_line = source_lines[-1]
    if not final_line or final_line.isspace():
        return False

    # If there's just a single line or AST node, and we're flush left, as is
    # the case after a simple statement such as 'a=1', we want to execute it
    # straight away.
    if self.indent_spaces == 0:
        if len(source_lines) <= 1:
            return False

        try:
            tree = ast.parse(u''.join(self._buffer))
        except Exception:
            # The buffer cannot be parsed into an AST: stop accepting input
            return False

        statements = tree.body
        if len(statements) == 1 and not hasattr(statements[0], 'body'):
            # A lone statement without a nested body is a simple statement
            return False

    # General fallback - accept more code
    return True
493
492
494 def _update_indent(self):
493 def _update_indent(self):
495 # self.source always has a trailing newline
494 # self.source always has a trailing newline
496 self.indent_spaces = find_next_indent(self.source[:-1])
495 self.indent_spaces = find_next_indent(self.source[:-1])
497 self._full_dedent = (self.indent_spaces == 0)
496 self._full_dedent = (self.indent_spaces == 0)
498
497
499 def _store(self, lines, buffer=None, store='source'):
498 def _store(self, lines, buffer=None, store='source'):
500 """Store one or more lines of input.
499 """Store one or more lines of input.
501
500
502 If input lines are not newline-terminated, a newline is automatically
501 If input lines are not newline-terminated, a newline is automatically
503 appended."""
502 appended."""
504
503
505 if buffer is None:
504 if buffer is None:
506 buffer = self._buffer
505 buffer = self._buffer
507
506
508 if lines.endswith('\n'):
507 if lines.endswith('\n'):
509 buffer.append(lines)
508 buffer.append(lines)
510 else:
509 else:
511 buffer.append(lines+'\n')
510 buffer.append(lines+'\n')
512 setattr(self, store, self._set_source(buffer))
511 setattr(self, store, self._set_source(buffer))
513
512
514 def _set_source(self, buffer):
513 def _set_source(self, buffer):
515 return u''.join(buffer)
514 return u''.join(buffer)
516
515
517
516
518 class IPythonInputSplitter(InputSplitter):
517 class IPythonInputSplitter(InputSplitter):
519 """An input splitter that recognizes all of IPython's special syntax."""
518 """An input splitter that recognizes all of IPython's special syntax."""
520
519
521 # String with raw, untransformed input.
520 # String with raw, untransformed input.
522 source_raw = ''
521 source_raw = ''
523
522
524 # Flag to track when a transformer has stored input that it hasn't given
523 # Flag to track when a transformer has stored input that it hasn't given
525 # back yet.
524 # back yet.
526 transformer_accumulating = False
525 transformer_accumulating = False
527
526
528 # Flag to track when assemble_python_lines has stored input that it hasn't
527 # Flag to track when assemble_python_lines has stored input that it hasn't
529 # given back yet.
528 # given back yet.
530 within_python_line = False
529 within_python_line = False
531
530
532 # Private attributes
531 # Private attributes
533
532
534 # List with lines of raw input accumulated so far.
533 # List with lines of raw input accumulated so far.
535 _buffer_raw = None
534 _buffer_raw = None
536
535
537 def __init__(self, line_input_checker=True, physical_line_transforms=None,
536 def __init__(self, line_input_checker=True, physical_line_transforms=None,
538 logical_line_transforms=None, python_line_transforms=None):
537 logical_line_transforms=None, python_line_transforms=None):
539 super(IPythonInputSplitter, self).__init__()
538 super(IPythonInputSplitter, self).__init__()
540 self._buffer_raw = []
539 self._buffer_raw = []
541 self._validate = True
540 self._validate = True
542
541
543 if physical_line_transforms is not None:
542 if physical_line_transforms is not None:
544 self.physical_line_transforms = physical_line_transforms
543 self.physical_line_transforms = physical_line_transforms
545 else:
544 else:
546 self.physical_line_transforms = [
545 self.physical_line_transforms = [
547 leading_indent(),
546 leading_indent(),
548 classic_prompt(),
547 classic_prompt(),
549 ipy_prompt(),
548 ipy_prompt(),
550 cellmagic(end_on_blank_line=line_input_checker),
549 cellmagic(end_on_blank_line=line_input_checker),
551 ]
550 ]
552
551
553 self.assemble_logical_lines = assemble_logical_lines()
552 self.assemble_logical_lines = assemble_logical_lines()
554 if logical_line_transforms is not None:
553 if logical_line_transforms is not None:
555 self.logical_line_transforms = logical_line_transforms
554 self.logical_line_transforms = logical_line_transforms
556 else:
555 else:
557 self.logical_line_transforms = [
556 self.logical_line_transforms = [
558 help_end(),
557 help_end(),
559 escaped_commands(),
558 escaped_commands(),
560 assign_from_magic(),
559 assign_from_magic(),
561 assign_from_system(),
560 assign_from_system(),
562 ]
561 ]
563
562
564 self.assemble_python_lines = assemble_python_lines()
563 self.assemble_python_lines = assemble_python_lines()
565 if python_line_transforms is not None:
564 if python_line_transforms is not None:
566 self.python_line_transforms = python_line_transforms
565 self.python_line_transforms = python_line_transforms
567 else:
566 else:
568 # We don't use any of these at present
567 # We don't use any of these at present
569 self.python_line_transforms = []
568 self.python_line_transforms = []
570
569
571 @property
570 @property
572 def transforms(self):
571 def transforms(self):
573 "Quick access to all transformers."
572 "Quick access to all transformers."
574 return self.physical_line_transforms + \
573 return self.physical_line_transforms + \
575 [self.assemble_logical_lines] + self.logical_line_transforms + \
574 [self.assemble_logical_lines] + self.logical_line_transforms + \
576 [self.assemble_python_lines] + self.python_line_transforms
575 [self.assemble_python_lines] + self.python_line_transforms
577
576
578 @property
577 @property
579 def transforms_in_use(self):
578 def transforms_in_use(self):
580 """Transformers, excluding logical line transformers if we're in a
579 """Transformers, excluding logical line transformers if we're in a
581 Python line."""
580 Python line."""
582 t = self.physical_line_transforms[:]
581 t = self.physical_line_transforms[:]
583 if not self.within_python_line:
582 if not self.within_python_line:
584 t += [self.assemble_logical_lines] + self.logical_line_transforms
583 t += [self.assemble_logical_lines] + self.logical_line_transforms
585 return t + [self.assemble_python_lines] + self.python_line_transforms
584 return t + [self.assemble_python_lines] + self.python_line_transforms
586
585
587 def reset(self):
586 def reset(self):
588 """Reset the input buffer and associated state."""
587 """Reset the input buffer and associated state."""
589 super(IPythonInputSplitter, self).reset()
588 super(IPythonInputSplitter, self).reset()
590 self._buffer_raw[:] = []
589 self._buffer_raw[:] = []
591 self.source_raw = ''
590 self.source_raw = ''
592 self.transformer_accumulating = False
591 self.transformer_accumulating = False
593 self.within_python_line = False
592 self.within_python_line = False
594
593
595 for t in self.transforms:
594 for t in self.transforms:
596 try:
595 try:
597 t.reset()
596 t.reset()
598 except SyntaxError:
597 except SyntaxError:
599 # Nothing that calls reset() expects to handle transformer
598 # Nothing that calls reset() expects to handle transformer
600 # errors
599 # errors
601 pass
600 pass
602
601
603 def flush_transformers(self):
602 def flush_transformers(self):
604 def _flush(transform, outs):
603 def _flush(transform, outs):
605 """yield transformed lines
604 """yield transformed lines
606
605
607 always strings, never None
606 always strings, never None
608
607
609 transform: the current transform
608 transform: the current transform
610 outs: an iterable of previously transformed inputs.
609 outs: an iterable of previously transformed inputs.
611 Each may be multiline, which will be passed
610 Each may be multiline, which will be passed
612 one line at a time to transform.
611 one line at a time to transform.
613 """
612 """
614 for out in outs:
613 for out in outs:
615 for line in out.splitlines():
614 for line in out.splitlines():
616 # push one line at a time
615 # push one line at a time
617 tmp = transform.push(line)
616 tmp = transform.push(line)
618 if tmp is not None:
617 if tmp is not None:
619 yield tmp
618 yield tmp
620
619
621 # reset the transform
620 # reset the transform
622 tmp = transform.reset()
621 tmp = transform.reset()
623 if tmp is not None:
622 if tmp is not None:
624 yield tmp
623 yield tmp
625
624
626 out = []
625 out = []
627 for t in self.transforms_in_use:
626 for t in self.transforms_in_use:
628 out = _flush(t, out)
627 out = _flush(t, out)
629
628
630 out = list(out)
629 out = list(out)
631 if out:
630 if out:
632 self._store('\n'.join(out))
631 self._store('\n'.join(out))
633
632
634 def raw_reset(self):
633 def raw_reset(self):
635 """Return raw input only and perform a full reset.
634 """Return raw input only and perform a full reset.
636 """
635 """
637 out = self.source_raw
636 out = self.source_raw
638 self.reset()
637 self.reset()
639 return out
638 return out
640
639
641 def source_reset(self):
640 def source_reset(self):
642 try:
641 try:
643 self.flush_transformers()
642 self.flush_transformers()
644 return self.source
643 return self.source
645 finally:
644 finally:
646 self.reset()
645 self.reset()
647
646
648 def push_accepts_more(self):
647 def push_accepts_more(self):
649 if self.transformer_accumulating:
648 if self.transformer_accumulating:
650 return True
649 return True
651 else:
650 else:
652 return super(IPythonInputSplitter, self).push_accepts_more()
651 return super(IPythonInputSplitter, self).push_accepts_more()
653
652
654 def transform_cell(self, cell):
653 def transform_cell(self, cell):
655 """Process and translate a cell of input.
654 """Process and translate a cell of input.
656 """
655 """
657 self.reset()
656 self.reset()
658 try:
657 try:
659 self.push(cell)
658 self.push(cell)
660 self.flush_transformers()
659 self.flush_transformers()
661 return self.source
660 return self.source
662 finally:
661 finally:
663 self.reset()
662 self.reset()
664
663
665 def push(self, lines):
664 def push(self, lines):
666 """Push one or more lines of IPython input.
665 """Push one or more lines of IPython input.
667
666
668 This stores the given lines and returns a status code indicating
667 This stores the given lines and returns a status code indicating
669 whether the code forms a complete Python block or not, after processing
668 whether the code forms a complete Python block or not, after processing
670 all input lines for special IPython syntax.
669 all input lines for special IPython syntax.
671
670
672 Any exceptions generated in compilation are swallowed, but if an
671 Any exceptions generated in compilation are swallowed, but if an
673 exception was produced, the method returns True.
672 exception was produced, the method returns True.
674
673
675 Parameters
674 Parameters
676 ----------
675 ----------
677 lines : string
676 lines : string
678 One or more lines of Python input.
677 One or more lines of Python input.
679
678
680 Returns
679 Returns
681 -------
680 -------
682 is_complete : boolean
681 is_complete : boolean
683 True if the current input source (the result of the current input
682 True if the current input source (the result of the current input
684 plus prior inputs) forms a complete Python execution block. Note that
683 plus prior inputs) forms a complete Python execution block. Note that
685 this value is also stored as a private attribute (_is_complete), so it
684 this value is also stored as a private attribute (_is_complete), so it
686 can be queried at any time.
685 can be queried at any time.
687 """
686 """
688
687
689 # We must ensure all input is pure unicode
688 # We must ensure all input is pure unicode
690 lines = cast_unicode(lines, self.encoding)
691 # ''.splitlines() --> [], but we need to push the empty line to transformers
689 # ''.splitlines() --> [], but we need to push the empty line to transformers
692 lines_list = lines.splitlines()
690 lines_list = lines.splitlines()
693 if not lines_list:
691 if not lines_list:
694 lines_list = ['']
692 lines_list = ['']
695
693
696 # Store raw source before applying any transformations to it. Note
694 # Store raw source before applying any transformations to it. Note
697 # that this must be done *after* the reset() call that would otherwise
695 # that this must be done *after* the reset() call that would otherwise
698 # flush the buffer.
696 # flush the buffer.
699 self._store(lines, self._buffer_raw, 'source_raw')
697 self._store(lines, self._buffer_raw, 'source_raw')
700
698
701 for line in lines_list:
699 for line in lines_list:
702 out = self.push_line(line)
700 out = self.push_line(line)
703
701
704 return out
702 return out
705
703
706 def push_line(self, line):
704 def push_line(self, line):
707 buf = self._buffer
705 buf = self._buffer
708
706
709 def _accumulating(dbg):
707 def _accumulating(dbg):
710 #print(dbg)
708 #print(dbg)
711 self.transformer_accumulating = True
709 self.transformer_accumulating = True
712 return False
710 return False
713
711
714 for transformer in self.physical_line_transforms:
712 for transformer in self.physical_line_transforms:
715 line = transformer.push(line)
713 line = transformer.push(line)
716 if line is None:
714 if line is None:
717 return _accumulating(transformer)
715 return _accumulating(transformer)
718
716
719 if not self.within_python_line:
717 if not self.within_python_line:
720 line = self.assemble_logical_lines.push(line)
718 line = self.assemble_logical_lines.push(line)
721 if line is None:
719 if line is None:
722 return _accumulating('acc logical line')
720 return _accumulating('acc logical line')
723
721
724 for transformer in self.logical_line_transforms:
722 for transformer in self.logical_line_transforms:
725 line = transformer.push(line)
723 line = transformer.push(line)
726 if line is None:
724 if line is None:
727 return _accumulating(transformer)
725 return _accumulating(transformer)
728
726
729 line = self.assemble_python_lines.push(line)
727 line = self.assemble_python_lines.push(line)
730 if line is None:
728 if line is None:
731 self.within_python_line = True
729 self.within_python_line = True
732 return _accumulating('acc python line')
730 return _accumulating('acc python line')
733 else:
731 else:
734 self.within_python_line = False
732 self.within_python_line = False
735
733
736 for transformer in self.python_line_transforms:
734 for transformer in self.python_line_transforms:
737 line = transformer.push(line)
735 line = transformer.push(line)
738 if line is None:
736 if line is None:
739 return _accumulating(transformer)
737 return _accumulating(transformer)
740
738
741 #print("transformers clear") #debug
739 #print("transformers clear") #debug
742 self.transformer_accumulating = False
740 self.transformer_accumulating = False
743 return super(IPythonInputSplitter, self).push(line)
741 return super(IPythonInputSplitter, self).push(line)
@@ -1,139 +1,184 b''
1
1
2 ===========================
2 ===========================
3 Custom input transformation
3 Custom input transformation
4 ===========================
4 ===========================
5
5
6 IPython extends Python syntax to allow things like magic commands, and help with
6 IPython extends Python syntax to allow things like magic commands, and help with
7 the ``?`` syntax. There are several ways to customise how the user's input is
7 the ``?`` syntax. There are several ways to customise how the user's input is
8 processed into Python code to be executed.
8 processed into Python code to be executed.
9
9
10 These hooks are mainly for other projects using IPython as the core of their
10 These hooks are mainly for other projects using IPython as the core of their
11 interactive interface. Using them carelessly can easily break IPython!
11 interactive interface. Using them carelessly can easily break IPython!
12
12
13 String based transformations
13 String based transformations
14 ============================
14 ============================
15
15
16 .. currentmodule:: IPython.core.inputtransformer
16 .. currentmodule:: IPython.core.inputtransformer
17
17
18 When the user enters a line of code, it is first processed as a string. By the
18 When the user enters a line of code, it is first processed as a string. By the
19 end of this stage, it must be valid Python syntax.
19 end of this stage, it must be valid Python syntax.
20
20
21 These transformers all subclass :class:`IPython.core.inputtransformer.InputTransformer`,
21 These transformers all subclass :class:`IPython.core.inputtransformer.InputTransformer`,
22 and are used by :class:`IPython.core.inputsplitter.IPythonInputSplitter`.
22 and are used by :class:`IPython.core.inputsplitter.IPythonInputSplitter`.
23
23
24 These transformers act in three groups, stored separately as lists of instances
24 These transformers act in three groups, stored separately as lists of instances
25 in attributes of :class:`~IPython.core.inputsplitter.IPythonInputSplitter`:
25 in attributes of :class:`~IPython.core.inputsplitter.IPythonInputSplitter`:
26
26
27 * ``physical_line_transforms`` act on the lines as the user enters them. For
27 * ``physical_line_transforms`` act on the lines as the user enters them. For
28 example, these strip Python prompts from examples pasted in.
28 example, these strip Python prompts from examples pasted in.
29 * ``logical_line_transforms`` act on lines as connected by explicit line
29 * ``logical_line_transforms`` act on lines as connected by explicit line
30 continuations, i.e. ``\`` at the end of physical lines. They are skipped
30 continuations, i.e. ``\`` at the end of physical lines. They are skipped
31 inside multiline Python statements. This is the point where IPython recognises
31 inside multiline Python statements. This is the point where IPython recognises
32 ``%magic`` commands, for instance.
32 ``%magic`` commands, for instance.
33 * ``python_line_transforms`` act on blocks containing complete Python statements.
33 * ``python_line_transforms`` act on blocks containing complete Python statements.
34 Multi-line strings, lists and function calls are reassembled before being
34 Multi-line strings, lists and function calls are reassembled before being
35 passed to these, but note that function and class *definitions* are still a
35 passed to these, but note that function and class *definitions* are still a
36 series of separate statements. IPython does not use any of these by default.
36 series of separate statements. IPython does not use any of these by default.
37
37
38 An InteractiveShell instance actually has two
38 An InteractiveShell instance actually has two
39 :class:`~IPython.core.inputsplitter.IPythonInputSplitter` instances, as the
39 :class:`~IPython.core.inputsplitter.IPythonInputSplitter` instances, as the
40 attributes :attr:`~IPython.core.interactiveshell.InteractiveShell.input_splitter`,
40 attributes :attr:`~IPython.core.interactiveshell.InteractiveShell.input_splitter`,
41 to tell when a block of input is complete, and
41 to tell when a block of input is complete, and
42 :attr:`~IPython.core.interactiveshell.InteractiveShell.input_transformer_manager`,
42 :attr:`~IPython.core.interactiveshell.InteractiveShell.input_transformer_manager`,
43 to transform complete cells. If you add a transformer, you should make sure that
43 to transform complete cells. If you add a transformer, you should make sure that
44 it gets added to both, e.g.::
44 it gets added to both, e.g.::
45
45
46 ip.input_splitter.logical_line_transforms.append(my_transformer())
46 ip.input_splitter.logical_line_transforms.append(my_transformer())
47 ip.input_transformer_manager.logical_line_transforms.append(my_transformer())
47 ip.input_transformer_manager.logical_line_transforms.append(my_transformer())
48
48
49 These transformers may raise :exc:`SyntaxError` if the input code is invalid, but
49 These transformers may raise :exc:`SyntaxError` if the input code is invalid, but
50 in most cases it is clearer to pass unrecognised code through unmodified and let
50 in most cases it is clearer to pass unrecognised code through unmodified and let
51 Python's own parser decide whether it is valid.
51 Python's own parser decide whether it is valid.
52
52
53 .. versionchanged:: 2.0
53 .. versionchanged:: 2.0
54
54
55 Added the option to raise :exc:`SyntaxError`.
55 Added the option to raise :exc:`SyntaxError`.
56
56
57 Stateless transformations
57 Stateless transformations
58 -------------------------
58 -------------------------
59
59
60 The simplest kind of transformations work one line at a time. Write a function
60 The simplest kind of transformations work one line at a time. Write a function
61 which takes a line and returns a line, and decorate it with
61 which takes a line and returns a line, and decorate it with
62 :meth:`StatelessInputTransformer.wrap`::
62 :meth:`StatelessInputTransformer.wrap`::
63
63
64 @StatelessInputTransformer.wrap
64 @StatelessInputTransformer.wrap
65 def my_special_commands(line):
65 def my_special_commands(line):
66 if line.startswith("¬"):
66 if line.startswith("¬"):
67 return "specialcommand(" + repr(line) + ")"
67 return "specialcommand(" + repr(line) + ")"
68 return line
68 return line
69
69
70 The decorator returns a factory function which will produce instances of
70 The decorator returns a factory function which will produce instances of
71 :class:`~IPython.core.inputtransformer.StatelessInputTransformer` using your
71 :class:`~IPython.core.inputtransformer.StatelessInputTransformer` using your
72 function.
72 function.
73
73
74 Transforming a full block
75 -------------------------
76
77 Transforming a full block of Python code is possible by subclassing
78 :class:`~IPython.core.inputtransformer.InputTransformer` and overriding the
79 ``push`` and ``reset`` methods. The ``reset`` method should return the full block of
80 transformed text. As an example, here is a transformer that reverses the lines from last
81 to first::
82
83 from IPython.core.inputtransformer import InputTransformer
84
85 class ReverseLineTransformer(InputTransformer):
86
87 def __init__(self):
88 self.acc = []
89
90 def push(self, line):
91 self.acc.append(line)
92 return None
93
94 def reset(self):
95 ret = '\n'.join(self.acc[::-1])
96 self.acc = []
97 return ret
98
99
74 Coroutine transformers
100 Coroutine transformers
75 ----------------------
101 ----------------------
76
102
77 More advanced transformers can be written as coroutines. The coroutine will be
103 More advanced transformers can be written as coroutines. The coroutine will be
78 sent each line in turn, followed by ``None`` to reset it. It can yield lines, or
104 sent each line in turn, followed by ``None`` to reset it. It can yield lines, or
79 ``None`` if it is accumulating text to yield at a later point. When reset, it
105 ``None`` if it is accumulating text to yield at a later point. When reset, it
80 should give up any code it has accumulated.
106 should give up any code it has accumulated.
81
107
108 You may use :meth:`CoroutineInputTransformer.wrap` to simplify the creation of
109 such a transformer.
110
111 Here is a simple :class:`CoroutineInputTransformer` that can be thought of as
112 being the identity::
113
114 @CoroutineInputTransformer.wrap
115 def noop():
116 line = ''
117 while True:
118 line = (yield line)
119
120 ip = get_ipython()
121
122 ip.input_splitter.logical_line_transforms.append(noop())
123 ip.input_transformer_manager.logical_line_transforms.append(noop())
124
82 This code in IPython strips a constant amount of leading indentation from each
125 This code in IPython strips a constant amount of leading indentation from each
83 line in a cell::
126 line in a cell::
84
127
128 from IPython.core.inputtransformer import CoroutineInputTransformer
129
85 @CoroutineInputTransformer.wrap
130 @CoroutineInputTransformer.wrap
86 def leading_indent():
131 def leading_indent():
87 """Remove leading indentation.
132 """Remove leading indentation.
88
133
89 If the first line starts with spaces or tabs, the same whitespace will be
134 If the first line starts with spaces or tabs, the same whitespace will be
90 removed from each following line until it is reset.
135 removed from each following line until it is reset.
91 """
136 """
92 space_re = re.compile(r'^[ \t]+')
137 space_re = re.compile(r'^[ \t]+')
93 line = ''
138 line = ''
94 while True:
139 while True:
95 line = (yield line)
140 line = (yield line)
96
141
97 if line is None:
142 if line is None:
98 continue
143 continue
99
144
100 m = space_re.match(line)
145 m = space_re.match(line)
101 if m:
146 if m:
102 space = m.group(0)
147 space = m.group(0)
103 while line is not None:
148 while line is not None:
104 if line.startswith(space):
149 if line.startswith(space):
105 line = line[len(space):]
150 line = line[len(space):]
106 line = (yield line)
151 line = (yield line)
107 else:
152 else:
108 # No leading spaces - wait for reset
153 # No leading spaces - wait for reset
109 while line is not None:
154 while line is not None:
110 line = (yield line)
155 line = (yield line)
111
156
112
157
113 Token-based transformers
158 Token-based transformers
114 ------------------------
159 ------------------------
115
160
116 There is an experimental framework that takes care of tokenizing and
161 There is an experimental framework that takes care of tokenizing and
117 untokenizing lines of code. Define a function that accepts a list of tokens, and
162 untokenizing lines of code. Define a function that accepts a list of tokens, and
118 returns an iterable of output tokens, and decorate it with
163 returns an iterable of output tokens, and decorate it with
119 :meth:`TokenInputTransformer.wrap`. These should only be used in
164 :meth:`TokenInputTransformer.wrap`. These should only be used in
120 ``python_line_transforms``.
165 ``python_line_transforms``.
121
166
122 AST transformations
167 AST transformations
123 ===================
168 ===================
124
169
125 After the code has been parsed as Python syntax, you can use Python's powerful
170 After the code has been parsed as Python syntax, you can use Python's powerful
126 *Abstract Syntax Tree* tools to modify it. Subclass :class:`ast.NodeTransformer`,
171 *Abstract Syntax Tree* tools to modify it. Subclass :class:`ast.NodeTransformer`,
127 and add an instance to ``shell.ast_transformers``.
172 and add an instance to ``shell.ast_transformers``.
128
173
129 This example wraps integer literals in an ``Integer`` class, which is useful for
174 This example wraps integer literals in an ``Integer`` class, which is useful for
130 mathematical frameworks that want to handle e.g. ``1/3`` as a precise fraction::
175 mathematical frameworks that want to handle e.g. ``1/3`` as a precise fraction::
131
176
132
177
133 class IntegerWrapper(ast.NodeTransformer):
178 class IntegerWrapper(ast.NodeTransformer):
134 """Wraps all integers in a call to Integer()"""
179 """Wraps all integers in a call to Integer()"""
135 def visit_Num(self, node):
180 def visit_Num(self, node):
136 if isinstance(node.n, int):
181 if isinstance(node.n, int):
137 return ast.Call(func=ast.Name(id='Integer', ctx=ast.Load()),
182 return ast.Call(func=ast.Name(id='Integer', ctx=ast.Load()),
138 args=[node], keywords=[])
183 args=[node], keywords=[])
139 return node
184 return node
General Comments 0
You need to be logged in to leave comments. Login now