##// END OF EJS Templates
Fix for unicode in inputsplitter.
Thomas Kluyver -
Show More
@@ -1,1021 +1,1023 b''
1 """Analysis of text input into executable blocks.
1 """Analysis of text input into executable blocks.
2
2
3 The main class in this module, :class:`InputSplitter`, is designed to break
3 The main class in this module, :class:`InputSplitter`, is designed to break
4 input from either interactive, line-by-line environments or block-based ones,
4 input from either interactive, line-by-line environments or block-based ones,
5 into standalone blocks that can be executed by Python as 'single' statements
5 into standalone blocks that can be executed by Python as 'single' statements
6 (thus triggering sys.displayhook).
6 (thus triggering sys.displayhook).
7
7
8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
8 A companion, :class:`IPythonInputSplitter`, provides the same functionality but
9 with full support for the extended IPython syntax (magics, system calls, etc).
9 with full support for the extended IPython syntax (magics, system calls, etc).
10
10
11 For more details, see the class docstring below.
11 For more details, see the class docstring below.
12
12
13 Syntax Transformations
13 Syntax Transformations
14 ----------------------
14 ----------------------
15
15
16 One of the main jobs of the code in this file is to apply all syntax
16 One of the main jobs of the code in this file is to apply all syntax
17 transformations that make up 'the IPython language', i.e. magics, shell
17 transformations that make up 'the IPython language', i.e. magics, shell
18 escapes, etc. All transformations should be implemented as *fully stateless*
18 escapes, etc. All transformations should be implemented as *fully stateless*
19 entities, that simply take one line as their input and return a line.
19 entities, that simply take one line as their input and return a line.
20 Internally for implementation purposes they may be a normal function or a
20 Internally for implementation purposes they may be a normal function or a
21 callable object, but the only input they receive will be a single line and they
21 callable object, but the only input they receive will be a single line and they
22 should only return a line, without holding any data-dependent state between
22 should only return a line, without holding any data-dependent state between
23 calls.
23 calls.
24
24
25 As an example, the EscapedTransformer is a class so we can more clearly group
25 As an example, the EscapedTransformer is a class so we can more clearly group
26 together the functionality of dispatching to individual functions based on the
26 together the functionality of dispatching to individual functions based on the
27 starting escape character, but the only method for public use is its call
27 starting escape character, but the only method for public use is its call
28 method.
28 method.
29
29
30
30
31 ToDo
31 ToDo
32 ----
32 ----
33
33
34 - Should we make push() actually raise an exception once push_accepts_more()
34 - Should we make push() actually raise an exception once push_accepts_more()
35 returns False?
35 returns False?
36
36
37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
37 - Naming cleanups. The tr_* names aren't the most elegant, though now they are
38 at least just attributes of a class so not really very exposed.
38 at least just attributes of a class so not really very exposed.
39
39
40 - Think about the best way to support dynamic things: automagic, autocall,
40 - Think about the best way to support dynamic things: automagic, autocall,
41 macros, etc.
41 macros, etc.
42
42
43 - Think of a better heuristic for the application of the transforms in
43 - Think of a better heuristic for the application of the transforms in
44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
44 IPythonInputSplitter.push() than looking at the buffer ending in ':'. Idea:
45 track indentation change events (indent, dedent, nothing) and apply them only
45 track indentation change events (indent, dedent, nothing) and apply them only
46 if the indentation went up, but not otherwise.
46 if the indentation went up, but not otherwise.
47
47
48 - Think of the cleanest way for supporting user-specified transformations (the
48 - Think of the cleanest way for supporting user-specified transformations (the
49 user prefilters we had before).
49 user prefilters we had before).
50
50
51 Authors
51 Authors
52 -------
52 -------
53
53
54 * Fernando Perez
54 * Fernando Perez
55 * Brian Granger
55 * Brian Granger
56 """
56 """
57 #-----------------------------------------------------------------------------
57 #-----------------------------------------------------------------------------
58 # Copyright (C) 2010 The IPython Development Team
58 # Copyright (C) 2010 The IPython Development Team
59 #
59 #
60 # Distributed under the terms of the BSD License. The full license is in
60 # Distributed under the terms of the BSD License. The full license is in
61 # the file COPYING, distributed as part of this software.
61 # the file COPYING, distributed as part of this software.
62 #-----------------------------------------------------------------------------
62 #-----------------------------------------------------------------------------
63 from __future__ import print_function
63 from __future__ import print_function
64
64
65 #-----------------------------------------------------------------------------
65 #-----------------------------------------------------------------------------
66 # Imports
66 # Imports
67 #-----------------------------------------------------------------------------
67 #-----------------------------------------------------------------------------
68 # stdlib
68 # stdlib
69 import codeop
69 import codeop
70 import re
70 import re
71 import sys
71 import sys
72
72
73 # IPython modules
73 # IPython modules
74 from IPython.utils.text import make_quoted_expr
74 from IPython.utils.text import make_quoted_expr
75
75
76 #-----------------------------------------------------------------------------
76 #-----------------------------------------------------------------------------
77 # Globals
77 # Globals
78 #-----------------------------------------------------------------------------
78 #-----------------------------------------------------------------------------
79
79
80 # The escape sequences that define the syntax transformations IPython will
80 # The escape sequences that define the syntax transformations IPython will
81 # apply to user input. These can NOT be just changed here: many regular
81 # apply to user input. These can NOT be just changed here: many regular
82 # expressions and other parts of the code may use their hardcoded values, and
82 # expressions and other parts of the code may use their hardcoded values, and
83 # for all intents and purposes they constitute the 'IPython syntax', so they
83 # for all intents and purposes they constitute the 'IPython syntax', so they
84 # should be considered fixed.
84 # should be considered fixed.
85
85
# Send line to underlying system shell
ESC_SHELL = '!'
# Send line to system shell and capture output
ESC_SH_CAP = '!!'
# Find information about object
ESC_HELP = '?'
# Find extra-detailed information about object
ESC_HELP2 = '??'
# Call magic function
ESC_MAGIC = '%'
# Split args on whitespace, quote each as string and call
ESC_QUOTE = ','
# Quote all args as a single string, call
ESC_QUOTE2 = ';'
# Call first argument with rest of line as arguments
ESC_PAREN = '/'
94
94
95 #-----------------------------------------------------------------------------
95 #-----------------------------------------------------------------------------
96 # Utilities
96 # Utilities
97 #-----------------------------------------------------------------------------
97 #-----------------------------------------------------------------------------
98
98
99 # FIXME: These are general-purpose utilities that later can be moved to the
99 # FIXME: These are general-purpose utilities that later can be moved to the
100 # general ward. Kept here for now because we're being very strict about test
100 # general ward. Kept here for now because we're being very strict about test
101 # coverage with this code, and this lets us ensure that we keep 100% coverage
101 # coverage with this code, and this lets us ensure that we keep 100% coverage
102 # while developing.
102 # while developing.
103
103
# Compiled regexps for autoindent management.
#
# dedent_re matches an indented line whose first token is one of the keywords
# raise, return or pass.  The trailing \b requires the keyword to end there,
# so identifiers that merely start with a keyword ('password', 'returned',
# 'raises') do not match.
dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')
# ini_spaces_re captures the run of leading whitespace (newlines excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

# regexp to match pure comment lines so we don't accidentally insert 'if 1:'
# before pure comments.  Written as a raw string so the backslashes reach the
# regexp engine without relying on Python passing unknown escapes through.
comment_line_re = re.compile(r'^\s*\#')
111
111
112
112
def num_ini_spaces(s):
    """Count the leading whitespace characters of a string.

    Each whitespace character matched by ``ini_spaces_re`` counts as one, so
    a tab is counted as a single space.  Mixing tabs and spaces in the user's
    input is *not* supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Length of the leading whitespace run, 0 if the string has none.
    """
    leading = ini_spaces_re.match(s)
    # The match starts at position 0, so its end offset is the count.
    return leading.end() if leading else 0
133
133
134
134
def remove_comments(src):
    """Strip every Python comment from the input source.

    Note: comments are NOT recognized inside of strings!  Everything from a
    '#' up to the end of its line is dropped, even when the '#' sits inside a
    string literal.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    # '.' does not match a newline, so line endings are left untouched.
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
151
151
152
152
def get_input_encoding():
    """Return the default standard input encoding.

    If sys.stdin has no encoding, 'ascii' is returned."""
    # In some environments sys.stdin.encoding is None or missing altogether;
    # always hand back a valid encoding name.
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
163
163
164 #-----------------------------------------------------------------------------
164 #-----------------------------------------------------------------------------
165 # Classes and functions for normal Python syntax handling
165 # Classes and functions for normal Python syntax handling
166 #-----------------------------------------------------------------------------
166 #-----------------------------------------------------------------------------
167
167
168 # HACK! This implementation, written by Robert K a while ago using the
168 # HACK! This implementation, written by Robert K a while ago using the
169 # compiler module, is more robust than the other one below, but it expects its
169 # compiler module, is more robust than the other one below, but it expects its
170 # input to be pure python (no ipython syntax). For now we're using it as a
170 # input to be pure python (no ipython syntax). For now we're using it as a
171 # second-pass splitter after the first pass transforms the input to pure
171 # second-pass splitter after the first pass transforms the input to pure
172 # python.
172 # python.
173
173
def split_blocks(python):
    """ Split multiple lines of code into discrete commands that can be
    executed singly.

    Parameters
    ----------
    python : str
        Pure, exec'able Python code.

    Returns
    -------
    commands : list of str
        Separate commands that can be exec'ed independently.  Commands have
        the same string type as the input (unicode in, unicode out).  If the
        code cannot be parsed, the original input is returned unchanged as
        the only element of the list.
    """

    import compiler

    # compiler.parse treats trailing spaces after a newline as a
    # SyntaxError.  This is different than codeop.CommandCompiler, which
    # will compile the trailing spaces just fine.  We simply strip any
    # trailing whitespace off.  Passing a string with trailing whitespace
    # to exec will fail however.  There seems to be some inconsistency in
    # how trailing whitespace is handled, but this seems to work.
    python_ori = python   # save original in case we bail on error
    python = python.strip()

    # The compiler module does not like unicode, so we need to encode it.
    # Only the copy handed to the parser gets the utf-8-sig BOM (which lets
    # the compiler detect a UTF-8 encoded string); the text we slice into
    # commands below must not carry it, or the BOM leaks into the first
    # command.
    is_unicode = isinstance(python, unicode)
    if is_unicode:
        python = python.encode('utf-8')
        to_parse = '\xef\xbb\xbf' + python
    else:
        to_parse = python

    # The compiler module will parse the code into an abstract syntax tree.
    # This has a bug with str("a\nb"), but not str("""a\nb""")!!!
    try:
        ast = compiler.parse(to_parse)
    except Exception:
        # Any parsing failure means we cannot split: hand the input back
        # untouched.  KeyboardInterrupt/SystemExit are deliberately allowed
        # to propagate.
        return [python_ori]

    # Uncomment to help debug the ast tree
    # for n in ast.node:
    #     print n.lineno,'->',n

    # Each separate command is available by iterating over ast.node. The
    # lineno attribute is the line number (1-indexed) beginning the commands
    # suite.
    # lines ending with ";" yield a Discard Node that doesn't have a lineno
    # attribute.  These nodes can and should be discarded.  But there are
    # other situations that cause Discard nodes that shouldn't be discarded.
    # We might eventually discover other cases where lineno is None and have
    # to put in a more sophisticated test.
    linenos = [x.lineno-1 for x in ast.node if x.lineno is not None]

    # When we have a bare string as the first statement, it does not end up as
    # a Discard Node in the AST as we might expect. Instead, it gets interpreted
    # as the docstring of the module. Check for this case and prepend 0 (the
    # first line number) to the list of linenos to account for it.
    if ast.doc is not None:
        linenos.insert(0, 0)

    # When we finally get the slices, we will need to slice all the way to
    # the end even though we don't have a line number for it. Fortunately,
    # None does the job nicely.
    linenos.append(None)

    # Same problem at the other end: sometimes the ast tree has its
    # first complete statement not starting on line 0. In this case
    # we might miss part of it.  This fixes ticket 266993.  Thanks Gael!
    linenos[0] = 0

    # Split the byte form so that line boundaries agree with the line
    # numbers the compiler reported.
    lines = python.splitlines()

    # Create a list of atomic commands.
    cmds = []
    for i, j in zip(linenos[:-1], linenos[1:]):
        cmd = lines[i:j]
        if cmd:
            text = '\n'.join(cmd)+'\n'
            if is_unicode:
                # Give unicode callers unicode back, matching the error path.
                text = text.decode('utf-8')
            cmds.append(text)

    return cmds
255
255
256
256
257 class InputSplitter(object):
257 class InputSplitter(object):
258 """An object that can split Python source input in executable blocks.
258 """An object that can split Python source input in executable blocks.
259
259
260 This object is designed to be used in one of two basic modes:
260 This object is designed to be used in one of two basic modes:
261
261
262 1. By feeding it python source line-by-line, using :meth:`push`. In this
262 1. By feeding it python source line-by-line, using :meth:`push`. In this
263 mode, it will return on each push whether the currently pushed code
263 mode, it will return on each push whether the currently pushed code
264 could be executed already. In addition, it provides a method called
264 could be executed already. In addition, it provides a method called
265 :meth:`push_accepts_more` that can be used to query whether more input
265 :meth:`push_accepts_more` that can be used to query whether more input
266 can be pushed into a single interactive block.
266 can be pushed into a single interactive block.
267
267
268 2. By calling :meth:`split_blocks` with a single, multiline Python string,
268 2. By calling :meth:`split_blocks` with a single, multiline Python string,
269 that is then split into blocks each of which can be executed
269 that is then split into blocks each of which can be executed
270 interactively as a single statement.
270 interactively as a single statement.
271
271
272 This is a simple example of how an interactive terminal-based client can use
272 This is a simple example of how an interactive terminal-based client can use
273 this tool::
273 this tool::
274
274
275 isp = InputSplitter()
275 isp = InputSplitter()
276 while isp.push_accepts_more():
276 while isp.push_accepts_more():
277 indent = ' '*isp.indent_spaces
277 indent = ' '*isp.indent_spaces
278 prompt = '>>> ' + indent
278 prompt = '>>> ' + indent
279 line = indent + raw_input(prompt)
279 line = indent + raw_input(prompt)
280 isp.push(line)
280 isp.push(line)
281 print 'Input source was:\n', isp.source_reset(),
281 print 'Input source was:\n', isp.source_reset(),
282 """
282 """
283 # Number of spaces of indentation computed from input that has been pushed
283 # Number of spaces of indentation computed from input that has been pushed
284 # so far. This is the attribute callers should query to get the current
284 # so far. This is the attribute callers should query to get the current
285 # indentation level, in order to provide auto-indent facilities.
285 # indentation level, in order to provide auto-indent facilities.
286 indent_spaces = 0
286 indent_spaces = 0
287 # String, indicating the default input encoding. It is computed by default
287 # String, indicating the default input encoding. It is computed by default
288 # at initialization time via get_input_encoding(), but it can be reset by a
288 # at initialization time via get_input_encoding(), but it can be reset by a
289 # client with specific knowledge of the encoding.
289 # client with specific knowledge of the encoding.
290 encoding = ''
290 encoding = ''
291 # String where the current full source input is stored, properly encoded.
291 # String where the current full source input is stored, properly encoded.
292 # Reading this attribute is the normal way of querying the currently pushed
292 # Reading this attribute is the normal way of querying the currently pushed
293 # source code, that has been properly encoded.
293 # source code, that has been properly encoded.
294 source = ''
294 source = ''
295 # Code object corresponding to the current source. It is automatically
295 # Code object corresponding to the current source. It is automatically
296 # synced to the source, so it can be queried at any time to obtain the code
296 # synced to the source, so it can be queried at any time to obtain the code
297 # object; it will be None if the source doesn't compile to valid Python.
297 # object; it will be None if the source doesn't compile to valid Python.
298 code = None
298 code = None
299 # Input mode
299 # Input mode
300 input_mode = 'line'
300 input_mode = 'line'
301
301
302 # Private attributes
302 # Private attributes
303
303
304 # List with lines of input accumulated so far
304 # List with lines of input accumulated so far
305 _buffer = None
305 _buffer = None
306 # Command compiler
306 # Command compiler
307 _compile = None
307 _compile = None
308 # Mark when input has changed indentation all the way back to flush-left
308 # Mark when input has changed indentation all the way back to flush-left
309 _full_dedent = False
309 _full_dedent = False
310 # Boolean indicating whether the current block is complete
310 # Boolean indicating whether the current block is complete
311 _is_complete = None
311 _is_complete = None
312
312
def __init__(self, input_mode=None):
    """Create a new InputSplitter instance.

    Parameters
    ----------
    input_mode : str

      One of ['line', 'cell']; default is 'line'.

    The input_mode parameter controls how new inputs are used when fed via
    the :meth:`push` method:

    - 'line': meant for line-oriented clients, inputs are appended one at a
      time to the internal buffer and the whole buffer is compiled.

    - 'cell': meant for clients that can edit multi-line 'cells' of text at
      a time.  A cell can contain one or more blocks that can be compiled in
      'single' mode by Python.  In this mode, each new input completely
      replaces all prior inputs.  Cell mode is thus equivalent to
      prepending a full reset() to every push() call.
    """
    # Fall back to the class-level default when no mode is requested.
    if input_mode is None:
        input_mode = InputSplitter.input_mode

    self._buffer = []
    self._compile = codeop.CommandCompiler()
    self.encoding = get_input_encoding()
    self.input_mode = input_mode
339
339
def reset(self):
    """Clear the input buffer and return all derived state to its defaults."""
    self.indent_spaces = 0
    # Empty the list in place so the same buffer object is kept.
    del self._buffer[:]
    self.source = ''
    self.code = None
    self._is_complete = self._full_dedent = False
348
348
def source_reset(self):
    """Return the input source and perform a full reset.

    The source is read *before* the reset, so the caller receives the text
    accumulated so far.
    """
    accumulated = self.source
    self.reset()
    return accumulated
355
355
def push(self, lines):
    """Push one or more lines of input.

    This stores the given lines and returns a status code indicating
    whether the code forms a complete Python block or not.

    Any exceptions generated in compilation are swallowed, but if an
    exception was produced, the method returns True.

    Parameters
    ----------
    lines : string
      One or more lines of Python input.

    Returns
    -------
    is_complete : boolean
      True if the current input source (the result of the current input
    plus prior inputs) forms a complete Python execution block.  Note that
    this value is also stored as a private attribute (_is_complete), so it
    can be queried at any time.
    """
    # In cell mode every push starts from a clean slate.
    if self.input_mode == 'cell':
        self.reset()

    self._store(lines)
    src = self.source

    # Clear the code object and completion flag before compiling, so that a
    # failed compilation cannot leave code/source attributes inconsistent.
    self.code = None
    self._is_complete = None

    # Honor termination lines properly: a trailing backslash always means
    # that more input is expected.
    if src.rstrip().endswith('\\'):
        return False

    self._update_indent(lines)
    try:
        self.code = self._compile(src)
    except (SyntaxError, OverflowError, ValueError, TypeError,
            MemoryError):
        # Invalid syntax can produce any of a number of different errors
        # from inside the compiler, so we have to catch them all.  Syntax
        # errors immediately produce a 'ready' block, so the invalid Python
        # can be sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        self._is_complete = True
        return self._is_complete

    # Compilation didn't produce any exceptions (though it may not have
    # given a complete code object).
    self._is_complete = self.code is not None
    return self._is_complete
410
410
def push_accepts_more(self):
    """Tell a line-oriented frontend whether more input is still expected.

    Line-oriented frontends have to guess, from the lines seen so far,
    whether an interactive block is finished.  The splitter stops
    accepting input when a SyntaxError was raised, or when *all* of the
    following hold:

    1. The input compiles to a complete statement.

    2. The indentation level is flush-left (inside an indented construct
       such as a function definition or a for loop we must keep reading).

    3. There is one extra line consisting only of whitespace.

    Condition #3 means blank lines cannot appear in the middle of an
    indented block, so this method is only suitable for *line-oriented*
    frontends.  Block-oriented frontends, which have a separate keyboard
    event to trigger execution, should use :meth:`split_blocks` instead.

    If the current input produces a syntax error this method returns
    False right away and does *not* raise: clients typically forward
    invalid syntax to an execution backend, which may turn it into valid
    Python through one of the dynamic IPython mechanisms.
    """
    # Incomplete input always needs more lines.
    if not self._is_complete:
        return True

    # Complete and flush-left: in 'line' mode we are done.  In 'cell' mode
    # the client may be feeding several individually complete lines in one
    # go (the Qt console does this), so we only stop when everything
    # gathered so far amounts to exactly one block.
    if self.indent_spaces == 0:
        if self.input_mode == 'line':
            return False
        if len(split_blocks(''.join(self._buffer))) == 1:
            return False

    # Otherwise the block ends with a trailing empty/whitespace-only line.
    tail = self.source.splitlines()[-1]
    if not tail:
        return False
    return not tail.isspace()
468
468
def split_blocks(self, lines):
    """Break a multiline string into separate input blocks.

    Note: a full reset() is performed before any input is processed.

    Parameters
    ----------
    lines : str
      A possibly multiline string.

    Returns
    -------
    blocks : list
      A list of strings, each possibly multiline.  Every string is one
      block that can be compiled in 'single' mode (unless it contains a
      syntax error).
    """
    # This logic is delicate: run the complete test suite after touching it.
    self.reset()
    collected = []

    # Reverse the lines so that pop() hands them out in their original
    # order while treating the list as a stack.
    pending = lines.splitlines()[::-1]

    # Each pass of the outer loop produces one block.
    while pending:
        # Inner loop: feed lines until the current block is finished or
        # the input runs out.
        while pending:
            candidate = pending.pop()

            # Empty and whitespace-only lines go straight through.
            if not candidate or candidate.isspace():
                self.push(candidate)
                continue

            # How would this line change the indentation state?
            _ignored_indent, dedents_fully = self._find_indent(candidate)

            # A dedent has two possible causes that must be told apart:
            #
            # 1. An explicit dedent typed by the user.  It is only seen
            #    on the *first* line of the *next* block, so that line is
            #    returned to the stack to open a new block on the next
            #    pass.  Skipped when it is the very last line, otherwise
            #    we would push and pop it forever.
            if dedents_fully and pending and not candidate.startswith(' '):
                pending.append(candidate)
                break

            # 2. A return/raise/pass style statement, seen one line before
            #    the dedent really happens: the line belongs to the
            #    current block, so consume it and then close the block.
            self.push(candidate)

            if dedents_fully:
                break
            if self.indent_spaces == 0 and not self.push_accepts_more():
                break

        # Whatever has been accumulated becomes the next block.
        collected.append(self.source_reset())

    # HACK: the blocks now hold pure Python, so run the joined text once
    # more through the module-level AST-based splitter, which is more
    # accurate than the line-based pass above.
    return split_blocks(''.join(collected))
550
550
551 #------------------------------------------------------------------------
551 #------------------------------------------------------------------------
552 # Private interface
552 # Private interface
553 #------------------------------------------------------------------------
553 #------------------------------------------------------------------------
554
554
def _find_indent(self, line):
    """Compute the new indentation level for a single line.

    Parameters
    ----------
    line : str
      A single new line of non-whitespace, non-comment Python input.

    Returns
    -------
    indent_spaces : int
      New value for the indent level (it may be equal to
      self.indent_spaces if indentation doesn't change).

    full_dedent : boolean
      Whether the new line causes a full flush-left dedent.
    """
    indent_spaces = self.indent_spaces
    full_dedent = self._full_dedent

    # An explicit dedent: the line starts further left than the level we
    # were tracking.
    inisp = num_ini_spaces(line)
    if inisp < indent_spaces:
        indent_spaces = inisp
        if indent_spaces <= 0:
            full_dedent = True

    # A trailing colon opens a new block.  Trailing whitespace is ignored
    # so that a line such as 'if x:  ' is still recognized, and an empty
    # string no longer raises IndexError.
    if line.rstrip().endswith(':'):
        indent_spaces += 4
    elif dedent_re.match(line):
        # Statements matched by dedent_re close the current block.
        indent_spaces -= 4
        if indent_spaces <= 0:
            full_dedent = True

    # Safety: never report a negative indentation level.
    if indent_spaces < 0:
        indent_spaces = 0

    return indent_spaces, full_dedent
595
595
def _update_indent(self, lines):
    """Refresh the indentation state from every meaningful line in `lines`.

    Comments are removed first; empty and whitespace-only lines are
    skipped, every other line updates self.indent_spaces and
    self._full_dedent through _find_indent().
    """
    uncommented = remove_comments(lines)
    for text in uncommented.splitlines():
        if not text or text.isspace():
            continue
        self.indent_spaces, self._full_dedent = self._find_indent(text)
600
600
601 def _store(self, lines, buffer=None, store='source'):
601 def _store(self, lines, buffer=None, store='source'):
602 """Store one or more lines of input.
602 """Store one or more lines of input.
603
603
604 If input lines are not newline-terminated, a newline is automatically
604 If input lines are not newline-terminated, a newline is automatically
605 appended."""
605 appended."""
606
606 if not isinstance(lines, unicode):
607 lines = lines.decode(self.encoding)
608
607 if buffer is None:
609 if buffer is None:
608 buffer = self._buffer
610 buffer = self._buffer
609
611
610 if lines.endswith('\n'):
612 if lines.endswith('\n'):
611 buffer.append(lines)
613 buffer.append(lines)
612 else:
614 else:
613 buffer.append(lines+'\n')
615 buffer.append(lines+'\n')
614 setattr(self, store, self._set_source(buffer))
616 setattr(self, store, self._set_source(buffer))
615
617
616 def _set_source(self, buffer):
618 def _set_source(self, buffer):
617 return ''.join(buffer).encode(self.encoding)
619 return ''.join(buffer).encode(self.encoding)
618
620
619
621
620 #-----------------------------------------------------------------------------
622 #-----------------------------------------------------------------------------
621 # Functions and classes for IPython-specific syntactic support
623 # Functions and classes for IPython-specific syntactic support
622 #-----------------------------------------------------------------------------
624 #-----------------------------------------------------------------------------
623
625
624 # RegExp for splitting line contents into pre-char//first word-method//rest.
626 # RegExp for splitting line contents into pre-char//first word-method//rest.
625 # For clarity, each group is on one line.
627 # For clarity, each group is on one line.
626
628
# Raw string: the pattern is full of regex escapes (\s, \?, \w, ...) that are
# not valid Python string escapes.
line_split = re.compile(r"""
             ^(\s*)               # any leading space
             ([,;/%]|!!?|\?\??)   # escape character or characters
             \s*(%?[\w\.\*]*)     # function/method, possibly with leading %
                                  # to correctly treat things like '?%magic'
             (\s+.*$|$)           # rest of line
             """, re.VERBOSE)


def split_user_input(line):
    """Split user input into early whitespace, esc-char, function part and rest.

    Lines with '=' in them are currently handled in a very inconsistent
    manner.

    Returns a 4-tuple ``(lspace, esc, fpart, rest)``.  When `line` does not
    start with one of the escape characters, `esc` is empty and the line is
    split on its first run of whitespace instead.

    Examples
    ========
    >>> split_user_input('x=1')
    ('', '', 'x=1', '')
    >>> split_user_input('?')
    ('', '?', '', '')
    >>> split_user_input('??')
    ('', '??', '', '')
    >>> split_user_input(' ?')
    (' ', '?', '', '')
    >>> split_user_input(' ??')
    (' ', '??', '', '')
    >>> split_user_input('??x')
    ('', '??', 'x', '')
    >>> split_user_input('?x=1')
    ('', '', '?x=1', '')
    >>> split_user_input('!ls')
    ('', '!', 'ls', '')
    >>> split_user_input(' !ls')
    (' ', '!', 'ls', '')
    >>> split_user_input('!!ls')
    ('', '!!', 'ls', '')
    >>> split_user_input(' !!ls')
    (' ', '!!', 'ls', '')
    >>> split_user_input(',ls')
    ('', ',', 'ls', '')
    >>> split_user_input(';ls')
    ('', ';', 'ls', '')
    >>> split_user_input(' ;ls')
    (' ', ';', 'ls', '')
    >>> split_user_input('f.g(x)')
    ('', '', 'f.g(x)', '')
    >>> split_user_input('f.g (x)')
    ('', '', 'f.g', '(x)')
    >>> split_user_input('?%hist')
    ('', '?', '%hist', '')
    >>> split_user_input('?x*')
    ('', '?', 'x*', '')
    """
    match = line_split.match(line)
    if match:
        lspace, esc, fpart, rest = match.groups()
    else:
        # No escape at the start: split on the first run of whitespace.
        try:
            fpart, rest = line.split(None, 1)
        except ValueError:
            # Nothing to split on; the whole line is the function part.
            fpart, rest = line, ''
        lspace = re.match(r'^(\s*)(.*)', line).groups()[0]
        esc = ''

    # fpart has to be a valid python identifier, so it better be only pure
    # ascii, no unicode:
    try:
        fpart = fpart.encode('ascii')
    except UnicodeEncodeError:
        # Not an identifier after all: fold it into the rest of the line.
        lspace = unicode(lspace)
        rest = fpart + u' ' + rest
        fpart = u''

    return lspace, esc, fpart.strip(), rest.lstrip()
706
708
707
709
708 # The escaped translators ALL receive a line where their own escape has been
710 # The escaped translators ALL receive a line where their own escape has been
709 # stripped. Only '?' is valid at the end of the line, all others can only be
711 # stripped. Only '?' is valid at the end of the line, all others can only be
710 # placed at the start.
712 # placed at the start.
711
713
class LineInfo(object):
    """One line of input together with its pre-split components.

    A small utility class wrapping the result of :func:`split_user_input`
    so that it can be handed around conveniently during input
    transformations.

    Attributes:

    line
      The original, raw line.

    lspace
      Any early whitespace before actual text starts.

    esc
      The initial esc character (or characters, for double-char escapes
      like '??' or '!!').

    fpart
      The 'function part': basically the maximal initial sequence of valid
      python identifiers and the '.' character.  This is what gets checked
      for alias and magic transformations, used for auto-calling, etc.

    rest
      Everything else on the line.
    """
    def __init__(self, line):
        self.line = line
        pieces = split_user_input(line)
        self.lspace, self.esc, self.fpart, self.rest = pieces

    def __str__(self):
        fields = (self.lspace, self.esc, self.fpart, self.rest)
        return "LineInfo [%s|%s|%s|%s]" % fields
748
750
749
751
750 # Transformations of the special syntaxes that don't rely on an explicit escape
752 # Transformations of the special syntaxes that don't rely on an explicit escape
751 # character but instead on patterns on the input line
753 # character but instead on patterns on the input line
752
754
753 # The core transformations are implemented as standalone functions that can be
755 # The core transformations are implemented as standalone functions that can be
754 # tested and validated in isolation. Each of these uses a regexp, we
756 # tested and validated in isolation. Each of these uses a regexp, we
755 # pre-compile these and keep them close to each function definition for clarity
757 # pre-compile these and keep them close to each function definition for clarity
756
758
757 _assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
759 _assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
758 r'\s*=\s*!\s*(?P<cmd>.*)')
760 r'\s*=\s*!\s*(?P<cmd>.*)')
759
761
760 def transform_assign_system(line):
762 def transform_assign_system(line):
761 """Handle the `files = !ls` syntax."""
763 """Handle the `files = !ls` syntax."""
762 m = _assign_system_re.match(line)
764 m = _assign_system_re.match(line)
763 if m is not None:
765 if m is not None:
764 cmd = m.group('cmd')
766 cmd = m.group('cmd')
765 lhs = m.group('lhs')
767 lhs = m.group('lhs')
766 expr = make_quoted_expr(cmd)
768 expr = make_quoted_expr(cmd)
767 new_line = '%s = get_ipython().getoutput(%s)' % (lhs, expr)
769 new_line = '%s = get_ipython().getoutput(%s)' % (lhs, expr)
768 return new_line
770 return new_line
769 return line
771 return line
770
772
771
773
772 _assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
774 _assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
773 r'\s*=\s*%\s*(?P<cmd>.*)')
775 r'\s*=\s*%\s*(?P<cmd>.*)')
774
776
775 def transform_assign_magic(line):
777 def transform_assign_magic(line):
776 """Handle the `a = %who` syntax."""
778 """Handle the `a = %who` syntax."""
777 m = _assign_magic_re.match(line)
779 m = _assign_magic_re.match(line)
778 if m is not None:
780 if m is not None:
779 cmd = m.group('cmd')
781 cmd = m.group('cmd')
780 lhs = m.group('lhs')
782 lhs = m.group('lhs')
781 expr = make_quoted_expr(cmd)
783 expr = make_quoted_expr(cmd)
782 new_line = '%s = get_ipython().magic(%s)' % (lhs, expr)
784 new_line = '%s = get_ipython().magic(%s)' % (lhs, expr)
783 return new_line
785 return new_line
784 return line
786 return line
785
787
786
788
_classic_prompt_re = re.compile(r'^([ \t]*>>> |^[ \t]*\.\.\. )')

def transform_classic_prompt(line):
    """Strip a leading classic Python prompt ('>>> ' or '... ') from a line.

    Empty and whitespace-only lines, and lines without such a prompt, are
    returned unchanged.
    """
    if not line or line.isspace():
        return line
    found = _classic_prompt_re.match(line)
    if not found:
        return line
    # match() anchors at position 0, so end() is the length of the prompt.
    return line[found.end():]
799
801
800
802
_ipy_prompt_re = re.compile(r'^([ \t]*In \[\d+\]: |^[ \t]*\ \ \ \.\.\.+: )')

def transform_ipy_prompt(line):
    """Strip a leading classic IPython prompt from a line.

    Both the 'In [N]: ' input prompt and the '   ...: ' continuation
    prompt are recognized.  Empty and whitespace-only lines, and lines
    without such a prompt, are returned unchanged.
    """
    if not line or line.isspace():
        return line
    found = _ipy_prompt_re.match(line)
    if not found:
        return line
    # match() anchors at position 0, so end() is the length of the prompt.
    return line[found.end():]
815
817
816
818
class EscapedTransformer(object):
    """Class to transform lines that are explicitly escaped out."""

    def __init__(self):
        # Dispatch table: escape sequence -> translator for that escape.
        self.tr = {
            ESC_SHELL: self._tr_system,
            ESC_SH_CAP: self._tr_system2,
            ESC_HELP: self._tr_help,
            ESC_HELP2: self._tr_help,
            ESC_MAGIC: self._tr_magic,
            ESC_QUOTE: self._tr_quote,
            ESC_QUOTE2: self._tr_quote2,
            ESC_PAREN: self._tr_paren,
        }

    # The translators below handle the explicit escapes a user can type at
    # the beginning of a line.
    @staticmethod
    def _tr_system(line_info):
        "Translate lines escaped with: !"
        command = line_info.line.lstrip().lstrip(ESC_SHELL)
        quoted = make_quoted_expr(command)
        return '%sget_ipython().system(%s)' % (line_info.lspace, quoted)

    @staticmethod
    def _tr_system2(line_info):
        "Translate lines escaped with: !!"
        # Drop the two-character escape after any leading whitespace.
        command = line_info.line.lstrip()[2:]
        quoted = make_quoted_expr(command)
        return '%sget_ipython().getoutput(%s)' % (line_info.lspace, quoted)

    @staticmethod
    def _tr_help(line_info):
        "Translate lines escaped with: ?/??"
        # A bare help escape just brings up the intro help screen.
        if not line_info.line[1:]:
            return 'get_ipython().show_usage()'

        # Up to two trailing '?' are rotated to the front, so the rest of
        # the logic can assume the escape is at the start of the line.
        text = line_info.line
        for _ in range(2):
            if text.endswith('?'):
                text = text[-1] + text[:-1]
        info = LineInfo(text)

        # Pick the level of detail; the psearch syntax is special-cased.
        if '*' in info.line:
            pinfo = 'psearch'
        elif info.esc == '??':
            pinfo = 'pinfo2'
        else:
            pinfo = 'pinfo'

        target = ' '.join([info.fpart, info.rest]).strip()
        return '%sget_ipython().magic("%s %s")' % (info.lspace, pinfo, target)

    @staticmethod
    def _tr_magic(line_info):
        "Translate lines escaped with: %"
        command = ' '.join([line_info.fpart, line_info.rest]).strip()
        quoted = make_quoted_expr(command)
        return '%sget_ipython().magic(%s)' % (line_info.lspace, quoted)

    @staticmethod
    def _tr_quote(line_info):
        "Translate lines escaped with: ,"
        # Every whitespace-separated word becomes its own quoted argument.
        arguments = '", "'.join(line_info.rest.split())
        return '%s%s("%s")' % (line_info.lspace, line_info.fpart, arguments)

    @staticmethod
    def _tr_quote2(line_info):
        "Translate lines escaped with: ;"
        # The whole rest of the line becomes one quoted argument.
        return '%s%s("%s")' % (line_info.lspace, line_info.fpart,
                               line_info.rest)

    @staticmethod
    def _tr_paren(line_info):
        "Translate lines escaped with: /"
        arguments = ", ".join(line_info.rest.split())
        return '%s%s(%s)' % (line_info.lspace, line_info.fpart, arguments)

    def __call__(self, line):
        """Transform one line if it carries an explicit escape.

        Dispatches to the _tr_* static methods above for the actual
        translation.  Lines without a recognized escape are returned
        unmodified."""

        # Empty lines just get returned unmodified
        if not line or line.isspace():
            return line

        # Split the line to find out where an escape could be
        info = LineInfo(line)

        # Escape at the start of the line: dispatch on it.
        if info.esc in self.tr:
            return self.tr[info.esc](info)

        # '?' is the only escape that is also valid at the end of a line.
        if line.endswith(ESC_HELP):
            return self._tr_help(info)

        # No escape we recognize: leave the line alone.
        return line
925
927
926
928
# A function-looking object to be used by the rest of the code.  The purpose of
# the class in this case is to organize related functionality, more than to
# manage state.
transform_escaped = EscapedTransformer()
931
933
932
934
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax.

    In addition to what the parent class accumulates, this class keeps the
    raw, untransformed input in ``source_raw`` so that callers can retrieve
    both the transformed and the original text.
    """

    # String with raw, untransformed input.
    source_raw = ''

    # Private attributes

    # List with lines of raw input accumulated so far.
    _buffer_raw = None

    def __init__(self, input_mode=None):
        """Initialize the parent splitter and create the raw-line buffer."""
        InputSplitter.__init__(self, input_mode)
        self._buffer_raw = []

    def reset(self):
        """Reset the input buffer and associated state."""
        InputSplitter.reset(self)
        # Clear in place so existing references to the list stay valid.
        self._buffer_raw[:] = []
        self.source_raw = ''

    def source_raw_reset(self):
        """Return input and raw source and perform a full reset.

        Returns
        -------
        tuple
            ``(source, source_raw)`` as they were before the reset.
        """
        out = self.source
        out_r = self.source_raw
        self.reset()
        return out, out_r

    def push(self, lines):
        """Push one or more lines of IPython input.

        Each line is passed through the IPython syntax transformers (when it
        is safe to do so, see below) and then handed to the parent class's
        ``push`` one line at a time.  The untransformed text is recorded in
        ``source_raw``.

        Parameters
        ----------
        lines : str
            One or more lines of input.  Byte strings are decoded with
            ``self.encoding`` first.

        Returns
        -------
        The value returned by the parent ``push`` for the last line fed to
        it (or for ``lines`` itself when ``lines`` is empty).
        """
        if not lines:
            return super(IPythonInputSplitter, self).push(lines)

        # We must ensure all input is pure unicode.  Checking for ``bytes``
        # (an alias of ``str`` on Python 2) keeps the Python 2 behavior while
        # leaving text strings, which have no ``decode``, alone on Python 3.
        if isinstance(lines, bytes):
            lines = lines.decode(self.encoding)

        lines_list = lines.splitlines()

        transforms = [transform_escaped, transform_assign_system,
                      transform_assign_magic, transform_ipy_prompt,
                      transform_classic_prompt]

        # Transform logic
        #
        # We only apply the line transformers to the input if we have either no
        # input yet, or complete input, or if the last line of the buffer ends
        # with ':' (opening an indented block).  This prevents the accidental
        # transformation of escapes inside multiline expressions like
        # triple-quoted strings or parenthesized expressions.
        #
        # The last heuristic, while ugly, ensures that the first line of an
        # indented block is correctly transformed.
        #
        # FIXME: try to find a cleaner approach for this last bit.

        # If we were in 'cell' mode, since we're going to pump the parent
        # class by hand line by line, we need to temporarily switch out to
        # 'line' mode, do a single manual reset and then feed the lines one
        # by one.  Note that this only matters if the input has more than one
        # line.
        changed_input_mode = False

        if self.input_mode == 'cell':
            self.reset()
            changed_input_mode = True
            saved_input_mode = 'cell'
            self.input_mode = 'line'

        # Store raw source before applying any transformations to it.  Note
        # that this must be done *after* the reset() call that would otherwise
        # flush the buffer.
        self._store(lines, self._buffer_raw, 'source_raw')

        try:
            push = super(IPythonInputSplitter, self).push
            for line in lines_list:
                # An empty buffer short-circuits the check, so indexing
                # self._buffer[-1] is only reached when it is non-empty.
                if (self._is_complete or not self._buffer
                        or self._buffer[-1].rstrip().endswith(':')):
                    for f in transforms:
                        line = f(line)

                out = push(line)
        finally:
            # Restore the caller's input mode even if a transformer or the
            # parent push raised.
            if changed_input_mode:
                self.input_mode = saved_input_mode
        return out
General Comments 0
You need to be logged in to leave comments. Login now