##// END OF EJS Templates
Merge branch 'ast-splitter' of https://github.com/takluyver/ipython into takluyver-ast-splitter
Thomas Kluyver -
r3534:8edf5198 merge
parent child Browse files
Show More
@@ -7,6 +7,7 b' Authors'
7 7 -------
8 8 * Robert Kern
9 9 * Fernando Perez
10 * Thomas Kluyver
10 11 """
11 12
12 13 # Note: though it might be more natural to name this module 'compiler', that
@@ -51,12 +52,12 b' def code_name(code, number=0):'
51 52 # Classes and functions
52 53 #-----------------------------------------------------------------------------
53 54
54 class CachingCompiler(object):
55 class CachingCompiler(codeop.Compile):
55 56 """A compiler that caches code compiled from interactive statements.
56 57 """
57 58
58 59 def __init__(self):
59 self._compiler = codeop.CommandCompiler()
60 codeop.Compile.__init__(self)
60 61
61 62 # This is ugly, but it must be done this way to allow multiple
62 63 # simultaneous ipython instances to coexist. Since Python itself
@@ -81,35 +82,30 b' class CachingCompiler(object):'
81 82 def compiler_flags(self):
82 83 """Flags currently active in the compilation process.
83 84 """
84 return self._compiler.compiler.flags
85 return self.flags
85 86
86 def __call__(self, code, symbol, number=0):
87 """Compile some code while caching its contents such that the inspect
88 module can find it later.
87 def cache(self, code, number=0):
88 """Make a name for a block of code, and cache the code.
89 89
90 90 Parameters
91 91 ----------
92 92 code : str
93 Source code to be compiled, one or more lines.
93 The Python source code to cache.
94 number : int
95 A number which forms part of the code's name. Used for the execution
96 counter.
94 97
95 symbol : str
96 One of 'single', 'exec' or 'eval' (see the builtin ``compile``
97 documentation for further details on these fields).
98
99 number : int, optional
100 An integer argument identifying the code, useful for informational
101 purposes in tracebacks (typically it will be the IPython prompt
102 number).
98 Returns
99 -------
100 The name of the cached code (as a string). Pass this as the filename
101 argument to compilation, so that tracebacks are correctly hooked up.
103 102 """
104 103 name = code_name(code, number)
105 code_obj = self._compiler(code, name, symbol)
106 104 entry = (len(code), time.time(),
107 105 [line+'\n' for line in code.splitlines()], name)
108 # Cache the info both in the linecache (a global cache used internally
109 # by most of Python's inspect/traceback machinery), and in our cache
110 106 linecache.cache[name] = entry
111 107 linecache._ipython_cache[name] = entry
112 return code_obj
108 return name
113 109
114 110 def check_cache(self, *args):
115 111 """Call linecache.checkcache() safely protecting our cached values.
@@ -166,78 +166,6 b' def get_input_encoding():'
166 166 # Classes and functions for normal Python syntax handling
167 167 #-----------------------------------------------------------------------------
168 168
169 # HACK! This implementation, written by Robert K a while ago using the
170 # compiler module, is more robust than the other one below, but it expects its
171 # input to be pure python (no ipython syntax). For now we're using it as a
172 # second-pass splitter after the first pass transforms the input to pure
173 # python.
174
175 def split_blocks(python):
176 """ Split multiple lines of code into discrete commands that can be
177 executed singly.
178
179 Parameters
180 ----------
181 python : str
182 Pure, exec'able Python code.
183
184 Returns
185 -------
186 commands : list of str
187 Separate commands that can be exec'ed independently.
188 """
189 # compiler.parse treats trailing spaces after a newline as a
190 # SyntaxError. This is different than codeop.CommandCompiler, which
191 # will compile the trailing spaces just fine. We simply strip any
192 # trailing whitespace off. Passing a string with trailing whitespace
193 # to exec will fail however. There seems to be some inconsistency in
194 # how trailing whitespace is handled, but this seems to work.
195 python_ori = python # save original in case we bail on error
196 python = python.strip()
197
198 # The compiler module will parse the code into an abstract syntax tree.
199 # This has a bug with str("a\nb"), but not str("""a\nb""")!!!
200 try:
201 code_ast = ast.parse(python)
202 except:
203 return [python_ori]
204
205 # Uncomment to help debug the ast tree
206 # for n in code_ast.body:
207 # print n.lineno,'->',n
208
209 # Each separate command is available by iterating over ast.node. The
210 # lineno attribute is the line number (1-indexed) beginning the commands
211 # suite.
212 # lines ending with ";" yield a Discard Node that doesn't have a lineno
213 # attribute. These nodes can and should be discarded. But there are
214 # other situations that cause Discard nodes that shouldn't be discarded.
215 # We might eventually discover other cases where lineno is None and have
216 # to put in a more sophisticated test.
217 linenos = [x.lineno-1 for x in code_ast.body if x.lineno is not None]
218
219 # When we finally get the slices, we will need to slice all the way to
220 # the end even though we don't have a line number for it. Fortunately,
221 # None does the job nicely.
222 linenos.append(None)
223
224 # Same problem at the other end: sometimes the ast tree has its
225 # first complete statement not starting on line 0. In this case
226 # we might miss part of it. This fixes ticket 266993. Thanks Gael!
227 linenos[0] = 0
228
229 lines = python.splitlines()
230
231 # Create a list of atomic commands.
232 cmds = []
233 for i, j in zip(linenos[:-1], linenos[1:]):
234 cmd = lines[i:j]
235 if cmd:
236 cmds.append('\n'.join(cmd)+'\n')
237
238 return cmds
239
240
241 169 class InputSplitter(object):
242 170 """An object that can split Python source input in executable blocks.
243 171
@@ -445,8 +373,12 b' class InputSplitter(object):'
445 373 if not self._full_dedent:
446 374 return False
447 375 else:
448 nblocks = len(split_blocks(''.join(self._buffer)))
449 if nblocks==1:
376 try:
377 code_ast = ast.parse(u''.join(self._buffer))
378 except Exception:
379 return False
380 else:
381 if len(code_ast.body) == 1:
450 382 return False
451 383
452 384 # When input is complete, then termination is marked by an extra blank
@@ -454,88 +386,6 b' class InputSplitter(object):'
454 386 last_line = self.source.splitlines()[-1]
455 387 return bool(last_line and not last_line.isspace())
456 388
457 def split_blocks(self, lines):
458 """Split a multiline string into multiple input blocks.
459
460 Note: this method starts by performing a full reset().
461
462 Parameters
463 ----------
464 lines : str
465 A possibly multiline string.
466
467 Returns
468 -------
469 blocks : list
470 A list of strings, each possibly multiline. Each string corresponds
471 to a single block that can be compiled in 'single' mode (unless it
472 has a syntax error)."""
473
474 # This code is fairly delicate. If you make any changes here, make
475 # absolutely sure that you do run the full test suite and ALL tests
476 # pass.
477
478 self.reset()
479 blocks = []
480
481 # Reversed copy so we can use pop() efficiently and consume the input
482 # as a stack
483 lines = lines.splitlines()[::-1]
484 # Outer loop over all input
485 while lines:
486 #print 'Current lines:', lines # dbg
487 # Inner loop to build each block
488 while True:
489 # Safety exit from inner loop
490 if not lines:
491 break
492 # Grab next line but don't push it yet
493 next_line = lines.pop()
494 # Blank/empty lines are pushed as-is
495 if not next_line or next_line.isspace():
496 self.push(next_line)
497 continue
498
499 # Check indentation changes caused by the *next* line
500 indent_spaces, _full_dedent = self._find_indent(next_line)
501
502 # If the next line causes a dedent, it can be for two different
503 # reasons: either an explicit de-dent by the user or a
504 # return/raise/pass statement. These MUST be handled
505 # separately:
506 #
507 # 1. the first case is only detected when the actual explicit
508 # dedent happens, and that would be the *first* line of a *new*
509 # block. Thus, we must put the line back into the input buffer
510 # so that it starts a new block on the next pass.
511 #
512 # 2. the second case is detected in the line before the actual
513 # dedent happens, so we consume the line and we can break out
514 # to start a new block.
515
516 # Case 1, explicit dedent causes a break.
517 # Note: check that we weren't on the very last line, else we'll
518 # enter an infinite loop adding/removing the last line.
519 if _full_dedent and lines and not next_line.startswith(' '):
520 lines.append(next_line)
521 break
522
523 # Otherwise any line is pushed
524 self.push(next_line)
525
526 # Case 2, full dedent with full block ready:
527 if _full_dedent or \
528 self.indent_spaces==0 and not self.push_accepts_more():
529 break
530 # Form the new block with the current source input
531 blocks.append(self.source_reset())
532
533 #return blocks
534 # HACK!!! Now that our input is in blocks but guaranteed to be pure
535 # python syntax, feed it back a second time through the AST-based
536 # splitter, which is more accurate than ours.
537 return split_blocks(''.join(blocks))
538
539 389 #------------------------------------------------------------------------
540 390 # Private interface
541 391 #------------------------------------------------------------------------
@@ -20,6 +20,7 b' from __future__ import absolute_import'
20 20 import __builtin__
21 21 import __future__
22 22 import abc
23 import ast
23 24 import atexit
24 25 import codeop
25 26 import inspect
@@ -2100,58 +2101,20 b' class InteractiveShell(Configurable, Magic):'
2100 2101 warn('Unknown failure executing file: <%s>' % fname)
2101 2102
2102 2103 def run_cell(self, cell, store_history=True):
2103 """Run the contents of an entire multiline 'cell' of code, and store it
2104 in the history.
2105
2106 The cell is split into separate blocks which can be executed
2107 individually. Then, based on how many blocks there are, they are
2108 executed as follows:
2109
2110 - A single block: 'single' mode. If it is also a single line, dynamic
2111 transformations, including automagic and macros, will be applied.
2112
2113 If there's more than one block, it depends:
2114
2115 - if the last one is no more than two lines long, run all but the last
2116 in 'exec' mode and the very last one in 'single' mode. This makes it
2117 easy to type simple expressions at the end to see computed values. -
2118 otherwise (last one is also multiline), run all in 'exec' mode
2119
2120 When code is executed in 'single' mode, :func:`sys.displayhook` fires,
2121 results are displayed and output prompts are computed. In 'exec' mode,
2122 no results are displayed unless :func:`print` is called explicitly;
2123 this mode is more akin to running a script.
2104 """Run a complete IPython cell.
2124 2105
2125 2106 Parameters
2126 2107 ----------
2127 2108 cell : str
2128 A single or multiline string.
2109 The code (including IPython code such as %magic functions) to run.
2110 store_history : bool
2111 If True, the raw and translated cell will be stored in IPython's
2112 history. For user code calling back into IPython's machinery, this
2113 should be set to False.
2129 2114 """
2130 # Store the untransformed code
2131 2115 raw_cell = cell
2132
2133 # Code transformation and execution must take place with our
2134 # modifications to builtins.
2135 2116 with self.builtin_trap:
2136
2137 # We need to break up the input into executable blocks that can
2138 # be run in 'single' mode, to provide comfortable user behavior.
2139 blocks = self.input_splitter.split_blocks(cell)
2140
2141 if not blocks: # Blank cell
2142 return
2143
2144 # We only do dynamic transforms on a single line. But a macro
2145 # can be expanded to several lines, so we need to split it
2146 # into input blocks again.
2147 if len(cell.splitlines()) <= 1:
2148 cell = self.prefilter_manager.prefilter_line(blocks[0])
2149 blocks = self.input_splitter.split_blocks(cell)
2150
2151 # Store the 'ipython' version of the cell as well, since
2152 # that's what needs to go into the translated history and get
2153 # executed (the original cell may contain non-python syntax).
2154 cell = ''.join(blocks)
2117 cell = self.prefilter_manager.prefilter_lines(cell)
2155 2118
2156 2119 # Store raw and processed history
2157 2120 if store_history:
@@ -2160,53 +2123,71 b' class InteractiveShell(Configurable, Magic):'
2160 2123
2161 2124 self.logger.log(cell, raw_cell)
2162 2125
2163 # All user code execution should take place with our
2164 # modified displayhook.
2126 cell_name = self.compile.cache(cell, self.execution_count)
2127
2165 2128 with self.display_trap:
2166 # Single-block input should behave like an interactive prompt
2167 if len(blocks) == 1:
2168 out = self.run_source(blocks[0])
2169 # Write output to the database. Does nothing unless
2170 # history output logging is enabled.
2171 if store_history:
2172 self.history_manager.store_output(self.execution_count)
2173 # Since we return here, we need to update the
2174 # execution count
2129 try:
2130 code_ast = ast.parse(cell, filename=cell_name)
2131 except (OverflowError, SyntaxError, ValueError, TypeError, MemoryError):
2132 # Case 1
2133 self.showsyntaxerror()
2175 2134 self.execution_count += 1
2176 return out
2135 return None
2177 2136
2178 # In multi-block input, if the last block is a simple (one-two
2179 # lines) expression, run it in single mode so it produces output.
2180 # Otherwise just run it all in 'exec' mode. This seems like a
2181 # reasonable usability design.
2182 last = blocks[-1]
2183 last_nlines = len(last.splitlines())
2184
2185 if last_nlines < 2:
2186 # Here we consider the cell split between 'body' and 'last',
2187 # store all history and execute 'body', and if successful, then
2188 # proceed to execute 'last'.
2189
2190 # Get the main body to run as a cell
2191 ipy_body = ''.join(blocks[:-1])
2192 retcode = self.run_source(ipy_body, symbol='exec',
2193 post_execute=False)
2194 if retcode==0:
2195 # Last expression compiled as 'single' so it
2196 # produces output
2197 self.run_source(last)
2198 else:
2199 # Run the whole cell as one entity, storing both raw and
2200 # processed input in history
2201 self.run_source(cell, symbol='exec')
2137 interactivity = 'last' # Last node to be run interactive
2138 if len(cell.splitlines()) == 1:
2139 interactivity = 'all' # Single line; run fully interactive
2202 2140
2141 self.run_ast_nodes(code_ast.body, cell_name, interactivity)
2142
2143 if store_history:
2203 2144 # Write output to the database. Does nothing unless
2204 2145 # history output logging is enabled.
2205 if store_history:
2206 2146 self.history_manager.store_output(self.execution_count)
2207 2147 # Each cell is a *single* input, regardless of how many lines it has
2208 2148 self.execution_count += 1
2209 2149
2150 def run_ast_nodes(self, nodelist, cell_name, interactivity='last'):
2151 """Run a sequence of AST nodes. The execution mode depends on the
2152 interactivity parameter.
2153
2154 Parameters
2155 ----------
2156 nodelist : list
2157 A sequence of AST nodes to run.
2158 cell_name : str
2159 Will be passed to the compiler as the filename of the cell. Typically
2160 the value returned by ip.compile.cache(cell).
2161 interactivity : str
2162 'all', 'last' or 'none', specifying which nodes should be run
2163 interactively (displaying output from expressions). Other values for
2164 this parameter will raise a ValueError.
2165 """
2166 if not nodelist:
2167 return
2168
2169 if interactivity == 'none':
2170 to_run_exec, to_run_interactive = nodelist, []
2171 elif interactivity == 'last':
2172 to_run_exec, to_run_interactive = nodelist[:-1], nodelist[-1:]
2173 elif interactivity == 'all':
2174 to_run_exec, to_run_interactive = [], nodelist
2175 else:
2176 raise ValueError("Interactivity was %r" % interactivity)
2177
2178 exec_count = self.execution_count
2179 if to_run_exec:
2180 mod = ast.Module(to_run_exec)
2181 self.code_to_run = code = self.compile(mod, cell_name, "exec")
2182 if self.run_code(code) == 1:
2183 return
2184
2185 if to_run_interactive:
2186 mod = ast.Interactive(to_run_interactive)
2187 self.code_to_run = code = self.compile(mod, cell_name, "single")
2188 return self.run_code(code)
2189
2190
2210 2191 # PENDING REMOVAL: this method is slated for deletion, once our new
2211 2192 # input logic has been 100% moved to frontends and is stable.
2212 2193 def runlines(self, lines, clean=False):
@@ -2296,7 +2277,8 b' class InteractiveShell(Configurable, Magic):'
2296 2277 print 'encoding', self.stdin_encoding # dbg
2297 2278
2298 2279 try:
2299 code = self.compile(usource, symbol, self.execution_count)
2280 code_name = self.compile.cache(usource, self.execution_count)
2281 code = self.compile(usource, code_name, symbol)
2300 2282 except (OverflowError, SyntaxError, ValueError, TypeError, MemoryError):
2301 2283 # Case 1
2302 2284 self.showsyntaxerror(filename)
@@ -40,12 +40,12 b' def test_code_name2():'
40 40 nt.assert_true(name.startswith('<ipython-input-9'))
41 41
42 42
43 def test_compiler():
43 def test_cache():
44 44 """Test the compiler correctly compiles and caches inputs
45 45 """
46 46 cp = compilerop.CachingCompiler()
47 47 ncache = len(linecache.cache)
48 cp('x=1', 'single')
48 cp.cache('x=1')
49 49 nt.assert_true(len(linecache.cache) > ncache)
50 50
51 51 def setUp():
@@ -53,10 +53,10 b' def setUp():'
53 53 # as GTK, can change the default encoding, which can hide bugs.)
54 54 nt.assert_equal(sys.getdefaultencoding(), "ascii")
55 55
56 def test_compiler_unicode():
56 def test_cache_unicode():
57 57 cp = compilerop.CachingCompiler()
58 58 ncache = len(linecache.cache)
59 cp(u"t = 'žćčšđ'", "single")
59 cp.cache(u"t = 'žćčšđ'")
60 60 nt.assert_true(len(linecache.cache) > ncache)
61 61
62 62 def test_compiler_check_cache():
@@ -64,7 +64,7 b' def test_compiler_check_cache():'
64 64 """
65 65 # Rather simple-minded tests that just exercise the API
66 66 cp = compilerop.CachingCompiler()
67 cp('x=1', 'single', 99)
67 cp.cache('x=1', 99)
68 68 # Ensure now that after clearing the cache, our entries survive
69 69 cp.check_cache()
70 70 for k in linecache.cache:
@@ -43,9 +43,7 b' def run(tests):'
43 43 for pre, post in tests:
44 44 global num_tests
45 45 num_tests += 1
46 ip.runlines(pre)
47 ip.runlines('_i') # Not sure why I need this...
48 actual = ip.user_ns['_i']
46 actual = ip.prefilter_manager.prefilter_lines(pre)
49 47 if actual != None:
50 48 actual = actual.rstrip('\n')
51 49 if actual != post:
@@ -286,92 +286,6 b' class InputSplitterTestCase(unittest.TestCase):'
286 286 isp.push('run foo')
287 287 self.assertFalse(isp.push_accepts_more())
288 288
289 def check_split(self, block_lines, compile=True):
290 blocks = assemble(block_lines)
291 lines = ''.join(blocks)
292 oblock = self.isp.split_blocks(lines)
293 self.assertEqual(oblock, blocks)
294 if compile:
295 for block in blocks:
296 self.isp._compile(block)
297
298 def test_split(self):
299 # All blocks of input we want to test in a list. The format for each
300 # block is a list of lists, with each inner lists consisting of all the
301 # lines (as single-lines) that should make up a sub-block.
302
303 # Note: do NOT put here sub-blocks that don't compile, as the
304 # check_split() routine makes a final verification pass to check that
305 # each sub_block, as returned by split_blocks(), does compile
306 # correctly.
307 all_blocks = [ [['x=1']],
308
309 [['x=1'],
310 ['y=2']],
311
312 [['x=1',
313 '# a comment'],
314 ['y=11']],
315
316 [['if 1:',
317 ' x=1'],
318 ['y=3']],
319
320 [['def f(x):',
321 ' return x'],
322 ['x=1']],
323
324 [['def f(x):',
325 ' x+=1',
326 ' ',
327 ' return x'],
328 ['x=1']],
329
330 [['def f(x):',
331 ' if x>0:',
332 ' y=1',
333 ' # a comment',
334 ' else:',
335 ' y=4',
336 ' ',
337 ' return y'],
338 ['x=1'],
339 ['if 1:',
340 ' y=11'] ],
341
342 [['for i in range(10):'
343 ' x=i**2']],
344
345 [['for i in range(10):'
346 ' x=i**2'],
347 ['z = 1']],
348
349 [['"asdf"']],
350
351 [['"asdf"'],
352 ['10'],
353 ],
354
355 [['"""foo',
356 'bar"""']],
357 ]
358 for block_lines in all_blocks:
359 self.check_split(block_lines)
360
361 def test_split_syntax_errors(self):
362 # Block splitting with invalid syntax
363 all_blocks = [ [['a syntax error']],
364
365 [['x=1',
366 'another syntax error']],
367
368 [['for i in range(10):'
369 ' yet another error']],
370
371 ]
372 for block_lines in all_blocks:
373 self.check_split(block_lines, compile=False)
374
375 289 def test_unicode(self):
376 290 self.isp.push(u"Pérez")
377 291 self.isp.push(u'\xc3\xa9')
@@ -56,7 +56,6 b' class InteractiveShellTestCase(unittest.TestCase):'
56 56 self.assertEquals(ip.user_ns['x'], 2)
57 57 self.assertEquals(ip.user_ns['y'], 3)
58 58
59 @dec.skip_known_failure
60 59 def test_multiline_string_cells(self):
61 60 "Code sprinkled with multiline strings should execute (GH-306)"
62 61 ip = get_ipython()
@@ -97,10 +97,10 b' In [7]: autocall 0'
97 97 Automatic calling is: OFF
98 98
99 99 In [8]: cos pi
100 File "<ipython-input-8-6bd7313dd9a9>", line 1
100 File "<ipython-input-8-586f1104ea44>", line 1
101 101 cos pi
102 102 ^
103 SyntaxError: invalid syntax
103 SyntaxError: unexpected EOF while parsing
104 104
105 105
106 106 In [9]: cos(pi)
General Comments 0
You need to be logged in to leave comments. Login now