From d61942ff9c707c7a7d5bebfca039f401bba51314 2010-07-28 04:55:36 From: Fernando Perez Date: 2010-07-28 04:55:36 Subject: [PATCH] Renamed to inputsplitter, added more tests and examples. All items from code review are now in. Still need to finish the special syntax and it will be ready for trunk. --- diff --git a/IPython/core/blockbreaker.py b/IPython/core/inputsplitter.py similarity index 66% rename from IPython/core/blockbreaker.py rename to IPython/core/inputsplitter.py index d7e4ac2..f775364 100644 --- a/IPython/core/blockbreaker.py +++ b/IPython/core/inputsplitter.py @@ -1,12 +1,11 @@ """Analysis of text input into executable blocks. -This is a simple example of how an interactive terminal-based client can use -this tool:: +The main class in this module, :class:`InputSplitter`, is designed to break +input from either interactive, line-by-line environments or block-based ones, +into standalone blocks that can be executed by Python as 'single' statements +(thus triggering sys.displayhook). - bb = BlockBreaker() - while not bb.interactive_block_ready(): - bb.push(raw_input('>>> ')) - print 'Input source was:\n', bb.source, +For more details, see the class docstring below. """ #----------------------------------------------------------------------------- # Copyright (C) 2010 The IPython Development Team @@ -43,6 +42,10 @@ def num_ini_spaces(s): Parameters ---------- s : string + + Returns + ------- + n : int """ ini_spaces = ini_spaces_re.match(s) @@ -78,31 +81,64 @@ def get_input_encoding(): # Classes and functions #----------------------------------------------------------------------------- -class BlockBreaker(object): - # Command compiler - compile = None - # Number of spaces of indentation +class InputSplitter(object): + """An object that can split Python source input in executable blocks. + + This object is designed to be used in one of two basic modes: + + 1. By feeding it python source line-by-line, using :meth:`push`. 
In this + mode, it will return on each push whether the currently pushed code + could be executed already. In addition, it provides a method called + :meth:`push_accepts_more` that can be used to query whether more input + can be pushed into a single interactive block. + + 2. By calling :meth:`split_blocks` with a single, multiline Python string, + that is then split into blocks each of which can be executed + interactively as a single statement. + + This is a simple example of how an interactive terminal-based client can use + this tool:: + + isp = InputSplitter() + while isp.push_accepts_more(): + indent = ' '*isp.indent_spaces + prompt = '>>> ' + indent + line = indent + raw_input(prompt) + isp.push(line) + print 'Input source was:\n', isp.source_reset(), + """ + # Number of spaces of indentation computed from input that has been pushed + # so far. This is the attribute callers should query to get the current + # indentation level, in order to provide auto-indent facilities. indent_spaces = 0 - # Mark when input has changed indentation all the way back to flush-left - full_dedent = False - # String, indicating the default input encoding + # String, indicating the default input encoding. It is computed by default + # at initialization time via get_input_encoding(), but it can be reset by a + # client with specific knowledge of the encoding. encoding = '' - # String where the current full source input is stored, properly encoded + # String where the current full source input is stored, properly encoded. + # Reading this attribute is the normal way of querying the currently pushed + # source code, that has been properly encoded. source = '' - # Code object corresponding to the current source + # Code object corresponding to the current source. It is automatically + # synced to the source, so it can be queried at any time to obtain the code + # object; it will be None if the source doesn't compile to valid Python. 
code = None - # Boolean indicating whether the current block is complete - is_complete = None # Input mode input_mode = 'append' # Private attributes - # List + # List with lines of input accumulated so far _buffer = None + # Command compiler + _compile = None + # Mark when input has changed indentation all the way back to flush-left + _full_dedent = False + # Boolean indicating whether the current block is complete + _is_complete = None def __init__(self, input_mode=None): - """Create a new BlockBreaker instance. + """Create a new InputSplitter instance. Parameters ---------- @@ -118,9 +154,9 @@ class BlockBreaker(object): while block-oriented ones will want to use 'replace'. """ self._buffer = [] - self.compile = codeop.CommandCompiler() + self._compile = codeop.CommandCompiler() self.encoding = get_input_encoding() - self.input_mode = BlockBreaker.input_mode if input_mode is None \ + self.input_mode = InputSplitter.input_mode if input_mode is None \ else input_mode def reset(self): @@ -129,8 +165,8 @@ class BlockBreaker(object): self._buffer[:] = [] self.source = '' self.code = None - self.is_complete = False - self.full_dedent = False + self._is_complete = False + self._full_dedent = False def source_reset(self): """Return the input source and perform a full reset. @@ -145,7 +181,8 @@ class BlockBreaker(object): This stores the given lines and returns a status code indicating whether the code forms a complete Python block or not. - Any exceptions generated in compilation are allowed to propagate. + Any exceptions generated in compilation are swallowed, but if an + exception was produced, the method returns True. Parameters ---------- @@ -157,8 +194,8 @@ class BlockBreaker(object): is_complete : boolean True if the current input source (the result of the current input plus prior inputs) forms a complete Python execution block. Note that - this value is also stored as an attribute so it can be queried at any - time. 
+ this value is also stored as a private attribute (_is_complete), so it + can be queried at any time. """ if self.input_mode == 'replace': self.reset() @@ -173,14 +210,14 @@ class BlockBreaker(object): self._store(lines) source = self.source - # Before calling compile(), reset the code object to None so that if an + # Before calling _compile(), reset the code object to None so that if an # exception is raised in compilation, we don't mislead by having # inconsistent code/source attributes. - self.code, self.is_complete = None, None + self.code, self._is_complete = None, None self._update_indent(lines) try: - self.code = self.compile(source) + self.code = self._compile(source) # Invalid syntax can produce any of a number of different errors from # inside the compiler, so we have to catch them all. Syntax errors # immediately produce a 'ready' block, so the invalid Python can be @@ -188,21 +225,22 @@ class BlockBreaker(object): # special-syntax conversion. except (SyntaxError, OverflowError, ValueError, TypeError, MemoryError): - self.is_complete = True + self._is_complete = True else: # Compilation didn't produce any exceptions (though it may not have # given a complete code object) - self.is_complete = self.code is not None + self._is_complete = self.code is not None - return self.is_complete + return self._is_complete - def interactive_block_ready(self): - """Return whether a block of interactive input is ready for execution. + def push_accepts_more(self): + """Return whether a block of interactive input can accept more input. This method is meant to be used by line-oriented frontends, who need to guess whether a block is complete or not based solely on prior and - current input lines. The BlockBreaker considers it has a complete - interactive block when *all* of the following are true: + current input lines. 
The InputSplitter considers it has a complete + interactive block and will not accept more input only when either a + SyntaxError is raised, or *all* of the following are true: 1. The input compiles to a complete statement. @@ -218,21 +256,23 @@ class BlockBreaker(object): Block-oriented frontends that have a separate keyboard event to indicate execution should use the :meth:`split_blocks` method instead. + + If the current input produces a syntax error, this method immediately + returns False but does *not* raise the syntax error exception, as + typically clients will want to send invalid syntax to an execution + backend which might convert the invalid syntax into valid Python via + one of the dynamic IPython mechanisms. """ - #print 'complete?', self.source # dbg - #if self.full_dedent: - # True - if not self.is_complete: - return False - if self.indent_spaces==0: + if not self._is_complete: return True - last_line = self.source.splitlines()[-1] - if not last_line or last_line.isspace(): - return True - else: - return False + if self.indent_spaces==0: + return False + + last_line = self.source.splitlines()[-1] + return bool(last_line and not last_line.isspace()) + def split_blocks(self, lines): """Split a multiline string into multiple input blocks. @@ -275,7 +315,7 @@ class BlockBreaker(object): continue # Check indentation changes caused by the *next* line - indent_spaces, full_dedent = self._find_indent(next_line) + indent_spaces, _full_dedent = self._find_indent(next_line) # If the next line causes a dedent, it can be for two differnt # reasons: either an explicit de-dent by the user or a @@ -292,7 +332,7 @@ class BlockBreaker(object): # to start a new block. 
# Case 1, explicit dedent causes a break - if full_dedent and not next_line.startswith(' '): + if _full_dedent and not next_line.startswith(' '): lines.append(next_line) break @@ -300,8 +340,8 @@ class BlockBreaker(object): self.push(next_line) # Case 2, full dedent with full block ready: - if full_dedent or \ - self.indent_spaces==0 and self.interactive_block_ready(): + if _full_dedent or \ + self.indent_spaces==0 and not self.push_accepts_more(): break # Form the new block with the current source input blocks.append(self.source_reset()) @@ -330,7 +370,7 @@ class BlockBreaker(object): Whether the new line causes a full flush-left dedent. """ indent_spaces = self.indent_spaces - full_dedent = self.full_dedent + full_dedent = self._full_dedent inisp = num_ini_spaces(line) if inisp < indent_spaces: @@ -356,7 +396,7 @@ class BlockBreaker(object): def _update_indent(self, lines): for line in remove_comments(lines).splitlines(): if line and not line.isspace(): - self.indent_spaces, self.full_dedent = self._find_indent(line) + self.indent_spaces, self._full_dedent = self._find_indent(line) def _store(self, lines): """Store one or more lines of input. @@ -372,4 +412,3 @@ class BlockBreaker(object): def _set_source(self): self.source = ''.join(self._buffer).encode(self.encoding) - diff --git a/IPython/core/tests/test_blockbreaker.py b/IPython/core/tests/test_inputsplitter.py similarity index 50% rename from IPython/core/tests/test_blockbreaker.py rename to IPython/core/tests/test_inputsplitter.py index b316529..5910d1f 100644 --- a/IPython/core/tests/test_blockbreaker.py +++ b/IPython/core/tests/test_inputsplitter.py @@ -1,4 +1,4 @@ -"""Tests for the blockbreaker module. +"""Tests for the inputsplitter module. 
""" #----------------------------------------------------------------------------- # Copyright (C) 2010 The IPython Development Team @@ -17,7 +17,34 @@ import unittest import nose.tools as nt # Our own -from IPython.core import blockbreaker as BB +from IPython.core import inputsplitter as isp + +#----------------------------------------------------------------------------- +# Semi-complete examples (also used as tests) +#----------------------------------------------------------------------------- +def mini_interactive_loop(raw_input): + """Minimal example of the logic of an interactive interpreter loop. + + This serves as an example, and it is used by the test system with a fake + raw_input that simulates interactive input.""" + + from IPython.core.inputsplitter import InputSplitter + + isp = InputSplitter() + # In practice, this input loop would be wrapped in an outside loop to read + # input indefinitely, until some exit/quit command was issued. Here we + # only illustrate the basic inner loop. + while isp.push_accepts_more(): + indent = ' '*isp.indent_spaces + prompt = '>>> ' + indent + line = indent + raw_input(prompt) + isp.push(line) + + # Here we just return input so we can use it in a test suite, but a real + # interpreter would instead send it for execution somewhere. 
+ src = isp.source_reset() + print 'Input source was:\n', src + return src #----------------------------------------------------------------------------- # Test utilities, just for local use @@ -27,6 +54,17 @@ def assemble(block): """Assemble a block into multi-line sub-blocks.""" return ['\n'.join(sub_block)+'\n' for sub_block in block] + +def pseudo_input(lines): + """Return a function that acts like raw_input but feeds the input list.""" + ilines = iter(lines) + def raw_in(prompt): + try: + return next(ilines) + except StopIteration: + return '' + return raw_in + #----------------------------------------------------------------------------- # Tests #----------------------------------------------------------------------------- @@ -45,7 +83,7 @@ def test_spaces(): ] for s, nsp in tests: - nt.assert_equal(BB.num_ini_spaces(s), nsp) + nt.assert_equal(isp.num_ini_spaces(s), nsp) def test_remove_comments(): @@ -59,120 +97,117 @@ def test_remove_comments(): ] for inp, out in tests: - nt.assert_equal(BB.remove_comments(inp), out) + nt.assert_equal(isp.remove_comments(inp), out) def test_get_input_encoding(): - encoding = BB.get_input_encoding() + encoding = isp.get_input_encoding() nt.assert_true(isinstance(encoding, basestring)) # simple-minded check that at least encoding a simple string works with the # encoding we got. 
nt.assert_equal('test'.encode(encoding), 'test') -class BlockBreakerTestCase(unittest.TestCase): +class InputSplitterTestCase(unittest.TestCase): def setUp(self): - self.bb = BB.BlockBreaker() + self.isp = isp.InputSplitter() def test_reset(self): - bb = self.bb - bb.push('x=1') - bb.reset() - self.assertEqual(bb._buffer, []) - self.assertEqual(bb.indent_spaces, 0) - self.assertEqual(bb.source, '') - self.assertEqual(bb.code, None) - self.assertEqual(bb.is_complete, False) + isp = self.isp + isp.push('x=1') + isp.reset() + self.assertEqual(isp._buffer, []) + self.assertEqual(isp.indent_spaces, 0) + self.assertEqual(isp.source, '') + self.assertEqual(isp.code, None) + self.assertEqual(isp._is_complete, False) def test_source(self): - self.bb._store('1') - self.bb._store('2') - self.assertEqual(self.bb.source, '1\n2\n') - self.assertTrue(len(self.bb._buffer)>0) - self.assertEqual(self.bb.source_reset(), '1\n2\n') - self.assertEqual(self.bb._buffer, []) - self.assertEqual(self.bb.source, '') + self.isp._store('1') + self.isp._store('2') + self.assertEqual(self.isp.source, '1\n2\n') + self.assertTrue(len(self.isp._buffer)>0) + self.assertEqual(self.isp.source_reset(), '1\n2\n') + self.assertEqual(self.isp._buffer, []) + self.assertEqual(self.isp.source, '') def test_indent(self): - bb = self.bb # shorthand - bb.push('x=1') - self.assertEqual(bb.indent_spaces, 0) - bb.push('if 1:\n x=1') - self.assertEqual(bb.indent_spaces, 4) - bb.push('y=2\n') - self.assertEqual(bb.indent_spaces, 0) - bb.push('if 1:') - self.assertEqual(bb.indent_spaces, 4) - bb.push(' x=1') - self.assertEqual(bb.indent_spaces, 4) + isp = self.isp # shorthand + isp.push('x=1') + self.assertEqual(isp.indent_spaces, 0) + isp.push('if 1:\n x=1') + self.assertEqual(isp.indent_spaces, 4) + isp.push('y=2\n') + self.assertEqual(isp.indent_spaces, 0) + isp.push('if 1:') + self.assertEqual(isp.indent_spaces, 4) + isp.push(' x=1') + self.assertEqual(isp.indent_spaces, 4) # Blank lines shouldn't change the 
indent level - bb.push(' '*2) - self.assertEqual(bb.indent_spaces, 4) + isp.push(' '*2) + self.assertEqual(isp.indent_spaces, 4) def test_indent2(self): - bb = self.bb + isp = self.isp # When a multiline statement contains parens or multiline strings, we # shouldn't get confused. - bb.push("if 1:") - bb.push(" x = (1+\n 2)") - self.assertEqual(bb.indent_spaces, 4) + isp.push("if 1:") + isp.push(" x = (1+\n 2)") + self.assertEqual(isp.indent_spaces, 4) def test_dedent(self): - bb = self.bb # shorthand - bb.push('if 1:') - self.assertEqual(bb.indent_spaces, 4) - bb.push(' pass') - self.assertEqual(bb.indent_spaces, 0) + isp = self.isp # shorthand + isp.push('if 1:') + self.assertEqual(isp.indent_spaces, 4) + isp.push(' pass') + self.assertEqual(isp.indent_spaces, 0) def test_push(self): - bb = self.bb - bb.push('x=1') - self.assertTrue(bb.is_complete) + isp = self.isp + self.assertTrue(isp.push('x=1')) def test_push2(self): - bb = self.bb - bb.push('if 1:') - self.assertFalse(bb.is_complete) + isp = self.isp + self.assertFalse(isp.push('if 1:')) for line in [' x=1', '# a comment', ' y=2']: - bb.push(line) - self.assertTrue(bb.is_complete) + self.assertTrue(isp.push(line)) def test_push3(self): """Test input with leading whitespace""" - bb = self.bb - bb.push(' x=1') - bb.push(' y=2') - self.assertEqual(bb.source, 'if 1:\n x=1\n y=2\n') + isp = self.isp + isp.push(' x=1') + isp.push(' y=2') + self.assertEqual(isp.source, 'if 1:\n x=1\n y=2\n') def test_replace_mode(self): - bb = self.bb - bb.input_mode = 'replace' - bb.push('x=1') - self.assertEqual(bb.source, 'x=1\n') - bb.push('x=2') - self.assertEqual(bb.source, 'x=2\n') - - def test_interactive_block_ready(self): - bb = self.bb - bb.push('x=1') - self.assertTrue(bb.interactive_block_ready()) - - def test_interactive_block_ready2(self): - bb = self.bb - bb.push('if 1:') - self.assertFalse(bb.interactive_block_ready()) - bb.push(' x=1') - self.assertFalse(bb.interactive_block_ready()) - bb.push('') - 
self.assertTrue(bb.interactive_block_ready()) + isp = self.isp + isp.input_mode = 'replace' + isp.push('x=1') + self.assertEqual(isp.source, 'x=1\n') + isp.push('x=2') + self.assertEqual(isp.source, 'x=2\n') + + def test_push_accepts_more(self): + isp = self.isp + isp.push('x=1') + self.assertFalse(isp.push_accepts_more()) + + def test_push_accepts_more2(self): + isp = self.isp + isp.push('if 1:') + self.assertTrue(isp.push_accepts_more()) + isp.push(' x=1') + self.assertTrue(isp.push_accepts_more()) + isp.push('') + self.assertFalse(isp.push_accepts_more()) - def test_interactive_block_ready3(self): - bb = self.bb - bb.push("x = (2+\n3)") - self.assertTrue(bb.interactive_block_ready()) + def test_push_accepts_more3(self): + isp = self.isp + isp.push("x = (2+\n3)") + self.assertFalse(isp.push_accepts_more()) - def test_interactive_block_ready4(self): - bb = self.bb + def test_push_accepts_more4(self): + isp = self.isp # When a multiline statement contains parens or multiline strings, we # shouldn't get confused. # FIXME: we should be able to better handle de-dents in statements like @@ -180,31 +215,31 @@ class BlockBreakerTestCase(unittest.TestCase): # parens). Right now we aren't handling the indentation tracking quite # correctly with this, though in practice it may not be too much of a # problem. We'll need to see. 
- bb.push("if 1:") - bb.push(" x = (2+") - bb.push(" 3)") - self.assertFalse(bb.interactive_block_ready()) - bb.push(" y = 3") - self.assertFalse(bb.interactive_block_ready()) - bb.push('') - self.assertTrue(bb.interactive_block_ready()) + isp.push("if 1:") + isp.push(" x = (2+") + isp.push(" 3)") + self.assertTrue(isp.push_accepts_more()) + isp.push(" y = 3") + self.assertTrue(isp.push_accepts_more()) + isp.push('') + self.assertFalse(isp.push_accepts_more()) def test_syntax_error(self): - bb = self.bb + isp = self.isp # Syntax errors immediately produce a 'ready' block, so the invalid # Python can be sent to the kernel for evaluation with possible ipython # special-syntax conversion. - bb.push('run foo') - self.assertTrue(bb.interactive_block_ready()) + isp.push('run foo') + self.assertFalse(isp.push_accepts_more()) def check_split(self, block_lines, compile=True): blocks = assemble(block_lines) lines = ''.join(blocks) - oblock = self.bb.split_blocks(lines) + oblock = self.isp.split_blocks(lines) self.assertEqual(oblock, blocks) if compile: for block in blocks: - self.bb.compile(block) + self.isp._compile(block) def test_split(self): # All blocks of input we want to test in a list. The format for each @@ -273,4 +308,39 @@ class BlockBreakerTestCase(unittest.TestCase): ] for block_lines in all_blocks: self.check_split(block_lines, compile=False) + + +class InteractiveLoopTestCase(unittest.TestCase): + """Tests for an interactive loop like a python shell. + """ + def check_ns(self, lines, ns): + """Validate that the given input lines produce the resulting namespace. + + Note: the input lines are given exactly as they would be typed in an + auto-indenting environment, as mini_interactive_loop above already does + auto-indenting and prepends spaces to the input. 
+ """ + src = mini_interactive_loop(pseudo_input(lines)) + test_ns = {} + exec src in test_ns + # We can't check that the provided ns is identical to the test_ns, + # because Python fills test_ns with extra keys (copyright, etc). But + # we can check that the given dict is *contained* in test_ns + for k,v in ns.items(): + self.assertEqual(test_ns[k], v) + def test_simple(self): + self.check_ns(['x=1'], dict(x=1)) + + def test_simple2(self): + self.check_ns(['if 1:', 'x=2'], dict(x=2)) + + def test_xy(self): + self.check_ns(['x=1; y=2'], dict(x=1, y=2)) + + def test_abc(self): + self.check_ns(['if 1:','a=1','b=2','c=3'], dict(a=1, b=2, c=3)) + + def test_multi(self): + self.check_ns(['x =(1+','1+','2)'], dict(x=4)) +