##// END OF EJS Templates
Completed full block splitting for block-based frontends.
Fernando Perez -
Show More
@@ -1,260 +1,375 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 This is a simple example of how an interactive terminal-based client can use
4 4 this tool::
5 5
6 6 bb = BlockBreaker()
7 7 while not bb.interactive_block_ready():
8 8 bb.push(raw_input('>>> '))
9 9 print 'Input source was:\n', bb.source,
10 10 """
11 11 #-----------------------------------------------------------------------------
12 12 # Copyright (C) 2010 The IPython Development Team
13 13 #
14 14 # Distributed under the terms of the BSD License. The full license is in
15 15 # the file COPYING, distributed as part of this software.
16 16 #-----------------------------------------------------------------------------
17 17
18 18 #-----------------------------------------------------------------------------
19 19 # Imports
20 20 #-----------------------------------------------------------------------------
21 21 # stdlib
22 22 import codeop
23 23 import re
24 24 import sys
25 25
26 26 #-----------------------------------------------------------------------------
27 27 # Utilities
28 28 #-----------------------------------------------------------------------------
29 29
30 30 # FIXME: move these utilities to the general ward...
31 31
32 32 # compiled regexps for autoindent management
33 33 dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')
34 34 ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
35 35
36 36
37 37 def num_ini_spaces(s):
38 38 """Return the number of initial spaces in a string.
39 39
40 40 Note that tabs are counted as a single space. For now, we do *not* support
41 41 mixing of tabs and spaces in the user's input.
42 42
43 43 Parameters
44 44 ----------
45 45 s : string
46 46 """
47 47
48 48 ini_spaces = ini_spaces_re.match(s)
49 49 if ini_spaces:
50 50 return ini_spaces.end()
51 51 else:
52 52 return 0
53 53
54 54
55 55 def remove_comments(src):
56 56 """Remove all comments from input source.
57 57
58 58 Note: string literals are NOT recognized, so a '#' inside a string is also treated as the start of a comment!
59 59
60 60 Parameters
61 61 ----------
62 62 src : string
63 63 A single or multiline input string.
64 64
65 65 Returns
66 66 -------
67 67 String with all Python comments removed.
68 68 """
69 69
70 70 return re.sub('#.*', '', src)
71 71
72 72
73 73 def get_input_encoding():
74 74 """Return the default standard input encoding."""
75 75 return getattr(sys.stdin, 'encoding', 'ascii')
76 76
77 77 #-----------------------------------------------------------------------------
78 78 # Classes and functions
79 79 #-----------------------------------------------------------------------------
80 80
81 81 class BlockBreaker(object):
82 82 # Command compiler
83 83 compile = None
84 84 # Number of spaces of indentation
85 85 indent_spaces = 0
86 # Mark when input has changed indentation all the way back to flush-left
87 full_dedent = False
86 88 # String, indicating the default input encoding
87 89 encoding = ''
88 90 # String where the current full source input is stored, properly encoded
89 91 source = ''
90 92 # Code object corresponding to the current source
91 93 code = None
92 94 # Boolean indicating whether the current block is complete
93 95 is_complete = None
94 96 # Input mode
95 97 input_mode = 'append'
96 98
97 99 # Private attributes
98 100
99 101 # List
100 102 _buffer = None
101 103
102 104 def __init__(self, input_mode=None):
103 105 """Create a new BlockBreaker instance.
104 106
105 107 Parameters
106 108 ----------
107 109 input_mode : str
108 110
109 111 One of 'append', 'replace', default is 'append'. This controls how
110 112 new inputs are used: in 'append' mode, they are appended to the
111 113 existing buffer and the whole buffer is compiled; in 'replace' mode,
112 114 each new input completely replaces all prior inputs. Replace mode is
113 115 thus equivalent to prepending a full reset() to every push() call.
114 116
115 117 In practice, line-oriented clients likely want to use 'append' mode
116 118 while block-oriented ones will want to use 'replace'.
117 119 """
118 120 self._buffer = []
119 121 self.compile = codeop.CommandCompiler()
120 122 self.encoding = get_input_encoding()
121 123 self.input_mode = BlockBreaker.input_mode if input_mode is None \
122 124 else input_mode
123 125
124 126 def reset(self):
125 127 """Reset the input buffer and associated state."""
126 128 self.indent_spaces = 0
127 129 self._buffer[:] = []
128 130 self.source = ''
129 131 self.code = None
132 self.is_complete = False
133 self.full_dedent = False
130 134
131 135 def source_reset(self):
132 136 """Return the input source and perform a full reset.
133 137 """
134 138 out = self.source
135 139 self.reset()
136 140 return out
137 141
138 142 def push(self, lines):
139 143 """Push one or more lines of input.
140 144
141 145 This stores the given lines and returns a status code indicating
142 146 whether the code forms a complete Python block or not.
143 147
144 148 Any exceptions generated in compilation are allowed to propagate.
145 149
146 150 Parameters
147 151 ----------
148 152 lines : string
149 153 One or more lines of Python input.
150 154
151 155 Returns
152 156 -------
153 157 is_complete : boolean
154 158 True if the current input source (the result of the current input
155 159 plus prior inputs) forms a complete Python execution block. Note that
156 160 this value is also stored as an attribute so it can be queried at any
157 161 time.
158 162 """
159 163 if self.input_mode == 'replace':
160 164 self.reset()
161 165
162 166 # If the source code has leading blanks, add 'if 1:\n' to it
163 167 # this allows execution of indented pasted code. It is tempting
164 168 # to add '\n' at the end of source to run commands like ' a=1'
165 169 # directly, but this fails for more complicated scenarios
166 170 if not self._buffer and lines[:1] in [' ', '\t']:
167 171 lines = 'if 1:\n%s' % lines
168 172
169 173 self._store(lines)
170 174 source = self.source
171 175
172 176 # Before calling compile(), reset the code object to None so that if an
173 177 # exception is raised in compilation, we don't mislead by having
174 178 # inconsistent code/source attributes.
175 179 self.code, self.is_complete = None, None
180
181 self._update_indent(lines)
176 182 try:
177 183 self.code = self.compile(source)
178 184 # Invalid syntax can produce any of a number of different errors from
179 185 # inside the compiler, so we have to catch them all. Syntax errors
180 186 # immediately produce a 'ready' block, so the invalid Python can be
181 187 # sent to the kernel for evaluation with possible ipython
182 188 # special-syntax conversion.
183 except (SyntaxError, OverflowError, ValueError, TypeError, MemoryError):
189 except (SyntaxError, OverflowError, ValueError, TypeError,
190 MemoryError):
184 191 self.is_complete = True
185 192 else:
186 193 # Compilation didn't produce any exceptions (though it may not have
187 194 # given a complete code object)
188 195 self.is_complete = self.code is not None
189 self._update_indent(lines)
190 196
191 197 return self.is_complete
192 198
193 199 def interactive_block_ready(self):
194 200 """Return whether a block of interactive input is ready for execution.
195 201
196 202 This method is meant to be used by line-oriented frontends, who need to
197 203 guess whether a block is complete or not based solely on prior and
198 204 current input lines. The BlockBreaker considers it has a complete
199 205 interactive block when *all* of the following are true:
200 206
201 207 1. The input compiles to a complete statement.
202 208
203 209 2. The indentation level is flush-left (because if we are indented,
204 210 like inside a function definition or for loop, we need to keep
205 211 reading new input).
206 212
207 213 3. There is one extra line consisting only of whitespace.
208 214
209 215 Because of condition #3, this method should be used only by
210 216 *line-oriented* frontends, since it means that intermediate blank lines
211 217 are not allowed in function definitions (or any other indented block).
212 218
213 219 Block-oriented frontends that have a separate keyboard event to
214 220 indicate execution should use the :meth:`split_blocks` method instead.
215 221 """
222 #print 'complete?', self.source # dbg
223 #if self.full_dedent:
224 # True
225
216 226 if not self.is_complete:
217 227 return False
218 228 if self.indent_spaces==0:
219 229 return True
220 230 last_line = self.source.splitlines()[-1]
221 231 if not last_line or last_line.isspace():
222 232 return True
223 233 else:
224 234 return False
225 235
226 236 def split_blocks(self, lines):
227 """Split a multiline string into multiple input blocks"""
228 raise NotImplementedError
237 """Split a multiline string into multiple input blocks.
238
239 Note: this method starts by performing a full reset().
240
241 Parameters
242 ----------
243 lines : str
244 A possibly multiline string.
245
246 Returns
247 -------
248 blocks : list
249 A list of strings, each possibly multiline. Each string corresponds
250 to a single block that can be compiled in 'single' mode (unless it
251 has a syntax error)."""
252
253 # This code is fairly delicate. If you make any changes here, make
254 # absolutely sure that you do run the full test suite and ALL tests
255 # pass.
256
257 self.reset()
258 blocks = []
259
260 # Reversed copy so we can use pop() efficiently and consume the input
261 # as a stack
262 lines = lines.splitlines()[::-1]
263 # Outer loop over all input
264 while lines:
265 # Inner loop to build each block
266 while True:
267 # Safety exit from inner loop
268 if not lines:
269 break
270 # Grab next line but don't push it yet
271 next_line = lines.pop()
272 # Blank/empty lines are pushed as-is
273 if not next_line or next_line.isspace():
274 self.push(next_line)
275 continue
276
277 # Check indentation changes caused by the *next* line
278 indent_spaces, full_dedent = self._find_indent(next_line)
279
280 # If the next line causes a dedent, it can be for two different
281 # reasons: either an explicit de-dent by the user or a
282 # return/raise/pass statement. These MUST be handled
283 # separately:
284 #
285 # 1. the first case is only detected when the actual explicit
286 # dedent happens, and that would be the *first* line of a *new*
287 # block. Thus, we must put the line back into the input buffer
288 # so that it starts a new block on the next pass.
289 #
290 # 2. the second case is detected in the line before the actual
291 # dedent happens, so we consume the line and we can break out
292 # to start a new block.
293
294 # Case 1, explicit dedent causes a break
295 if full_dedent and not next_line.startswith(' '):
296 lines.append(next_line)
297 break
298
299 # Otherwise any line is pushed
300 self.push(next_line)
301
302 # Case 2, full dedent with full block ready:
303 if full_dedent or \
304 self.indent_spaces==0 and self.interactive_block_ready():
305 break
306 # Form the new block with the current source input
307 blocks.append(self.source_reset())
308
309 return blocks
229 310
230 311 #------------------------------------------------------------------------
231 312 # Private interface
232 313 #------------------------------------------------------------------------
233
234 def _update_indent(self, lines):
235 """Keep track of the indent level."""
236 314
237 for line in remove_comments(lines).splitlines():
315 def _find_indent(self, line):
316 """Compute the new indentation level for a single line.
317
318 Parameters
319 ----------
320 line : str
321 A single new line of non-whitespace, non-comment Python input.
322
323 Returns
324 -------
325 indent_spaces : int
326 New value for the indent level (it may be equal to self.indent_spaces
327 if indentation doesn't change).
328
329 full_dedent : boolean
330 Whether the new line causes a full flush-left dedent.
331 """
332 indent_spaces = self.indent_spaces
333 full_dedent = self.full_dedent
334
335 inisp = num_ini_spaces(line)
336 if inisp < indent_spaces:
337 indent_spaces = inisp
338 if indent_spaces <= 0:
339 #print 'Full dedent in text',self.source # dbg
340 full_dedent = True
341
342 if line[-1] == ':':
343 indent_spaces += 4
344 elif dedent_re.match(line):
345 indent_spaces -= 4
346 if indent_spaces <= 0:
347 full_dedent = True
348
349 # Safety
350 if indent_spaces < 0:
351 indent_spaces = 0
352 #print 'safety' # dbg
238 353
354 return indent_spaces, full_dedent
355
356 def _update_indent(self, lines):
357 for line in remove_comments(lines).splitlines():
239 358 if line and not line.isspace():
240 if self.code is not None:
241 inisp = num_ini_spaces(line)
242 if inisp < self.indent_spaces:
243 self.indent_spaces = inisp
244
245 if line[-1] == ':':
246 self.indent_spaces += 4
247 elif dedent_re.match(line):
248 self.indent_spaces -= 4
359 self.indent_spaces, self.full_dedent = self._find_indent(line)
249 360
250 361 def _store(self, lines):
251 362 """Store one or more lines of input.
252 363
253 364 If input lines are not newline-terminated, a newline is automatically
254 365 appended."""
255 366
256 367 if lines.endswith('\n'):
257 368 self._buffer.append(lines)
258 369 else:
259 370 self._buffer.append(lines+'\n')
371 self._set_source()
372
373 def _set_source(self):
260 374 self.source = ''.join(self._buffer).encode(self.encoding)
375
@@ -1,189 +1,276 b''
1 1 """Tests for the blockbreaker module.
2 2 """
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (C) 2010 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-----------------------------------------------------------------------------
9 9
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13 # stdlib
14 14 import unittest
15 15
16 16 # Third party
17 17 import nose.tools as nt
18 18
19 19 # Our own
20 20 from IPython.core import blockbreaker as BB
21 21
22 22 #-----------------------------------------------------------------------------
23 # Test utilities, just for local use
24 #-----------------------------------------------------------------------------
25
26 def assemble(block):
27 """Assemble a block into multi-line sub-blocks."""
28 return ['\n'.join(sub_block)+'\n' for sub_block in block]
29
30 #-----------------------------------------------------------------------------
23 31 # Tests
24 32 #-----------------------------------------------------------------------------
25 33 def test_spaces():
26 34 tests = [('', 0),
27 35 (' ', 1),
28 36 ('\n', 0),
29 37 (' \n', 1),
30 38 ('x', 0),
31 39 (' x', 1),
32 40 (' x',2),
33 41 (' x',4),
34 42 # Note: tabs are counted as a single whitespace!
35 43 ('\tx', 1),
36 44 ('\t x', 2),
37 45 ]
38 46
39 47 for s, nsp in tests:
40 48 nt.assert_equal(BB.num_ini_spaces(s), nsp)
41 49
42 50
43 51 def test_remove_comments():
44 52 tests = [('text', 'text'),
45 53 ('text # comment', 'text '),
46 54 ('text # comment\n', 'text \n'),
47 55 ('text # comment \n', 'text \n'),
48 56 ('line # c \nline\n','line \nline\n'),
49 57 ('line # c \nline#c2 \nline\nline #c\n\n',
50 58 'line \nline\nline\nline \n\n'),
51 59 ]
52 60
53 61 for inp, out in tests:
54 62 nt.assert_equal(BB.remove_comments(inp), out)
55 63
56 64
57 65 def test_get_input_encoding():
58 66 encoding = BB.get_input_encoding()
59 67 nt.assert_true(isinstance(encoding, basestring))
60 68 # simple-minded check that at least encoding a simple string works with the
61 69 # encoding we got.
62 70 nt.assert_equal('test'.encode(encoding), 'test')
63 71
64 72
65 73 class BlockBreakerTestCase(unittest.TestCase):
66 74 def setUp(self):
67 75 self.bb = BB.BlockBreaker()
68 76
69 77 def test_reset(self):
70 78 bb = self.bb
71 79 bb.push('x=1')
72 80 bb.reset()
73 81 self.assertEqual(bb._buffer, [])
74 82 self.assertEqual(bb.indent_spaces, 0)
75 83 self.assertEqual(bb.source, '')
76 84 self.assertEqual(bb.code, None)
85 self.assertEqual(bb.is_complete, False)
77 86
78 87 def test_source(self):
79 88 self.bb._store('1')
80 89 self.bb._store('2')
81 90 self.assertEqual(self.bb.source, '1\n2\n')
82 91 self.assertTrue(len(self.bb._buffer)>0)
83 92 self.assertEqual(self.bb.source_reset(), '1\n2\n')
84 93 self.assertEqual(self.bb._buffer, [])
85 94 self.assertEqual(self.bb.source, '')
86 95
87 96 def test_indent(self):
88 97 bb = self.bb # shorthand
89 98 bb.push('x=1')
90 99 self.assertEqual(bb.indent_spaces, 0)
91 100 bb.push('if 1:\n x=1')
92 101 self.assertEqual(bb.indent_spaces, 4)
93 102 bb.push('y=2\n')
94 103 self.assertEqual(bb.indent_spaces, 0)
95 104 bb.push('if 1:')
96 105 self.assertEqual(bb.indent_spaces, 4)
97 106 bb.push(' x=1')
98 107 self.assertEqual(bb.indent_spaces, 4)
99 108 # Blank lines shouldn't change the indent level
100 109 bb.push(' '*2)
101 110 self.assertEqual(bb.indent_spaces, 4)
102 111
103 112 def test_indent2(self):
104 113 bb = self.bb
105 114 # When a multiline statement contains parens or multiline strings, we
106 115 # shouldn't get confused.
107 116 bb.push("if 1:")
108 117 bb.push(" x = (1+\n 2)")
109 118 self.assertEqual(bb.indent_spaces, 4)
110 119
111 120 def test_dedent(self):
112 121 bb = self.bb # shorthand
113 122 bb.push('if 1:')
114 123 self.assertEqual(bb.indent_spaces, 4)
115 124 bb.push(' pass')
116 125 self.assertEqual(bb.indent_spaces, 0)
117 126
118 127 def test_push(self):
119 128 bb = self.bb
120 129 bb.push('x=1')
121 130 self.assertTrue(bb.is_complete)
122 131
123 132 def test_push2(self):
124 133 bb = self.bb
125 134 bb.push('if 1:')
126 135 self.assertFalse(bb.is_complete)
127 136 for line in [' x=1', '# a comment', ' y=2']:
128 137 bb.push(line)
129 138 self.assertTrue(bb.is_complete)
130 139
131 140 def test_push3(self):
132 141 """Test input with leading whitespace"""
133 142 bb = self.bb
134 143 bb.push(' x=1')
135 144 bb.push(' y=2')
136 145 self.assertEqual(bb.source, 'if 1:\n x=1\n y=2\n')
137 146
138 147 def test_replace_mode(self):
139 148 bb = self.bb
140 149 bb.input_mode = 'replace'
141 150 bb.push('x=1')
142 151 self.assertEqual(bb.source, 'x=1\n')
143 152 bb.push('x=2')
144 153 self.assertEqual(bb.source, 'x=2\n')
145 154
146 155 def test_interactive_block_ready(self):
147 156 bb = self.bb
148 157 bb.push('x=1')
149 158 self.assertTrue(bb.interactive_block_ready())
150 159
151 160 def test_interactive_block_ready2(self):
152 161 bb = self.bb
153 162 bb.push('if 1:')
154 163 self.assertFalse(bb.interactive_block_ready())
155 164 bb.push(' x=1')
156 165 self.assertFalse(bb.interactive_block_ready())
157 166 bb.push('')
158 167 self.assertTrue(bb.interactive_block_ready())
159 168
160 169 def test_interactive_block_ready3(self):
161 170 bb = self.bb
162 171 bb.push("x = (2+\n3)")
163 172 self.assertTrue(bb.interactive_block_ready())
164 173
165 174 def test_interactive_block_ready4(self):
166 175 bb = self.bb
167 176 # When a multiline statement contains parens or multiline strings, we
168 177 # shouldn't get confused.
169 178 # FIXME: we should be able to better handle de-dents in statements like
170 179 # multiline strings and multiline expressions (continued with \ or
171 180 # parens). Right now we aren't handling the indentation tracking quite
172 181 # correctly with this, though in practice it may not be too much of a
173 182 # problem. We'll need to see.
174 183 bb.push("if 1:")
175 184 bb.push(" x = (2+")
176 185 bb.push(" 3)")
177 186 self.assertFalse(bb.interactive_block_ready())
178 187 bb.push(" y = 3")
179 188 self.assertFalse(bb.interactive_block_ready())
180 189 bb.push('')
181 190 self.assertTrue(bb.interactive_block_ready())
182 191
183 192 def test_syntax_error(self):
184 193 bb = self.bb
185 194 # Syntax errors immediately produce a 'ready' block, so the invalid
186 195 # Python can be sent to the kernel for evaluation with possible ipython
187 196 # special-syntax conversion.
188 197 bb.push('run foo')
189 198 self.assertTrue(bb.interactive_block_ready())
199
200 def check_split(self, block_lines, compile=True):
201 blocks = assemble(block_lines)
202 lines = ''.join(blocks)
203 oblock = self.bb.split_blocks(lines)
204 self.assertEqual(oblock, blocks)
205 if compile:
206 for block in blocks:
207 self.bb.compile(block)
208
209 def test_split(self):
210 # All blocks of input we want to test in a list. The format for each
211 # block is a list of lists, with each inner lists consisting of all the
212 # lines (as single-lines) that should make up a sub-block.
213
214 # Note: do NOT put here sub-blocks that don't compile, as the
215 # check_split() routine makes a final verification pass to check that
216 # each sub_block, as returned by split_blocks(), does compile
217 # correctly.
218 all_blocks = [ [['x=1']],
219
220 [['x=1'],
221 ['y=2']],
222
223 [['x=1'],
224 ['# a comment'],
225 ['y=11']],
226
227 [['if 1:',
228 ' x=1'],
229 ['y=3']],
230
231 [['def f(x):',
232 ' return x'],
233 ['x=1']],
234
235 [['def f(x):',
236 ' x+=1',
237 ' ',
238 ' return x'],
239 ['x=1']],
240
241 [['def f(x):',
242 ' if x>0:',
243 ' y=1',
244 ' # a comment',
245 ' else:',
246 ' y=4',
247 ' ',
248 ' return y'],
249 ['x=1'],
250 ['if 1:',
251 ' y=11'] ],
252
253 [['for i in range(10):'
254 ' x=i**2']],
255
256 [['for i in range(10):'
257 ' x=i**2'],
258 ['z = 1']],
259 ]
260 for block_lines in all_blocks:
261 self.check_split(block_lines)
262
263 def test_split_syntax_errors(self):
264 # Block splitting with invalid syntax
265 all_blocks = [ [['a syntax error']],
266
267 [['x=1'],
268 ['a syntax error']],
269
270 [['for i in range(10):'
271 ' an error']],
272
273 ]
274 for block_lines in all_blocks:
275 self.check_split(block_lines, compile=False)
276
General Comments 0
You need to be logged in to leave comments. Login now