##// END OF EJS Templates
Add test for missing input encoding. Back to 100% coverage.
Fernando Perez -
Show More
@@ -1,419 +1,421 b''
1 """Analysis of text input into executable blocks.
1 """Analysis of text input into executable blocks.
2
2
3 The main class in this module, :class:`InputSplitter`, is designed to break
3 The main class in this module, :class:`InputSplitter`, is designed to break
4 input from either interactive, line-by-line environments or block-based ones,
4 input from either interactive, line-by-line environments or block-based ones,
5 into standalone blocks that can be executed by Python as 'single' statements
5 into standalone blocks that can be executed by Python as 'single' statements
6 (thus triggering sys.displayhook).
6 (thus triggering sys.displayhook).
7
7
8 For more details, see the class docstring below.
8 For more details, see the class docstring below.
9 """
9 """
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Copyright (C) 2010 The IPython Development Team
11 # Copyright (C) 2010 The IPython Development Team
12 #
12 #
13 # Distributed under the terms of the BSD License. The full license is in
13 # Distributed under the terms of the BSD License. The full license is in
14 # the file COPYING, distributed as part of this software.
14 # the file COPYING, distributed as part of this software.
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16
16
17 #-----------------------------------------------------------------------------
17 #-----------------------------------------------------------------------------
18 # Imports
18 # Imports
19 #-----------------------------------------------------------------------------
19 #-----------------------------------------------------------------------------
20 # stdlib
20 # stdlib
21 import codeop
21 import codeop
22 import re
22 import re
23 import sys
23 import sys
24
24
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26 # Utilities
26 # Utilities
27 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
28
28
29 # FIXME: move these utilities to the general ward...
29 # FIXME: move these utilities to the general ward...
30
30
31 # compiled regexps for autoindent management
31 # compiled regexps for autoindent management
32 dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')
32 dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')
33 ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
33 ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
34
34
35
35
36 def num_ini_spaces(s):
36 def num_ini_spaces(s):
37 """Return the number of initial spaces in a string.
37 """Return the number of initial spaces in a string.
38
38
39 Note that tabs are counted as a single space. For now, we do *not* support
39 Note that tabs are counted as a single space. For now, we do *not* support
40 mixing of tabs and spaces in the user's input.
40 mixing of tabs and spaces in the user's input.
41
41
42 Parameters
42 Parameters
43 ----------
43 ----------
44 s : string
44 s : string
45
45
46 Returns
46 Returns
47 -------
47 -------
48 n : int
48 n : int
49 """
49 """
50
50
51 ini_spaces = ini_spaces_re.match(s)
51 ini_spaces = ini_spaces_re.match(s)
52 if ini_spaces:
52 if ini_spaces:
53 return ini_spaces.end()
53 return ini_spaces.end()
54 else:
54 else:
55 return 0
55 return 0
56
56
57
57
58 def remove_comments(src):
58 def remove_comments(src):
59 """Remove all comments from input source.
59 """Remove all comments from input source.
60
60
61 Note: strings are NOT recognized, so a '#' inside a string is treated as a comment!
61 Note: strings are NOT recognized, so a '#' inside a string is treated as a comment!
62
62
63 Parameters
63 Parameters
64 ----------
64 ----------
65 src : string
65 src : string
66 A single or multiline input string.
66 A single or multiline input string.
67
67
68 Returns
68 Returns
69 -------
69 -------
70 String with all Python comments removed.
70 String with all Python comments removed.
71 """
71 """
72
72
73 return re.sub('#.*', '', src)
73 return re.sub('#.*', '', src)
74
74
75
75
76 def get_input_encoding():
76 def get_input_encoding():
77 """Return the default standard input encoding."""
77 """Return the default standard input encoding.
78
79 If sys.stdin has no encoding, 'ascii' is returned."""
78 # There are strange environments for which sys.stdin.encoding is None. We
80 # There are strange environments for which sys.stdin.encoding is None. We
79 # ensure that a valid encoding is returned.
81 # ensure that a valid encoding is returned.
80 encoding = getattr(sys.stdin, 'encoding', None)
82 encoding = getattr(sys.stdin, 'encoding', None)
81 if encoding is None:
83 if encoding is None:
82 encoding = 'ascii'
84 encoding = 'ascii'
83 return encoding
85 return encoding
84
86
85 #-----------------------------------------------------------------------------
87 #-----------------------------------------------------------------------------
86 # Classes and functions
88 # Classes and functions
87 #-----------------------------------------------------------------------------
89 #-----------------------------------------------------------------------------
88
90
89 class InputSplitter(object):
91 class InputSplitter(object):
90 """An object that can split Python source input in executable blocks.
92 """An object that can split Python source input in executable blocks.
91
93
92 This object is designed to be used in one of two basic modes:
94 This object is designed to be used in one of two basic modes:
93
95
94 1. By feeding it python source line-by-line, using :meth:`push`. In this
96 1. By feeding it python source line-by-line, using :meth:`push`. In this
95 mode, it will return on each push whether the currently pushed code
97 mode, it will return on each push whether the currently pushed code
96 could be executed already. In addition, it provides a method called
98 could be executed already. In addition, it provides a method called
97 :meth:`push_accepts_more` that can be used to query whether more input
99 :meth:`push_accepts_more` that can be used to query whether more input
98 can be pushed into a single interactive block.
100 can be pushed into a single interactive block.
99
101
100 2. By calling :meth:`split_blocks` with a single, multiline Python string,
102 2. By calling :meth:`split_blocks` with a single, multiline Python string,
101 that is then split into blocks each of which can be executed
103 that is then split into blocks each of which can be executed
102 interactively as a single statement.
104 interactively as a single statement.
103
105
104 This is a simple example of how an interactive terminal-based client can use
106 This is a simple example of how an interactive terminal-based client can use
105 this tool::
107 this tool::
106
108
107 isp = InputSplitter()
109 isp = InputSplitter()
108 while isp.push_accepts_more():
110 while isp.push_accepts_more():
109 indent = ' '*isp.indent_spaces
111 indent = ' '*isp.indent_spaces
110 prompt = '>>> ' + indent
112 prompt = '>>> ' + indent
111 line = indent + raw_input(prompt)
113 line = indent + raw_input(prompt)
112 isp.push(line)
114 isp.push(line)
113 print 'Input source was:\n', isp.source_reset(),
115 print 'Input source was:\n', isp.source_reset(),
114 """
116 """
115 # Number of spaces of indentation computed from input that has been pushed
117 # Number of spaces of indentation computed from input that has been pushed
116 # so far. This is the attribute callers should query to get the current
118 # so far. This is the attribute callers should query to get the current
117 # indentation level, in order to provide auto-indent facilities.
119 # indentation level, in order to provide auto-indent facilities.
118 indent_spaces = 0
120 indent_spaces = 0
119 # String, indicating the default input encoding. It is computed by default
121 # String, indicating the default input encoding. It is computed by default
120 # at initialization time via get_input_encoding(), but it can be reset by a
122 # at initialization time via get_input_encoding(), but it can be reset by a
121 # client with specific knowledge of the encoding.
123 # client with specific knowledge of the encoding.
122 encoding = ''
124 encoding = ''
123 # String where the current full source input is stored, properly encoded.
125 # String where the current full source input is stored, properly encoded.
124 # Reading this attribute is the normal way of querying the currently pushed
126 # Reading this attribute is the normal way of querying the currently pushed
125 # source code, that has been properly encoded.
127 # source code, that has been properly encoded.
126 source = ''
128 source = ''
127 # Code object corresponding to the current source. It is automatically
129 # Code object corresponding to the current source. It is automatically
128 # synced to the source, so it can be queried at any time to obtain the code
130 # synced to the source, so it can be queried at any time to obtain the code
129 # object; it will be None if the source doesn't compile to valid Python.
131 # object; it will be None if the source doesn't compile to valid Python.
130 code = None
132 code = None
131 # Input mode
133 # Input mode
132 input_mode = 'append'
134 input_mode = 'append'
133
135
134 # Private attributes
136 # Private attributes
135
137
136 # List with lines of input accumulated so far
138 # List with lines of input accumulated so far
137 _buffer = None
139 _buffer = None
138 # Command compiler
140 # Command compiler
139 _compile = None
141 _compile = None
140 # Mark when input has changed indentation all the way back to flush-left
142 # Mark when input has changed indentation all the way back to flush-left
141 _full_dedent = False
143 _full_dedent = False
142 # Boolean indicating whether the current block is complete
144 # Boolean indicating whether the current block is complete
143 _is_complete = None
145 _is_complete = None
144
146
145 def __init__(self, input_mode=None):
147 def __init__(self, input_mode=None):
146 """Create a new InputSplitter instance.
148 """Create a new InputSplitter instance.
147
149
148 Parameters
150 Parameters
149 ----------
151 ----------
150 input_mode : str
152 input_mode : str
151
153
152 One of 'append', 'replace', default is 'append'. This controls how
154 One of 'append', 'replace', default is 'append'. This controls how
153 new inputs are used: in 'append' mode, they are appended to the
155 new inputs are used: in 'append' mode, they are appended to the
154 existing buffer and the whole buffer is compiled; in 'replace' mode,
156 existing buffer and the whole buffer is compiled; in 'replace' mode,
155 each new input completely replaces all prior inputs. Replace mode is
157 each new input completely replaces all prior inputs. Replace mode is
156 thus equivalent to prepending a full reset() to every push() call.
158 thus equivalent to prepending a full reset() to every push() call.
157
159
158 In practice, line-oriented clients likely want to use 'append' mode
160 In practice, line-oriented clients likely want to use 'append' mode
159 while block-oriented ones will want to use 'replace'.
161 while block-oriented ones will want to use 'replace'.
160 """
162 """
161 self._buffer = []
163 self._buffer = []
162 self._compile = codeop.CommandCompiler()
164 self._compile = codeop.CommandCompiler()
163 self.encoding = get_input_encoding()
165 self.encoding = get_input_encoding()
164 self.input_mode = InputSplitter.input_mode if input_mode is None \
166 self.input_mode = InputSplitter.input_mode if input_mode is None \
165 else input_mode
167 else input_mode
166
168
167 def reset(self):
169 def reset(self):
168 """Reset the input buffer and associated state."""
170 """Reset the input buffer and associated state."""
169 self.indent_spaces = 0
171 self.indent_spaces = 0
170 self._buffer[:] = []
172 self._buffer[:] = []
171 self.source = ''
173 self.source = ''
172 self.code = None
174 self.code = None
173 self._is_complete = False
175 self._is_complete = False
174 self._full_dedent = False
176 self._full_dedent = False
175
177
176 def source_reset(self):
178 def source_reset(self):
177 """Return the input source and perform a full reset.
179 """Return the input source and perform a full reset.
178 """
180 """
179 out = self.source
181 out = self.source
180 self.reset()
182 self.reset()
181 return out
183 return out
182
184
183 def push(self, lines):
185 def push(self, lines):
184 """Push one or more lines of input.
186 """Push one or more lines of input.
185
187
186 This stores the given lines and returns a status code indicating
188 This stores the given lines and returns a status code indicating
187 whether the code forms a complete Python block or not.
189 whether the code forms a complete Python block or not.
188
190
189 Any exceptions generated in compilation are swallowed, but if an
191 Any exceptions generated in compilation are swallowed, but if an
190 exception was produced, the method returns True.
192 exception was produced, the method returns True.
191
193
192 Parameters
194 Parameters
193 ----------
195 ----------
194 lines : string
196 lines : string
195 One or more lines of Python input.
197 One or more lines of Python input.
196
198
197 Returns
199 Returns
198 -------
200 -------
199 is_complete : boolean
201 is_complete : boolean
200 True if the current input source (the result of the current input
202 True if the current input source (the result of the current input
201 plus prior inputs) forms a complete Python execution block. Note that
203 plus prior inputs) forms a complete Python execution block. Note that
202 this value is also stored as a private attribute (_is_complete), so it
204 this value is also stored as a private attribute (_is_complete), so it
203 can be queried at any time.
205 can be queried at any time.
204 """
206 """
205 if self.input_mode == 'replace':
207 if self.input_mode == 'replace':
206 self.reset()
208 self.reset()
207
209
208 # If the source code has leading blanks, add 'if 1:\n' to it
210 # If the source code has leading blanks, add 'if 1:\n' to it
209 # this allows execution of indented pasted code. It is tempting
211 # this allows execution of indented pasted code. It is tempting
210 # to add '\n' at the end of source to run commands like ' a=1'
212 # to add '\n' at the end of source to run commands like ' a=1'
211 # directly, but this fails for more complicated scenarios
213 # directly, but this fails for more complicated scenarios
212 if not self._buffer and lines[:1] in [' ', '\t']:
214 if not self._buffer and lines[:1] in [' ', '\t']:
213 lines = 'if 1:\n%s' % lines
215 lines = 'if 1:\n%s' % lines
214
216
215 self._store(lines)
217 self._store(lines)
216 source = self.source
218 source = self.source
217
219
218 # Before calling _compile(), reset the code object to None so that if an
220 # Before calling _compile(), reset the code object to None so that if an
219 # exception is raised in compilation, we don't mislead by having
221 # exception is raised in compilation, we don't mislead by having
220 # inconsistent code/source attributes.
222 # inconsistent code/source attributes.
221 self.code, self._is_complete = None, None
223 self.code, self._is_complete = None, None
222
224
223 self._update_indent(lines)
225 self._update_indent(lines)
224 try:
226 try:
225 self.code = self._compile(source)
227 self.code = self._compile(source)
226 # Invalid syntax can produce any of a number of different errors from
228 # Invalid syntax can produce any of a number of different errors from
227 # inside the compiler, so we have to catch them all. Syntax errors
229 # inside the compiler, so we have to catch them all. Syntax errors
228 # immediately produce a 'ready' block, so the invalid Python can be
230 # immediately produce a 'ready' block, so the invalid Python can be
229 # sent to the kernel for evaluation with possible ipython
231 # sent to the kernel for evaluation with possible ipython
230 # special-syntax conversion.
232 # special-syntax conversion.
231 except (SyntaxError, OverflowError, ValueError, TypeError,
233 except (SyntaxError, OverflowError, ValueError, TypeError,
232 MemoryError):
234 MemoryError):
233 self._is_complete = True
235 self._is_complete = True
234 else:
236 else:
235 # Compilation didn't produce any exceptions (though it may not have
237 # Compilation didn't produce any exceptions (though it may not have
236 # given a complete code object)
238 # given a complete code object)
237 self._is_complete = self.code is not None
239 self._is_complete = self.code is not None
238
240
239 return self._is_complete
241 return self._is_complete
240
242
241 def push_accepts_more(self):
243 def push_accepts_more(self):
242 """Return whether a block of interactive input can accept more input.
244 """Return whether a block of interactive input can accept more input.
243
245
244 This method is meant to be used by line-oriented frontends, who need to
246 This method is meant to be used by line-oriented frontends, who need to
245 guess whether a block is complete or not based solely on prior and
247 guess whether a block is complete or not based solely on prior and
246 current input lines. The InputSplitter considers it has a complete
248 current input lines. The InputSplitter considers it has a complete
247 interactive block and will not accept more input only when either a
249 interactive block and will not accept more input only when either a
248 SyntaxError is raised, or *all* of the following are true:
250 SyntaxError is raised, or *all* of the following are true:
249
251
250 1. The input compiles to a complete statement.
252 1. The input compiles to a complete statement.
251
253
252 2. The indentation level is flush-left (because if we are indented,
254 2. The indentation level is flush-left (because if we are indented,
253 like inside a function definition or for loop, we need to keep
255 like inside a function definition or for loop, we need to keep
254 reading new input).
256 reading new input).
255
257
256 3. There is one extra line consisting only of whitespace.
258 3. There is one extra line consisting only of whitespace.
257
259
258 Because of condition #3, this method should be used only by
260 Because of condition #3, this method should be used only by
259 *line-oriented* frontends, since it means that intermediate blank lines
261 *line-oriented* frontends, since it means that intermediate blank lines
260 are not allowed in function definitions (or any other indented block).
262 are not allowed in function definitions (or any other indented block).
261
263
262 Block-oriented frontends that have a separate keyboard event to
264 Block-oriented frontends that have a separate keyboard event to
263 indicate execution should use the :meth:`split_blocks` method instead.
265 indicate execution should use the :meth:`split_blocks` method instead.
264
266
265 If the current input produces a syntax error, this method immediately
267 If the current input produces a syntax error, this method immediately
266 returns False but does *not* raise the syntax error exception, as
268 returns False but does *not* raise the syntax error exception, as
267 typically clients will want to send invalid syntax to an execution
269 typically clients will want to send invalid syntax to an execution
268 backend which might convert the invalid syntax into valid Python via
270 backend which might convert the invalid syntax into valid Python via
269 one of the dynamic IPython mechanisms.
271 one of the dynamic IPython mechanisms.
270 """
272 """
271
273
272 if not self._is_complete:
274 if not self._is_complete:
273 return True
275 return True
274
276
275 if self.indent_spaces==0:
277 if self.indent_spaces==0:
276 return False
278 return False
277
279
278 last_line = self.source.splitlines()[-1]
280 last_line = self.source.splitlines()[-1]
279 return bool(last_line and not last_line.isspace())
281 return bool(last_line and not last_line.isspace())
280
282
281 def split_blocks(self, lines):
283 def split_blocks(self, lines):
282 """Split a multiline string into multiple input blocks.
284 """Split a multiline string into multiple input blocks.
283
285
284 Note: this method starts by performing a full reset().
286 Note: this method starts by performing a full reset().
285
287
286 Parameters
288 Parameters
287 ----------
289 ----------
288 lines : str
290 lines : str
289 A possibly multiline string.
291 A possibly multiline string.
290
292
291 Returns
293 Returns
292 -------
294 -------
293 blocks : list
295 blocks : list
294 A list of strings, each possibly multiline. Each string corresponds
296 A list of strings, each possibly multiline. Each string corresponds
295 to a single block that can be compiled in 'single' mode (unless it
297 to a single block that can be compiled in 'single' mode (unless it
296 has a syntax error)."""
298 has a syntax error)."""
297
299
298 # This code is fairly delicate. If you make any changes here, make
300 # This code is fairly delicate. If you make any changes here, make
299 # absolutely sure that you do run the full test suite and ALL tests
301 # absolutely sure that you do run the full test suite and ALL tests
300 # pass.
302 # pass.
301
303
302 self.reset()
304 self.reset()
303 blocks = []
305 blocks = []
304
306
305 # Reversed copy so we can use pop() efficiently and consume the input
307 # Reversed copy so we can use pop() efficiently and consume the input
306 # as a stack
308 # as a stack
307 lines = lines.splitlines()[::-1]
309 lines = lines.splitlines()[::-1]
308 # Outer loop over all input
310 # Outer loop over all input
309 while lines:
311 while lines:
310 # Inner loop to build each block
312 # Inner loop to build each block
311 while True:
313 while True:
312 # Safety exit from inner loop
314 # Safety exit from inner loop
313 if not lines:
315 if not lines:
314 break
316 break
315 # Grab next line but don't push it yet
317 # Grab next line but don't push it yet
316 next_line = lines.pop()
318 next_line = lines.pop()
317 # Blank/empty lines are pushed as-is
319 # Blank/empty lines are pushed as-is
318 if not next_line or next_line.isspace():
320 if not next_line or next_line.isspace():
319 self.push(next_line)
321 self.push(next_line)
320 continue
322 continue
321
323
322 # Check indentation changes caused by the *next* line
324 # Check indentation changes caused by the *next* line
323 indent_spaces, _full_dedent = self._find_indent(next_line)
325 indent_spaces, _full_dedent = self._find_indent(next_line)
324
326
325 # If the next line causes a dedent, it can be for two different
327 # If the next line causes a dedent, it can be for two different
326 # reasons: either an explicit de-dent by the user or a
328 # reasons: either an explicit de-dent by the user or a
327 # return/raise/pass statement. These MUST be handled
329 # return/raise/pass statement. These MUST be handled
328 # separately:
330 # separately:
329 #
331 #
330 # 1. the first case is only detected when the actual explicit
332 # 1. the first case is only detected when the actual explicit
331 # dedent happens, and that would be the *first* line of a *new*
333 # dedent happens, and that would be the *first* line of a *new*
332 # block. Thus, we must put the line back into the input buffer
334 # block. Thus, we must put the line back into the input buffer
333 # so that it starts a new block on the next pass.
335 # so that it starts a new block on the next pass.
334 #
336 #
335 # 2. the second case is detected in the line before the actual
337 # 2. the second case is detected in the line before the actual
336 # dedent happens, so we consume the line and we can break out
338 # dedent happens, so we consume the line and we can break out
337 # to start a new block.
339 # to start a new block.
338
340
339 # Case 1, explicit dedent causes a break
341 # Case 1, explicit dedent causes a break
340 if _full_dedent and not next_line.startswith(' '):
342 if _full_dedent and not next_line.startswith(' '):
341 lines.append(next_line)
343 lines.append(next_line)
342 break
344 break
343
345
344 # Otherwise any line is pushed
346 # Otherwise any line is pushed
345 self.push(next_line)
347 self.push(next_line)
346
348
347 # Case 2, full dedent with full block ready:
349 # Case 2, full dedent with full block ready:
348 if _full_dedent or \
350 if _full_dedent or \
349 self.indent_spaces==0 and not self.push_accepts_more():
351 self.indent_spaces==0 and not self.push_accepts_more():
350 break
352 break
351 # Form the new block with the current source input
353 # Form the new block with the current source input
352 blocks.append(self.source_reset())
354 blocks.append(self.source_reset())
353
355
354 return blocks
356 return blocks
355
357
356 #------------------------------------------------------------------------
358 #------------------------------------------------------------------------
357 # Private interface
359 # Private interface
358 #------------------------------------------------------------------------
360 #------------------------------------------------------------------------
359
361
360 def _find_indent(self, line):
362 def _find_indent(self, line):
361 """Compute the new indentation level for a single line.
363 """Compute the new indentation level for a single line.
362
364
363 Parameters
365 Parameters
364 ----------
366 ----------
365 line : str
367 line : str
366 A single new line of non-whitespace, non-comment Python input.
368 A single new line of non-whitespace, non-comment Python input.
367
369
368 Returns
370 Returns
369 -------
371 -------
370 indent_spaces : int
372 indent_spaces : int
371 New value for the indent level (it may be equal to self.indent_spaces
373 New value for the indent level (it may be equal to self.indent_spaces
372 if indentation doesn't change).
374 if indentation doesn't change).
373
375
374 full_dedent : boolean
376 full_dedent : boolean
375 Whether the new line causes a full flush-left dedent.
377 Whether the new line causes a full flush-left dedent.
376 """
378 """
377 indent_spaces = self.indent_spaces
379 indent_spaces = self.indent_spaces
378 full_dedent = self._full_dedent
380 full_dedent = self._full_dedent
379
381
380 inisp = num_ini_spaces(line)
382 inisp = num_ini_spaces(line)
381 if inisp < indent_spaces:
383 if inisp < indent_spaces:
382 indent_spaces = inisp
384 indent_spaces = inisp
383 if indent_spaces <= 0:
385 if indent_spaces <= 0:
384 #print 'Full dedent in text',self.source # dbg
386 #print 'Full dedent in text',self.source # dbg
385 full_dedent = True
387 full_dedent = True
386
388
387 if line[-1] == ':':
389 if line[-1] == ':':
388 indent_spaces += 4
390 indent_spaces += 4
389 elif dedent_re.match(line):
391 elif dedent_re.match(line):
390 indent_spaces -= 4
392 indent_spaces -= 4
391 if indent_spaces <= 0:
393 if indent_spaces <= 0:
392 full_dedent = True
394 full_dedent = True
393
395
394 # Safety
396 # Safety
395 if indent_spaces < 0:
397 if indent_spaces < 0:
396 indent_spaces = 0
398 indent_spaces = 0
397 #print 'safety' # dbg
399 #print 'safety' # dbg
398
400
399 return indent_spaces, full_dedent
401 return indent_spaces, full_dedent
400
402
401 def _update_indent(self, lines):
403 def _update_indent(self, lines):
402 for line in remove_comments(lines).splitlines():
404 for line in remove_comments(lines).splitlines():
403 if line and not line.isspace():
405 if line and not line.isspace():
404 self.indent_spaces, self._full_dedent = self._find_indent(line)
406 self.indent_spaces, self._full_dedent = self._find_indent(line)
405
407
406 def _store(self, lines):
408 def _store(self, lines):
407 """Store one or more lines of input.
409 """Store one or more lines of input.
408
410
409 If input lines are not newline-terminated, a newline is automatically
411 If input lines are not newline-terminated, a newline is automatically
410 appended."""
412 appended."""
411
413
412 if lines.endswith('\n'):
414 if lines.endswith('\n'):
413 self._buffer.append(lines)
415 self._buffer.append(lines)
414 else:
416 else:
415 self._buffer.append(lines+'\n')
417 self._buffer.append(lines+'\n')
416 self._set_source()
418 self._set_source()
417
419
418 def _set_source(self):
420 def _set_source(self):
419 self.source = ''.join(self._buffer).encode(self.encoding)
421 self.source = ''.join(self._buffer).encode(self.encoding)
@@ -1,346 +1,364 b''
1 """Tests for the inputsplitter module.
1 """Tests for the inputsplitter module.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010 The IPython Development Team
4 # Copyright (C) 2010 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
7 # the file COPYING, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import unittest
14 import unittest
15 import sys
15
16
16 # Third party
17 # Third party
17 import nose.tools as nt
18 import nose.tools as nt
18
19
19 # Our own
20 # Our own
20 from IPython.core import inputsplitter as isp
21 from IPython.core import inputsplitter as isp
21
22
22 #-----------------------------------------------------------------------------
23 #-----------------------------------------------------------------------------
23 # Semi-complete examples (also used as tests)
24 # Semi-complete examples (also used as tests)
24 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
def mini_interactive_loop(raw_input):
    """Minimal example of the logic of an interactive interpreter loop.

    This serves as an example, and it is used by the test system with a fake
    raw_input that simulates interactive input.

    ``raw_input`` is called with the prompt string and must return the next
    line of input.  The accumulated source is printed and returned.
    """

    from IPython.core.inputsplitter import InputSplitter

    # Named ``splitter`` so it does not shadow the module-level ``isp`` alias.
    splitter = InputSplitter()
    # In practice, this input loop would be wrapped in an outside loop to read
    # input indefinitely, until some exit/quit command was issued.  Here we
    # only illustrate the basic inner loop.
    while splitter.push_accepts_more():
        indent = ' '*splitter.indent_spaces
        prompt = '>>> ' + indent
        # Emulate an auto-indenting front end: the current indentation is
        # prepended to whatever the user typed.
        line = indent + raw_input(prompt)
        splitter.push(line)

    # Here we just return input so we can use it in a test suite, but a real
    # interpreter would instead send it for execution somewhere.
    src = splitter.source_reset()
    # Single-argument form: same output as the old ``print a, b`` statement
    # (no space is inserted after a trailing newline) and valid on Python 3.
    print('Input source was:\n%s' % src)
    return src
48
49
49 #-----------------------------------------------------------------------------
50 #-----------------------------------------------------------------------------
50 # Test utilities, just for local use
51 # Test utilities, just for local use
51 #-----------------------------------------------------------------------------
52 #-----------------------------------------------------------------------------
52
53
def assemble(block):
    """Assemble a block into multi-line sub-blocks.

    ``block`` is a sequence of sub-blocks, each a sequence of single-line
    strings.  The lines of every sub-block are joined with newlines and a
    trailing newline is added; the resulting strings are returned as a list.
    """
    return ['\n'.join(sub_block)+'\n' for sub_block in block]
56
57
57
58
def pseudo_input(lines):
    """Return a function that acts like raw_input but feeds the input list.

    Each call to the returned function yields the next item of ``lines``;
    once they are exhausted every further call returns the empty string.
    The prompt argument is accepted and ignored.
    """
    ilines = iter(lines)

    def raw_in(prompt):
        # The default argument replaces an explicit StopIteration handler.
        return next(ilines, '')

    return raw_in
67
68
68 #-----------------------------------------------------------------------------
69 #-----------------------------------------------------------------------------
69 # Tests
70 # Tests
70 #-----------------------------------------------------------------------------
71 #-----------------------------------------------------------------------------
def test_spaces():
    """Check ``isp.num_ini_spaces`` against a table of (input, count) pairs."""
    # NOTE(review): leading whitespace inside these literals was restored so
    # that each one matches the count it is paired with.
    tests = [('', 0),
             (' ', 1),
             ('\n', 0),
             (' \n', 1),
             ('x', 0),
             (' x', 1),
             ('  x', 2),
             ('    x', 4),
             # Note: tabs are counted as a single whitespace!
             ('\tx', 1),
             ('\t x', 2),
             ]

    for s, nsp in tests:
        nt.assert_equal(isp.num_ini_spaces(s), nsp)
87
88
88
89
def test_remove_comments():
    """Check ``isp.remove_comments`` against a table of (input, output) pairs.

    In every expected output the text from ``#`` to the end of the line is
    gone, while the whitespace before the ``#`` and the newlines are kept.
    """
    tests = [('text', 'text'),
             ('text # comment', 'text '),
             ('text # comment\n', 'text \n'),
             ('text # comment \n', 'text \n'),
             ('line # c \nline\n', 'line \nline\n'),
             ('line # c \nline#c2 \nline\nline #c\n\n',
              'line \nline\nline\nline \n\n'),
             ]

    for inp, out in tests:
        nt.assert_equal(isp.remove_comments(inp), out)
101
102
102
103
def test_get_input_encoding():
    """Check that ``isp.get_input_encoding`` returns a usable encoding name."""
    encoding = isp.get_input_encoding()
    nt.assert_true(isinstance(encoding, basestring))
    # simple-minded check that at least encoding a simple string works with the
    # encoding we got.
    nt.assert_equal('test'.encode(encoding), 'test')
109
110
110
111
class NoInputEncodingTestCase(unittest.TestCase):
    """Check ``isp.get_input_encoding`` when stdin has no ``encoding``."""

    def setUp(self):
        # Swap sys.stdin for a bare object that has no 'encoding' attribute;
        # the real stream is kept so tearDown can put it back.
        self.old_stdin = sys.stdin
        class X: pass
        fake_stdin = X()
        sys.stdin = fake_stdin

    def test(self):
        # Verify that if sys.stdin has no 'encoding' attribute we do the right
        # thing
        enc = isp.get_input_encoding()
        self.assertEqual(enc, 'ascii')

    def tearDown(self):
        sys.stdin = self.old_stdin
127
128
class InputSplitterTestCase(unittest.TestCase):
    """Unit tests for ``isp.InputSplitter``.

    NOTE(review): runs of spaces inside the string literals of this class
    were collapsed in the text this was recovered from.  Leading whitespace
    has been restored as four spaces per indentation level (eight for doubly
    nested lines); confirm the exact widths against the repository copy.
    """

    def setUp(self):
        # A fresh splitter per test so no buffered input leaks across tests.
        self.isp = isp.InputSplitter()

    def test_reset(self):
        isp = self.isp
        isp.push('x=1')
        isp.reset()
        self.assertEqual(isp._buffer, [])
        self.assertEqual(isp.indent_spaces, 0)
        self.assertEqual(isp.source, '')
        self.assertEqual(isp.code, None)
        self.assertEqual(isp._is_complete, False)

    def test_source(self):
        self.isp._store('1')
        self.isp._store('2')
        self.assertEqual(self.isp.source, '1\n2\n')
        self.assertTrue(len(self.isp._buffer)>0)
        # source_reset() returns the source and leaves the splitter empty.
        self.assertEqual(self.isp.source_reset(), '1\n2\n')
        self.assertEqual(self.isp._buffer, [])
        self.assertEqual(self.isp.source, '')

    def test_indent(self):
        isp = self.isp  # shorthand
        isp.push('x=1')
        self.assertEqual(isp.indent_spaces, 0)
        isp.push('if 1:\n    x=1')
        self.assertEqual(isp.indent_spaces, 4)
        isp.push('y=2\n')
        self.assertEqual(isp.indent_spaces, 0)
        isp.push('if 1:')
        self.assertEqual(isp.indent_spaces, 4)
        isp.push('    x=1')
        self.assertEqual(isp.indent_spaces, 4)
        # Blank lines shouldn't change the indent level
        isp.push(' '*2)
        self.assertEqual(isp.indent_spaces, 4)

    def test_indent2(self):
        isp = self.isp
        # When a multiline statement contains parens or multiline strings, we
        # shouldn't get confused.
        isp.push("if 1:")
        isp.push("    x = (1+\n    2)")
        self.assertEqual(isp.indent_spaces, 4)

    def test_dedent(self):
        isp = self.isp  # shorthand
        isp.push('if 1:')
        self.assertEqual(isp.indent_spaces, 4)
        isp.push('    pass')
        self.assertEqual(isp.indent_spaces, 0)

    def test_push(self):
        isp = self.isp
        self.assertTrue(isp.push('x=1'))

    def test_push2(self):
        isp = self.isp
        self.assertFalse(isp.push('if 1:'))
        for line in ['    x=1', '# a comment', '    y=2']:
            self.assertTrue(isp.push(line))

    def test_push3(self):
        """Test input with leading whitespace"""
        isp = self.isp
        isp.push('    x=1')
        isp.push('    y=2')
        self.assertEqual(isp.source, 'if 1:\n    x=1\n    y=2\n')

    def test_replace_mode(self):
        isp = self.isp
        isp.input_mode = 'replace'
        isp.push('x=1')
        self.assertEqual(isp.source, 'x=1\n')
        isp.push('x=2')
        self.assertEqual(isp.source, 'x=2\n')

    def test_push_accepts_more(self):
        isp = self.isp
        isp.push('x=1')
        self.assertFalse(isp.push_accepts_more())

    def test_push_accepts_more2(self):
        isp = self.isp
        isp.push('if 1:')
        self.assertTrue(isp.push_accepts_more())
        isp.push('    x=1')
        self.assertTrue(isp.push_accepts_more())
        isp.push('')
        self.assertFalse(isp.push_accepts_more())

    def test_push_accepts_more3(self):
        isp = self.isp
        isp.push("x = (2+\n3)")
        self.assertFalse(isp.push_accepts_more())

    def test_push_accepts_more4(self):
        isp = self.isp
        # When a multiline statement contains parens or multiline strings, we
        # shouldn't get confused.
        # FIXME: we should be able to better handle de-dents in statements like
        # multiline strings and multiline expressions (continued with \ or
        # parens).  Right now we aren't handling the indentation tracking quite
        # correctly with this, though in practice it may not be too much of a
        # problem.  We'll need to see.
        isp.push("if 1:")
        isp.push("    x = (2+")
        isp.push("    3)")
        self.assertTrue(isp.push_accepts_more())
        isp.push("    y = 3")
        self.assertTrue(isp.push_accepts_more())
        isp.push('')
        self.assertFalse(isp.push_accepts_more())

    def test_syntax_error(self):
        isp = self.isp
        # Syntax errors immediately produce a 'ready' block, so the invalid
        # Python can be sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        isp.push('run foo')
        self.assertFalse(isp.push_accepts_more())

    def check_split(self, block_lines, compile=True):
        """Assert that ``split_blocks`` recovers the given sub-blocks.

        ``block_lines`` is a list of sub-blocks, each a list of single lines.
        They are assembled into one text, split again, and compared with the
        assembled sub-blocks.  When ``compile`` is true each sub-block is
        also passed to the splitter's ``_compile``.
        """
        blocks = assemble(block_lines)
        lines = ''.join(blocks)
        oblock = self.isp.split_blocks(lines)
        self.assertEqual(oblock, blocks)
        if compile:
            for block in blocks:
                self.isp._compile(block)

    def test_split(self):
        # All blocks of input we want to test in a list.  The format for each
        # block is a list of lists, with each inner list consisting of all the
        # lines (as single-lines) that should make up a sub-block.

        # Note: do NOT put here sub-blocks that don't compile, as the
        # check_split() routine makes a final verification pass to check that
        # each sub_block, as returned by split_blocks(), does compile
        # correctly.
        all_blocks = [ [['x=1']],

                       [['x=1'],
                        ['y=2']],

                       [['x=1'],
                        ['# a comment'],
                        ['y=11']],

                       [['if 1:',
                         '    x=1'],
                        ['y=3']],

                       [['def f(x):',
                         '    return x'],
                        ['x=1']],

                       [['def f(x):',
                         '    x+=1',
                         '    ',
                         '    return x'],
                        ['x=1']],

                       [['def f(x):',
                         '    if x>0:',
                         '        y=1',
                         '        # a comment',
                         '    else:',
                         '        y=4',
                         '    ',
                         '    return y'],
                        ['x=1'],
                        ['if 1:',
                         '    y=11'] ],

                       # NOTE(review): there is no comma between the two
                       # literals in the entries below, so they concatenate
                       # into a one-line ``for`` statement; confirm that this
                       # is intended.
                       [['for i in range(10):'
                         '    x=i**2']],

                       [['for i in range(10):'
                         '    x=i**2'],
                        ['z = 1']],
                       ]
        for block_lines in all_blocks:
            self.check_split(block_lines)

    def test_split_syntax_errors(self):
        # Block splitting with invalid syntax
        all_blocks = [ [['a syntax error']],

                       [['x=1'],
                        ['a syntax error']],

                       # NOTE(review): adjacent literals, no comma -- this is
                       # a single line of text; confirm that this is intended.
                       [['for i in range(10):'
                         '    an error']],

                       ]
        for block_lines in all_blocks:
            self.check_split(block_lines, compile=False)
311
329
312
330
class InteractiveLoopTestCase(unittest.TestCase):
    """Tests for an interactive loop like a python shell.
    """
    def check_ns(self, lines, ns):
        """Validate that the given input lines produce the resulting namespace.

        Note: the input lines are given exactly as they would be typed in an
        auto-indenting environment, as mini_interactive_loop above already does
        auto-indenting and prepends spaces to the input.
        """
        src = mini_interactive_loop(pseudo_input(lines))
        test_ns = {}
        # Tuple form of exec: equivalent to ``exec src in test_ns`` on
        # Python 2 and also valid syntax on Python 3.
        exec(src, test_ns)
        # We can't check that the provided ns is identical to the test_ns,
        # because Python fills test_ns with extra keys (copyright, etc).  But
        # we can check that the given dict is *contained* in test_ns
        for k,v in ns.items():
            self.assertEqual(test_ns[k], v)

    def test_simple(self):
        self.check_ns(['x=1'], dict(x=1))

    def test_simple2(self):
        self.check_ns(['if 1:', 'x=2'], dict(x=2))

    def test_xy(self):
        self.check_ns(['x=1; y=2'], dict(x=1, y=2))

    def test_abc(self):
        self.check_ns(['if 1:','a=1','b=2','c=3'], dict(a=1, b=2, c=3))

    def test_multi(self):
        self.check_ns(['x =(1+','1+','2)'], dict(x=4))
346
364
General Comments 0
You need to be logged in to leave comments. Login now