##// END OF EJS Templates
Made blockbreakers' input encoding detection more robust to strange...
epatters -
Show More
@@ -1,414 +1,419 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 For more details, see the class docstring below.
9 9 """
10 10 #-----------------------------------------------------------------------------
11 11 # Copyright (C) 2010 The IPython Development Team
12 12 #
13 13 # Distributed under the terms of the BSD License. The full license is in
14 14 # the file COPYING, distributed as part of this software.
15 15 #-----------------------------------------------------------------------------
16 16
17 17 #-----------------------------------------------------------------------------
18 18 # Imports
19 19 #-----------------------------------------------------------------------------
20 20 # stdlib
21 21 import codeop
22 22 import re
23 23 import sys
24 24
25 25 #-----------------------------------------------------------------------------
26 26 # Utilities
27 27 #-----------------------------------------------------------------------------
28 28
29 29 # FIXME: move these utilities to the general ward...
30 30
# Compiled regexps for autoindent management.
#
# dedent_re recognises an *indented* line that starts with a statement after
# which the enclosing block cannot continue (raise / return / pass).  The
# trailing \b keeps identifiers that merely begin with one of those keywords
# (e.g. ``password`` or ``returns``) from being mistaken for the statement.
dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')
# ini_spaces_re captures the run of leading blanks (space, tab, CR, FF, VT --
# but not newline) at the very start of a string.
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
34 34
35 35
def num_ini_spaces(s):
    """Return the number of initial blank characters in a string.

    Every leading blank counts as one, so a tab is counted as a single space.
    For now, we do *not* support mixing of tabs and spaces in the user's
    input.  A newline is not a blank: a string that starts with one yields 0.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
      Count of leading space, tab, carriage-return, form-feed and
      vertical-tab characters; 0 if the string is empty or starts with
      anything else.
    """
    # The characters stripped here are exactly those in the module's
    # ini_spaces_re character class, so the count equals that match's end().
    return len(s) - len(s.lstrip(' \t\r\f\v'))
56 56
57 57
def remove_comments(src):
    """Remove all comments from input source.

    Note: comments are NOT recognized inside of strings!  Everything from a
    ``#`` up to the end of its line is dropped, even when that ``#`` is part
    of a string literal.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.  Line breaks are kept, and so is
    any whitespace that preceded a comment.
    """
    # '.' does not match a newline, so each substitution stops at the end of
    # the line that holds the '#'.
    return re.sub(r'#.*', '', src)
74 74
75 75
def get_input_encoding():
    """Return the default standard input encoding.

    Returns
    -------
    encoding : string
      ``sys.stdin.encoding`` when it is set, otherwise ``'ascii'``.
    """
    # There are strange environments for which sys.stdin.encoding is None (or
    # sys.stdin has no such attribute at all).  We ensure that a valid
    # encoding is returned instead of passing None/'' on to callers that hand
    # the value straight to str.encode().
    encoding = getattr(sys.stdin, 'encoding', None)
    if not encoding:
        encoding = 'ascii'
    return encoding
79 84
80 85 #-----------------------------------------------------------------------------
81 86 # Classes and functions
82 87 #-----------------------------------------------------------------------------
83 88
class InputSplitter(object):
    """An object that can split Python source input in executable blocks.

    This object is designed to be used in one of two basic modes:

    1. By feeding it python source line-by-line, using :meth:`push`. In this
       mode, it will return on each push whether the currently pushed code
       could be executed already.  In addition, it provides a method called
       :meth:`push_accepts_more` that can be used to query whether more input
       can be pushed into a single interactive block.

    2. By calling :meth:`split_blocks` with a single, multiline Python string,
       that is then split into blocks each of which can be executed
       interactively as a single statement.

    This is a simple example of how an interactive terminal-based client can
    use this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\\n', isp.source_reset(),
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, as a native str.
    # Reading this attribute is the normal way of querying the currently
    # pushed source code.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None
    # Input mode
    input_mode = 'append'

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self, input_mode=None):
        """Create a new InputSplitter instance.

        Parameters
        ----------
        input_mode : str

          One of 'append', 'replace', default is 'append'.  This controls how
          new inputs are used: in 'append' mode, they are appended to the
          existing buffer and the whole buffer is compiled; in 'replace' mode,
          each new input completely replaces all prior inputs.  Replace mode is
          thus equivalent to prepending a full reset() to every push() call.

          In practice, line-oriented clients likely want to use 'append' mode
          while block-oriented ones will want to use 'replace'.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        if input_mode is None:
            # Fall back to the class-level default.
            input_mode = InputSplitter.input_mode
        self.input_mode = input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so the same list object keeps being used.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset."""
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note
          that this value is also stored as a private attribute
          (_is_complete), so it can be queried at any time.
        """
        if self.input_mode == 'replace':
            self.reset()

        # If the source code has leading blanks, add 'if 1:\n' to it
        # this allows execution of indented pasted code. It is tempting
        # to add '\n' at the end of source to run commands like ' a=1'
        # directly, but this fails for more complicated scenarios
        if not self._buffer and lines[:1] in [' ', '\t']:
            lines = 'if 1:\n%s' % lines

        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if
        # an exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        self._update_indent(lines)
        try:
            self.code = self._compile(source)
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not
            # have given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input only when either a
        SyntaxError is raised, or *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        Block-oriented frontends that have a separate keyboard event to
        indicate execution should use the :meth:`split_blocks` method instead.

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """
        # Nothing complete yet (including "nothing pushed yet"): keep reading.
        if not self._is_complete:
            return True

        # Complete and back at flush-left: the block is done.
        if self.indent_spaces == 0:
            return False

        # Complete but still indented: only an empty or whitespace-only last
        # line terminates the block.
        last_line = self.source.splitlines()[-1]
        return bool(last_line and not last_line.isspace())

    def split_blocks(self, lines):
        """Split a multiline string into multiple input blocks.

        Note: this method starts by performing a full reset().

        Parameters
        ----------
        lines : str
          A possibly multiline string.

        Returns
        -------
        blocks : list
          A list of strings, each possibly multiline.  Each string corresponds
          to a single block that can be compiled in 'single' mode (unless it
          has a syntax error)."""

        # This code is fairly delicate.  If you make any changes here, make
        # absolutely sure that you do run the full test suite and ALL tests
        # pass.

        self.reset()
        blocks = []

        # Reversed copy so we can use pop() efficiently and consume the input
        # as a stack
        lines = lines.splitlines()[::-1]
        # Outer loop over all input
        while lines:
            # Inner loop to build each block
            while True:
                # Safety exit from inner loop
                if not lines:
                    break
                # Grab next line but don't push it yet
                next_line = lines.pop()
                # Blank/empty lines are pushed as-is
                if not next_line or next_line.isspace():
                    self.push(next_line)
                    continue

                # Check indentation changes caused by the *next* line
                indent_spaces, _full_dedent = self._find_indent(next_line)

                # If the next line causes a dedent, it can be for two different
                # reasons: either an explicit de-dent by the user or a
                # return/raise/pass statement.  These MUST be handled
                # separately:
                #
                # 1. the first case is only detected when the actual explicit
                # dedent happens, and that would be the *first* line of a *new*
                # block.  Thus, we must put the line back into the input buffer
                # so that it starts a new block on the next pass.
                #
                # 2. the second case is detected in the line before the actual
                # dedent happens, so, we consume the line and we can break out
                # to start a new block.

                # Case 1, explicit dedent causes a break.  The line must be
                # truly flush-left: checking only for a leading space would
                # treat a tab-indented return/raise/pass as flush-left, put it
                # back forever and never terminate.
                if _full_dedent and num_ini_spaces(next_line) == 0:
                    lines.append(next_line)
                    break

                # Otherwise any line is pushed
                self.push(next_line)

                # Case 2, full dedent with full block ready:
                if _full_dedent or \
                       (self.indent_spaces == 0
                        and not self.push_accepts_more()):
                    break
            # Form the new block with the current source input
            blocks.append(self.source_reset())

        return blocks

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to
          self.indent_spaces if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                full_dedent = True

        # Trailing whitespace is ignored when looking for the block-opening
        # colon: callers strip comments first, which turns 'if x:  # note'
        # into 'if x:  '.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update indent_spaces/_full_dedent from each meaningful input line.

        Comments are stripped first; empty and whitespace-only lines are
        skipped.
        """
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended."""

        if lines.endswith('\n'):
            self._buffer.append(lines)
        else:
            self._buffer.append(lines+'\n')
        self._set_source()

    def _set_source(self):
        """Rebuild self.source from the accumulated buffer as a native str."""
        text = ''.join(self._buffer)
        # Only text that is not already the native ``str`` type (Python 2
        # ``unicode``) is encoded.  Encoding unconditionally would hand bytes
        # to the compiler on Python 3, and would force an implicit ASCII
        # decode of byte strings on Python 2.
        if not isinstance(text, str):
            text = text.encode(self.encoding)
        self.source = text
General Comments 0
You need to be logged in to leave comments. Login now