##// END OF EJS Templates
Completed first pass of inputsplitter with IPython syntax....
Fernando Perez -
Show More
@@ -1,521 +1,798 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 The main class in this module, :class:`InputSplitter`, is designed to break
4 4 input from either interactive, line-by-line environments or block-based ones,
5 5 into standalone blocks that can be executed by Python as 'single' statements
6 6 (thus triggering sys.displayhook).
7 7
8 8 For more details, see the class docstring below.
9
10 Authors
11
12 * Fernando Perez
13 * Brian Granger
9 14 """
10 15 #-----------------------------------------------------------------------------
11 16 # Copyright (C) 2010 The IPython Development Team
12 17 #
13 18 # Distributed under the terms of the BSD License. The full license is in
14 19 # the file COPYING, distributed as part of this software.
15 20 #-----------------------------------------------------------------------------
16 21
17 22 #-----------------------------------------------------------------------------
18 23 # Imports
19 24 #-----------------------------------------------------------------------------
20 25 # stdlib
21 26 import codeop
22 27 import re
23 28 import sys
24 29
25 30 # IPython modules
26 31 from IPython.utils.text import make_quoted_expr
27 32
28 33 #-----------------------------------------------------------------------------
34 # Globals
35 #-----------------------------------------------------------------------------
36
37 # The escape sequences that define the syntax transformations IPython will
38 # apply to user input. These can NOT be just changed here: many regular
39 # expressions and other parts of the code may use their hardcoded values, and
40 # for all intents and purposes they constitute the 'IPython syntax', so they
41 # should be considered fixed.
42
# Escape prefixes that make up the fixed 'IPython syntax'.  One prefix per
# line, with the kind of line each one introduces.
ESC_SHELL = '!'     # system shell command
ESC_SH_CAP = '!!'   # shell command whose output is captured
ESC_HELP = '?'      # object help
ESC_HELP2 = '??'    # more detailed object help
ESC_MAGIC = '%'     # magic function call
ESC_QUOTE = ','     # auto-quote, arguments split on whitespace
ESC_QUOTE2 = ';'    # auto-quote, whole rest of line as one argument
ESC_PAREN = '/'     # auto-parenthesize
51
52 #-----------------------------------------------------------------------------
29 53 # Utilities
30 54 #-----------------------------------------------------------------------------
31 55
32 # FIXME: move these utilities to the general ward...
56 # FIXME: These are general-purpose utilities that later can be moved to the
57 # general ward. Kept here for now because we're being very strict about test
58 # coverage with this code, and this lets us ensure that we keep 100% coverage
59 # while developing.
33 60
# Pre-compiled patterns used for autoindent bookkeeping.

# An indented ``raise``, ``return`` or ``pass`` at the start of a line.
dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')

# The run of leading blanks (space, tab, CR, FF, VT) at the start of a line.
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
37 64
38 65
def num_ini_spaces(s):
    """Count the blank characters that start a string.

    Every leading blank (space, tab, carriage return, form feed or vertical
    tab) counts as one, so a tab is *not* expanded.  Mixing tabs and spaces
    in the user's input is not supported for now.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
        Number of leading blank characters; 0 when the string does not start
        with one (a leading newline is not counted as a blank).
    """
    # Same character set as the ini_spaces_re pattern: strip exactly those
    # characters from the left and measure how much was removed.
    stripped = s.lstrip(' \t\r\f\v')
    return len(s) - len(stripped)
59 86
60 87
def remove_comments(src):
    """Strip every ``#``-to-end-of-line span from the input source.

    Note: this is purely textual, so a ``#`` inside a string literal is
    treated as the start of a comment too.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    The input with everything from each ``#`` up to (but not including) the
    end of its line removed.
    """
    comment = re.compile('#.*')
    return comment.sub('', src)
77 104
78 105
def get_input_encoding():
    """Return the encoding of standard input, defaulting to 'ascii'.

    Some environments provide a sys.stdin without an ``encoding`` attribute,
    or with that attribute set to None; 'ascii' is returned in both cases so
    callers always get a usable value."""
    stdin_encoding = getattr(sys.stdin, 'encoding', None)
    return 'ascii' if stdin_encoding is None else stdin_encoding
89 116
90 117 #-----------------------------------------------------------------------------
91 # Classes and functions
118 # Classes and functions for normal Python syntax handling
92 119 #-----------------------------------------------------------------------------
93 120
class InputSplitter(object):
    """An object that can split Python source input in executable blocks.

    This object is designed to be used in one of two basic modes:

    1. By feeding it python source line-by-line, using :meth:`push`. In this
       mode, it will return on each push whether the currently pushed code
       could be executed already. In addition, it provides a method called
       :meth:`push_accepts_more` that can be used to query whether more input
       can be pushed into a single interactive block.

    2. By calling :meth:`split_blocks` with a single, multiline Python string,
       that is then split into blocks each of which can be executed
       interactively as a single statement.

    This is a simple example of how an interactive terminal-based client can use
    this tool::

        isp = InputSplitter()
        while isp.push_accepts_more():
            indent = ' '*isp.indent_spaces
            prompt = '>>> ' + indent
            line = indent + raw_input(prompt)
            isp.push(line)
        print 'Input source was:\n', isp.source_reset(),
    """
    # Number of spaces of indentation computed from input that has been pushed
    # so far.  This is the attribute callers should query to get the current
    # indentation level, in order to provide auto-indent facilities.
    indent_spaces = 0
    # String, indicating the default input encoding.  It is computed by default
    # at initialization time via get_input_encoding(), but it can be reset by a
    # client with specific knowledge of the encoding.
    encoding = ''
    # String where the current full source input is stored, properly encoded.
    # Reading this attribute is the normal way of querying the currently pushed
    # source code, that has been properly encoded.
    source = ''
    # Code object corresponding to the current source.  It is automatically
    # synced to the source, so it can be queried at any time to obtain the code
    # object; it will be None if the source doesn't compile to valid Python.
    code = None
    # Input mode
    input_mode = 'append'

    # Private attributes

    # List with lines of input accumulated so far
    _buffer = None
    # Command compiler
    _compile = None
    # Mark when input has changed indentation all the way back to flush-left
    _full_dedent = False
    # Boolean indicating whether the current block is complete
    _is_complete = None

    def __init__(self, input_mode=None):
        """Create a new InputSplitter instance.

        Parameters
        ----------
        input_mode : str

          One of 'append', 'replace', default is 'append'.  This controls how
          new inputs are used: in 'append' mode, they are appended to the
          existing buffer and the whole buffer is compiled; in 'replace' mode,
          each new input completely replaces all prior inputs.  Replace mode is
          thus equivalent to prepending a full reset() to every push() call.

          In practice, line-oriented clients likely want to use 'append' mode
          while block-oriented ones will want to use 'replace'.
        """
        self._buffer = []
        self._compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        self.input_mode = InputSplitter.input_mode if input_mode is None \
                          else input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        # Clear in place so any outside reference to the list stays valid.
        self._buffer[:] = []
        self.source = ''
        self.code = None
        self._is_complete = False
        self._full_dedent = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Any exceptions generated in compilation are swallowed, but if an
        exception was produced, the method returns True.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as a private attribute (_is_complete), so it
          can be queried at any time.
        """
        if self.input_mode == 'replace':
            self.reset()

        # If the source code has leading blanks, add 'if 1:\n' to it
        # this allows execution of indented pasted code. It is tempting
        # to add '\n' at the end of source to run commands like ' a=1'
        # directly, but this fails for more complicated scenarios
        if not self._buffer and lines[:1] in [' ', '\t']:
            lines = 'if 1:\n%s' % lines

        self._store(lines)
        source = self.source

        # Before calling _compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self._is_complete = None, None

        self._update_indent(lines)
        try:
            self.code = self._compile(source)
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError):
            self._is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self._is_complete = self.code is not None

        return self._is_complete

    def push_accepts_more(self):
        """Return whether a block of interactive input can accept more input.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The InputSplitter considers it has a complete
        interactive block and will not accept more input only when either a
        SyntaxError is raised, or *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        Block-oriented frontends that have a separate keyboard event to
        indicate execution should use the :meth:`split_blocks` method instead.

        If the current input produces a syntax error, this method immediately
        returns False but does *not* raise the syntax error exception, as
        typically clients will want to send invalid syntax to an execution
        backend which might convert the invalid syntax into valid Python via
        one of the dynamic IPython mechanisms.
        """

        if not self._is_complete:
            return True

        if self.indent_spaces==0:
            return False

        # Still indented: only a trailing blank line closes the block.
        last_line = self.source.splitlines()[-1]
        return bool(last_line and not last_line.isspace())

    def split_blocks(self, lines):
        """Split a multiline string into multiple input blocks.

        Note: this method starts by performing a full reset().

        Parameters
        ----------
        lines : str
          A possibly multiline string.

        Returns
        -------
        blocks : list
          A list of strings, each possibly multiline.  Each string corresponds
          to a single block that can be compiled in 'single' mode (unless it
          has a syntax error)."""

        # This code is fairly delicate.  If you make any changes here, make
        # absolutely sure that you do run the full test suite and ALL tests
        # pass.

        self.reset()
        blocks = []

        # Reversed copy so we can use pop() efficiently and consume the input
        # as a stack
        lines = lines.splitlines()[::-1]
        # Outer loop over all input
        while lines:
            # Inner loop to build each block
            while True:
                # Safety exit from inner loop
                if not lines:
                    break
                # Grab next line but don't push it yet
                next_line = lines.pop()
                # Blank/empty lines are pushed as-is
                if not next_line or next_line.isspace():
                    self.push(next_line)
                    continue

                # Check indentation changes caused by the *next* line
                indent_spaces, _full_dedent = self._find_indent(next_line)

                # If the next line causes a dedent, it can be for two different
                # reasons: either an explicit de-dent by the user or a
                # return/raise/pass statement.  These MUST be handled
                # separately:
                #
                # 1. the first case is only detected when the actual explicit
                # dedent happens, and that would be the *first* line of a *new*
                # block.  Thus, we must put the line back into the input buffer
                # so that it starts a new block on the next pass.
                #
                # 2. the second case is detected in the line before the actual
                # dedent happens, so we consume the line and we can break out
                # to start a new block.

                # Case 1, explicit dedent causes a break
                if _full_dedent and not next_line.startswith(' '):
                    lines.append(next_line)
                    break

                # Otherwise any line is pushed
                self.push(next_line)

                # Case 2, full dedent with full block ready:
                if _full_dedent or \
                       self.indent_spaces==0 and not self.push_accepts_more():
                    break
            # Form the new block with the current source input
            blocks.append(self.source_reset())

        return blocks

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _find_indent(self, line):
        """Compute the new indentation level for a single line.

        Parameters
        ----------
        line : str
          A single new line of non-whitespace, non-comment Python input.

        Returns
        -------
        indent_spaces : int
          New value for the indent level (it may be equal to self.indent_spaces
          if indentation doesn't change).

        full_dedent : boolean
          Whether the new line causes a full flush-left dedent.
        """
        indent_spaces = self.indent_spaces
        full_dedent = self._full_dedent

        inisp = num_ini_spaces(line)
        if inisp < indent_spaces:
            indent_spaces = inisp
            if indent_spaces <= 0:
                full_dedent = True

        # Ignore trailing whitespace when looking for the block-opening colon:
        # _update_indent() strips comments but leaves the blanks that preceded
        # them, so 'if x:  # note' arrives here as 'if x:  '.
        if line.rstrip().endswith(':'):
            indent_spaces += 4
        elif dedent_re.match(line):
            indent_spaces -= 4
            if indent_spaces <= 0:
                full_dedent = True

        # Safety
        if indent_spaces < 0:
            indent_spaces = 0

        return indent_spaces, full_dedent

    def _update_indent(self, lines):
        """Update the indentation state from each non-blank line in `lines`.

        Comments are removed first; lines that are empty or whitespace-only
        afterwards are skipped."""
        for line in remove_comments(lines).splitlines():
            if line and not line.isspace():
                self.indent_spaces, self._full_dedent = self._find_indent(line)

    def _store(self, lines):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended."""

        if lines.endswith('\n'):
            self._buffer.append(lines)
        else:
            self._buffer.append(lines+'\n')
        self._set_source()

    def _set_source(self):
        """Rebuild self.source from the buffer, encoded with self.encoding."""
        self.source = ''.join(self._buffer).encode(self.encoding)
425 452
426 453
427 454 #-----------------------------------------------------------------------------
428 # IPython-specific syntactic support
455 # Functions and classes for IPython-specific syntactic support
429 456 #-----------------------------------------------------------------------------
430 457
431 # We implement things, as much as possible, as standalone functions that can be
432 # tested and validated in isolation.
458 # RegExp for splitting line contents into pre-char//first word-method//rest.
459 # For clarity, each group is on one line.
460
line_split = re.compile(r"""
             ^(\s*)               # any leading space
             ([,;/%]|!!?|\?\??)   # escape character or characters
             \s*([\w\.]*)         # function/method part (mix of \w and '.')
             (\s+.*$|$)           # rest of line
             """, re.VERBOSE)


def split_user_input(line):
    """Break a line into leading whitespace, escape, function part and rest.

    Lines that begin (after optional whitespace) with one of the escape
    sequences are split by the ``line_split`` pattern.  Any other line gets an
    empty escape and is split at its first run of whitespace instead.

    Lines with '=' in them are currently handled in a very inconsistent
    manner.

    Examples
    ========
    >>> split_user_input('x=1')
    ('', '', 'x=1', '')
    >>> split_user_input('?')
    ('', '?', '', '')
    >>> split_user_input('??')
    ('', '??', '', '')
    >>> split_user_input(' ?')
    (' ', '?', '', '')
    >>> split_user_input(' ??')
    (' ', '??', '', '')
    >>> split_user_input('??x')
    ('', '??', 'x', '')
    >>> split_user_input('?x=1')
    ('', '', '?x=1', '')
    >>> split_user_input('!ls')
    ('', '!', 'ls', '')
    >>> split_user_input(' !ls')
    (' ', '!', 'ls', '')
    >>> split_user_input('!!ls')
    ('', '!!', 'ls', '')
    >>> split_user_input(' !!ls')
    (' ', '!!', 'ls', '')
    >>> split_user_input(',ls')
    ('', ',', 'ls', '')
    >>> split_user_input(';ls')
    ('', ';', 'ls', '')
    >>> split_user_input(' ;ls')
    (' ', ';', 'ls', '')
    >>> split_user_input('f.g(x)')
    ('', '', 'f.g(x)', '')
    >>> split_user_input('f.g (x)')
    ('', '', 'f.g', '(x)')
    """
    found = line_split.match(line)
    if found is not None:
        lspace, esc, fpart, rest = found.groups()
    else:
        # No escape at the start: split at the first whitespace run.
        esc = ''
        lspace = re.match(r'^(\s*)(.*)', line).group(1)
        words = line.split(None, 1)
        if len(words) == 2:
            fpart, rest = words
        else:
            # Zero or one word: the whole line is the function part.
            fpart, rest = line, ''

    # fpart has to be a valid python identifier, so it better be only pure
    # ascii, no unicode; otherwise it is pushed back onto the rest.
    try:
        fpart = fpart.encode('ascii')
    except UnicodeEncodeError:
        lspace = unicode(lspace)
        rest = fpart + u' ' + rest
        fpart = u''

    return lspace, esc, fpart.strip(), rest.lstrip()
535
536
537 # The escaped translators ALL receive a line where their own escape has been
538 # stripped. Only '?' is valid at the end of the line, all others can only be
539 # placed at the start.
540
class LineInfo(object):
    """A single line of input together with its split-up pieces.

    Thin convenience wrapper around :func:`split_user_input`, meant to be
    passed around during input transformations.

    Attributes set at construction:

    line
      The original, raw line.

    lspace
      Any early whitespace before actual text starts.

    esc
      The initial escape character (or characters, for double-char escapes
      like '??' or '!!'); the empty string if there isn't one.

    fpart
      The 'function part' returned by :func:`split_user_input`.

    rest
      Everything else on the line.
    """
    def __init__(self, line):
        pieces = split_user_input(line)
        self.line = line
        self.lspace, self.esc, self.fpart, self.rest = pieces

    def __str__(self):
        fields = (self.lspace, self.esc, self.fpart, self.rest)
        return "LineInfo [%s|%s|%s|%s]" % fields
580
581
582 # Transformations of the special syntaxes that don't rely on an explicit escape
583 # character but instead on patterns on the input line
584
585 # The core transformations are implemented as standalone functions that can be
586 # tested and validated in isolation. Each of these uses a regexp, we
587 # pre-compile these and keep them close to each function definition for clarity
433 588
434 # Each of these uses a regexp, we pre-compile these and keep them close to each
435 # function definition for clarity
_assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                               r'\s*=\s*!\s*(?P<cmd>.*)')

def transform_assign_system(line):
    """Rewrite `files = !ls` style assignments; return other lines as-is."""
    # FIXME: This transforms the line to use %sc, but we've listed that magic
    # as deprecated.  We should then implement this functionality in a
    # standalone api that we can transform to, without going through a
    # deprecated magic.
    found = _assign_system_re.match(line)
    if found is None:
        return line
    quoted = make_quoted_expr("sc -l = %s" % found.group('cmd'))
    return '%s = get_ipython().magic(%s)' % (found.group('lhs'), quoted)
453 606
454 607
_assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
                              r'\s*=\s*%\s*(?P<cmd>.*)')

def transform_assign_magic(line):
    """Rewrite `a = %who` style assignments; return other lines as-is."""
    found = _assign_magic_re.match(line)
    if found is None:
        return line
    quoted = make_quoted_expr(found.group('cmd'))
    return '%s = get_ipython().magic(%s)' % (found.group('lhs'), quoted)
468 621
469 622
_classic_prompt_re = re.compile(r'^([ \t]*>>> |^[ \t]*\.\.\. )')

def transform_classic_prompt(line):
    """Strip a leading classic Python prompt ('>>> ' or '... ') from a line.

    Empty and whitespace-only lines, and lines without a prompt, come back
    unchanged.  Blanks before the prompt are removed along with it."""
    if not line or line.isspace():
        return line
    found = _classic_prompt_re.match(line)
    if found is None:
        return line
    # The pattern is anchored at the start, so the match end is exactly the
    # length of the prompt (plus any blanks in front of it).
    return line[found.end():]
485 635
486 636
_ipy_prompt_re = re.compile(r'^([ \t]*In \[\d+\]: |^[ \t]*\ \ \ \.\.\.+: )')

def transform_ipy_prompt(line):
    """Strip a leading classic IPython prompt from a line.

    Both the 'In [N]: ' input prompt and the '   ...: ' continuation prompt
    are recognized.  Empty and whitespace-only lines, and lines without a
    prompt, come back unchanged."""
    if not line or line.isspace():
        return line
    found = _ipy_prompt_re.match(line)
    if found is None:
        return line
    # The pattern is anchored at the start, so the match end is exactly the
    # length of the prompt (plus any blanks in front of it).
    return line[found.end():]
502 649
503 650
504 # Warning, these cannot be changed unless various regular expressions
505 # are updated in a number of places. Not great, but at least we told you.
506 ESC_SHELL = '!'
507 ESC_SH_CAP = '!!'
508 ESC_HELP = '?'
509 ESC_MAGIC = '%'
510 ESC_QUOTE = ','
511 ESC_QUOTE2 = ';'
512 ESC_PAREN = '/'
def transform_unescaped(line):
    """Apply the pattern-based (non-escape) transformations to a line.

    The line is passed, in order, through transform_assign_system,
    transform_assign_magic, transform_classic_prompt and
    transform_ipy_prompt, each one receiving the output of the previous one.

    Parameters
    ----------
    line : str
      A single line of input to be transformed.

    Returns
    -------
    new_line : str
      Transformed line, which may be identical to the original."""

    # Blank input has nothing to transform.
    if not line or line.isspace():
        return line

    result = line
    for transform in (transform_assign_system, transform_assign_magic,
                      transform_classic_prompt, transform_ipy_prompt):
        result = transform(result)
    return result
675
676 # Support for syntax transformations that use explicit escapes typed by the
677 # user at the beginning of a line
678
def tr_system(line_info):
    "Translate lines escaped with: !"
    # Drop the leading blanks, then every leading escape character.
    stripped = line_info.line.lstrip()
    cmd = stripped.lstrip(ESC_SHELL)
    quoted = make_quoted_expr(cmd)
    return '%sget_ipython().system(%s)' % (line_info.lspace, quoted)
684
685
def tr_system2(line_info):
    "Translate lines escaped with: !!"
    # Drop the leading blanks, then the two-character escape itself.
    stripped = line_info.line.lstrip()
    quoted = make_quoted_expr(stripped[2:])
    return '%sget_ipython().getoutput(%s)' % (line_info.lspace, quoted)
691
692
def tr_help(line_info):
    """Translate lines escaped with: ?/??

    Parameters
    ----------
    line_info : LineInfo
      Split-up input line; only its ``line`` attribute is read here.

    Returns
    -------
    str
      A call to ``show_usage()`` for a naked help line, a ``pinfo``/``pinfo2``
      magic call when a help escape is recognized, or the original line
      unchanged when it is not.
    """
    original = line_info.line

    # A naked help line should just fire the intro help screen
    if not original[1:]:
        return 'get_ipython().show_usage()'

    # There may be one or two '?' at the end, move them to the front so that
    # the rest of the logic can assume escapes are at the start
    line = original
    for _ in range(2):
        if line.endswith('?'):
            line = line[-1] + line[:-1]
    line_info = LineInfo(line)

    # From here on, simply choose which level of detail to get.
    if line_info.esc == '?':
        pinfo = 'pinfo'
    elif line_info.esc == '??':
        pinfo = 'pinfo2'
    else:
        # The rotated line still doesn't parse as a help request (e.g.
        # 'x=1?'): leave it alone instead of failing on an unbound name.
        return original

    tpl = '%sget_ipython().magic("%s %s")'
    return tpl % (line_info.lspace, pinfo,
                  ' '.join([line_info.fpart, line_info.rest]).strip())
717
718
def tr_magic(line_info):
    """Translate lines escaped with: %

    Parameters
    ----------
    line_info : LineInfo
      Split-up input line; ``lspace``, ``fpart`` and ``rest`` are read.

    Returns
    -------
    str
      The line rewritten as a ``get_ipython().magic(...)`` call, keeping the
      original leading whitespace.
    """
    tpl = '%sget_ipython().magic(%s)'
    # Strip the joined command *before* quoting it, so that a magic without
    # arguments ('%who') doesn't carry a trailing space into the quoted
    # string.
    cmd = make_quoted_expr(' '.join([line_info.fpart,
                                     line_info.rest]).strip())
    return tpl % (line_info.lspace, cmd)
725
726
def tr_quote(line_info):
    "Translate lines escaped with: ,"
    # Each whitespace-separated word becomes its own quoted argument.
    words = line_info.rest.split()
    args = '", "'.join(words)
    return '%s%s("%s")' % (line_info.lspace, line_info.fpart, args)
731
732
def tr_quote2(line_info):
    "Translate lines escaped with: ;"
    # The whole rest of the line is passed as one quoted argument.
    pieces = (line_info.lspace, line_info.fpart, line_info.rest)
    return '%s%s("%s")' % pieces
737
738
def tr_paren(line_info):
    "Translate lines escaped with: /"
    # Each whitespace-separated word becomes one (unquoted) argument.
    words = line_info.rest.split()
    args = ", ".join(words)
    return '%s%s(%s)' % (line_info.lspace, line_info.fpart, args)
743
744
def transform_escaped(line):
    """Transform lines that are explicitly escaped out.

    The escape found at the start of the line selects one of the tr_*
    functions, which does the actual translation.  A line without a known
    leading escape is still sent to tr_help when it ends with the help
    escape; any other line is returned unmodified."""

    # Empty lines just get returned unmodified
    if not line or line.isspace():
        return line

    handlers = { ESC_SHELL  : tr_system,
                 ESC_SH_CAP : tr_system2,
                 ESC_HELP   : tr_help,
                 ESC_HELP2  : tr_help,
                 ESC_MAGIC  : tr_magic,
                 ESC_QUOTE  : tr_quote,
                 ESC_QUOTE2 : tr_quote2,
                 ESC_PAREN  : tr_paren }

    # Get line endpoints, where the escapes can be
    info = LineInfo(line)

    handler = handlers.get(info.esc)
    if handler is not None:
        return handler(info)

    # If the escape is not at the start, only '?' needs to be special-cased.
    # All other escapes are only valid at the start
    if line.endswith(ESC_HELP):
        return tr_help(info)

    # If we don't recognize the escape, don't modify the line
    return line
776
513 777
class IPythonInputSplitter(InputSplitter):
    """An input splitter that recognizes all of IPython's special syntax."""

    def push(self, lines):
        """Transform IPython syntax in the given lines, then push them.

        Returns whatever :meth:`InputSplitter.push` returns for the
        transformed input.
        """
        pieces = lines.splitlines()

        # We only apply the escape transformers to the input if we have either
        # no input yet, or complete input.  This prevents the accidental
        # transformation of escapes inside multiline expressions like
        # triple-quoted strings or parenthesized expressions.
        if self._is_complete or not self._buffer:
            pieces = [transform_escaped(piece) for piece in pieces]

        # The unescaped transformations are applied to every input line
        pieces = [transform_unescaped(piece) for piece in pieces]
        return super(IPythonInputSplitter, self).push('\n'.join(pieces))
@@ -1,411 +1,623 b''
1 # -*- coding: utf-8 -*-
1 2 """Tests for the inputsplitter module.
2 3 """
3 4 #-----------------------------------------------------------------------------
4 5 # Copyright (C) 2010 The IPython Development Team
5 6 #
6 7 # Distributed under the terms of the BSD License. The full license is in
7 8 # the file COPYING, distributed as part of this software.
8 9 #-----------------------------------------------------------------------------
9 10
10 11 #-----------------------------------------------------------------------------
11 12 # Imports
12 13 #-----------------------------------------------------------------------------
13 14 # stdlib
14 15 import unittest
15 16 import sys
16 17
17 18 # Third party
18 19 import nose.tools as nt
19 20
20 21 # Our own
21 22 from IPython.core import inputsplitter as isp
22 23
23 24 #-----------------------------------------------------------------------------
24 25 # Semi-complete examples (also used as tests)
25 26 #-----------------------------------------------------------------------------
27
28 # Note: at the bottom, there's a slightly more complete version of this that
29 # can be useful during development of code here.
30
def mini_interactive_loop(raw_input):
    """Show, in miniature, how an interpreter loop drives the splitter.

    ``raw_input`` is called with the prompt string and must return one line
    of input; the test system passes a fake that replays canned lines.  The
    source accumulated by the splitter is returned.
    """

    from IPython.core.inputsplitter import InputSplitter

    splitter = InputSplitter()
    # A real interpreter would wrap this in an outer loop that keeps reading
    # until an exit/quit command; only the basic inner loop is shown here.
    while splitter.push_accepts_more():
        pad = ' '*splitter.indent_spaces
        splitter.push(pad + raw_input('>>> ' + pad))

    # The source is returned so a test suite can inspect it; a real
    # interpreter would instead send it for execution somewhere.
    return splitter.source_reset()
49 54
50 55 #-----------------------------------------------------------------------------
51 56 # Test utilities, just for local use
52 57 #-----------------------------------------------------------------------------
53 58
def assemble(block):
    """Turn every sub-block (a list of lines) into one newline-ended string."""
    joined = []
    for sub_block in block:
        joined.append('\n'.join(sub_block) + '\n')
    return joined
57 62
58 63
def pseudo_input(lines):
    """Build a raw_input stand-in that replays ``lines`` one call at a time."""
    pending = iter(lines)

    def raw_in(prompt):
        # The prompt is ignored; once the canned lines run out, every
        # further call returns the empty string.
        return next(pending, '')
    return raw_in
68 73
69 74 #-----------------------------------------------------------------------------
70 75 # Tests
71 76 #-----------------------------------------------------------------------------
def test_spaces():
    """Check num_ini_spaces() against a table of (input, expected count)."""
    # The number of leading whitespace characters in each input must match
    # the expected count next to it.
    tests = [('', 0),
             (' ', 1),
             ('\n', 0),
             (' \n', 1),
             ('x', 0),
             (' x', 1),
             ('  x', 2),
             ('    x', 4),
             # Note: tabs are counted as a single whitespace!
             ('\tx', 1),
             ('\t x', 2),
             ]

    for s, nsp in tests:
        nt.assert_equal(isp.num_ini_spaces(s), nsp)
88 93
89 94
def test_remove_comments():
    """Check remove_comments() against (input, expected output) pairs."""
    expected_by_input = (
        ('text', 'text'),
        ('text # comment', 'text '),
        ('text # comment\n', 'text \n'),
        ('text # comment \n', 'text \n'),
        ('line # c \nline\n', 'line \nline\n'),
        ('line # c \nline#c2 \nline\nline #c\n\n',
         'line \nline\nline\nline \n\n'),
    )

    for raw, cleaned in expected_by_input:
        nt.assert_equal(isp.remove_comments(raw), cleaned)
102 107
103 108
def test_get_input_encoding():
    """get_input_encoding() returns a string naming a usable encoding."""
    enc = isp.get_input_encoding()
    nt.assert_true(isinstance(enc, basestring))
    # Simple-minded sanity check: encoding a plain string with the reported
    # encoding must at least work.
    nt.assert_equal('test'.encode(enc), 'test')
110 115
111 116
class NoInputEncodingTestCase(unittest.TestCase):
    """get_input_encoding() when sys.stdin has no 'encoding' attribute."""

    def setUp(self):
        # Swap sys.stdin for a bare object; the real one is restored in
        # tearDown.
        class X: pass
        self.old_stdin = sys.stdin
        sys.stdin = X()

    def test(self):
        # With no 'encoding' attribute on stdin, 'ascii' is expected.
        self.assertEqual(isp.get_input_encoding(), 'ascii')

    def tearDown(self):
        sys.stdin = self.old_stdin
127 132
128 133
class InputSplitterTestCase(unittest.TestCase):
    """Tests for the splitter object stored in ``self.isp``.

    They cover reset/source bookkeeping, indentation tracking, ``push`` and
    ``push_accepts_more``, and ``split_blocks``.

    NOTE(review): several literals below push indented code; in this copy
    their leading whitespace looks collapsed to a single space -- confirm the
    exact indentation against the upstream file.
    """
    def setUp(self):
        # Every test starts from a fresh splitter
        self.isp = isp.InputSplitter()

    def test_reset(self):
        # After reset() all the inspected state is back to its empty value
        isp = self.isp
        isp.push('x=1')
        isp.reset()
        self.assertEqual(isp._buffer, [])
        self.assertEqual(isp.indent_spaces, 0)
        self.assertEqual(isp.source, '')
        self.assertEqual(isp.code, None)
        self.assertEqual(isp._is_complete, False)

    def test_source(self):
        # source joins the stored lines; source_reset() returns the same
        # text and empties the buffer
        self.isp._store('1')
        self.isp._store('2')
        self.assertEqual(self.isp.source, '1\n2\n')
        self.assertTrue(len(self.isp._buffer)>0)
        self.assertEqual(self.isp.source_reset(), '1\n2\n')
        self.assertEqual(self.isp._buffer, [])
        self.assertEqual(self.isp.source, '')

    def test_indent(self):
        isp = self.isp # shorthand
        isp.push('x=1')
        self.assertEqual(isp.indent_spaces, 0)
        isp.push('if 1:\n x=1')
        self.assertEqual(isp.indent_spaces, 4)
        isp.push('y=2\n')
        self.assertEqual(isp.indent_spaces, 0)
        isp.push('if 1:')
        self.assertEqual(isp.indent_spaces, 4)
        isp.push(' x=1')
        self.assertEqual(isp.indent_spaces, 4)
        # Blank lines shouldn't change the indent level
        isp.push(' '*2)
        self.assertEqual(isp.indent_spaces, 4)

    def test_indent2(self):
        isp = self.isp
        # When a multiline statement contains parens or multiline strings, we
        # shouldn't get confused.
        isp.push("if 1:")
        isp.push(" x = (1+\n 2)")
        self.assertEqual(isp.indent_spaces, 4)

    def test_dedent(self):
        isp = self.isp # shorthand
        isp.push('if 1:')
        self.assertEqual(isp.indent_spaces, 4)
        isp.push(' pass')
        self.assertEqual(isp.indent_spaces, 0)

    def test_push(self):
        # push() is truthy for a complete single statement
        isp = self.isp
        self.assertTrue(isp.push('x=1'))

    def test_push2(self):
        # push() is falsy right after the block opener, truthy for each of
        # the following lines
        isp = self.isp
        self.assertFalse(isp.push('if 1:'))
        for line in [' x=1', '# a comment', ' y=2']:
            self.assertTrue(isp.push(line))

    def test_push3(self):
        """Test input with leading whitespace"""
        isp = self.isp
        isp.push(' x=1')
        isp.push(' y=2')
        self.assertEqual(isp.source, 'if 1:\n x=1\n y=2\n')

    def test_replace_mode(self):
        # With input_mode 'replace' each push replaces the stored source
        isp = self.isp
        isp.input_mode = 'replace'
        isp.push('x=1')
        self.assertEqual(isp.source, 'x=1\n')
        isp.push('x=2')
        self.assertEqual(isp.source, 'x=2\n')

    def test_push_accepts_more(self):
        isp = self.isp
        isp.push('x=1')
        self.assertFalse(isp.push_accepts_more())

    def test_push_accepts_more2(self):
        # An open block keeps accepting input until an empty line is pushed
        isp = self.isp
        isp.push('if 1:')
        self.assertTrue(isp.push_accepts_more())
        isp.push(' x=1')
        self.assertTrue(isp.push_accepts_more())
        isp.push('')
        self.assertFalse(isp.push_accepts_more())

    def test_push_accepts_more3(self):
        isp = self.isp
        isp.push("x = (2+\n3)")
        self.assertFalse(isp.push_accepts_more())

    def test_push_accepts_more4(self):
        isp = self.isp
        # When a multiline statement contains parens or multiline strings, we
        # shouldn't get confused.
        # FIXME: we should be able to better handle de-dents in statements like
        # multiline strings and multiline expressions (continued with \ or
        # parens). Right now we aren't handling the indentation tracking quite
        # correctly with this, though in practice it may not be too much of a
        # problem. We'll need to see.
        isp.push("if 1:")
        isp.push(" x = (2+")
        isp.push(" 3)")
        self.assertTrue(isp.push_accepts_more())
        isp.push(" y = 3")
        self.assertTrue(isp.push_accepts_more())
        isp.push('')
        self.assertFalse(isp.push_accepts_more())

    def test_syntax_error(self):
        isp = self.isp
        # Syntax errors immediately produce a 'ready' block, so the invalid
        # Python can be sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        isp.push('run foo')
        self.assertFalse(isp.push_accepts_more())

    def check_split(self, block_lines, compile=True):
        # Helper: split_blocks() on the concatenated text must give back
        # exactly the assembled sub-blocks; with compile=True each sub-block
        # is also passed to _compile().
        blocks = assemble(block_lines)
        lines = ''.join(blocks)
        oblock = self.isp.split_blocks(lines)
        self.assertEqual(oblock, blocks)
        if compile:
            for block in blocks:
                self.isp._compile(block)

    def test_split(self):
        # All blocks of input we want to test in a list. The format for each
        # block is a list of lists, with each inner lists consisting of all the
        # lines (as single-lines) that should make up a sub-block.

        # Note: do NOT put here sub-blocks that don't compile, as the
        # check_split() routine makes a final verification pass to check that
        # each sub_block, as returned by split_blocks(), does compile
        # correctly.
        all_blocks = [ [['x=1']],

                       [['x=1'],
                        ['y=2']],

                       [['x=1'],
                        ['# a comment'],
                        ['y=11']],

                       [['if 1:',
                         ' x=1'],
                        ['y=3']],

                       [['def f(x):',
                         ' return x'],
                        ['x=1']],

                       [['def f(x):',
                         ' x+=1',
                         ' ',
                         ' return x'],
                        ['x=1']],

                       [['def f(x):',
                         ' if x>0:',
                         ' y=1',
                         ' # a comment',
                         ' else:',
                         ' y=4',
                         ' ',
                         ' return y'],
                        ['x=1'],
                        ['if 1:',
                         ' y=11'] ],

                       # NOTE(review): there is no comma between the two
                       # strings of the 'for' entries below, so each pair is
                       # concatenated into one single-line statement --
                       # confirm this is intended.
                       [['for i in range(10):'
                         ' x=i**2']],

                       [['for i in range(10):'
                         ' x=i**2'],
                        ['z = 1']],
                       ]
        for block_lines in all_blocks:
            self.check_split(block_lines)

    def test_split_syntax_errors(self):
        # Block splitting with invalid syntax
        all_blocks = [ [['a syntax error']],

                       [['x=1'],
                        ['a syntax error']],

                       [['for i in range(10):'
                         ' an error']],

                       ]
        for block_lines in all_blocks:
            self.check_split(block_lines, compile=False)
329 334
330 335
class InteractiveLoopTestCase(unittest.TestCase):
    """Run mini_interactive_loop the way a python shell would be driven."""

    def check_ns(self, lines, ns):
        """Execute the source produced from ``lines`` and compare namespaces.

        The lines are written exactly as they would be typed in an
        auto-indenting environment: mini_interactive_loop already prepends
        the indentation to each line it reads.
        """
        src = mini_interactive_loop(pseudo_input(lines))
        test_ns = {}
        exec(src, test_ns)
        # Python fills test_ns with extra keys, so equality of the two dicts
        # can't be required; every expected entry must simply be present
        # with the expected value.
        for name in ns:
            self.assertEqual(test_ns[name], ns[name])

    def test_simple(self):
        self.check_ns(['x=1'], dict(x=1))

    def test_simple2(self):
        self.check_ns(['if 1:', 'x=2'], dict(x=2))

    def test_xy(self):
        self.check_ns(['x=1; y=2'], dict(x=1, y=2))

    def test_abc(self):
        self.check_ns(['if 1:', 'a=1', 'b=2', 'c=3'], dict(a=1, b=2, c=3))

    def test_multi(self):
        self.check_ns(['x =(1+', '1+', '2)'], dict(x=4))
364 369
365 370
366 class IPythonInputTestCase(InputSplitterTestCase):
367 def setUp(self):
368 self.isp = isp.IPythonInputSplitter()
def test_LineInfo():
    """Simple test for LineInfo construction and str()"""
    rendered = str(isp.LineInfo(' %cd /home'))
    nt.assert_equals(rendered, 'LineInfo [ |%|cd|/home]')
375
376
def test_split_user_input():
    """Unicode test - split_user_input already has good doctests"""
    # Non-ASCII input: the whole line is expected back as the last part,
    # with the first three parts empty.
    line = u"Pérez Fernando"
    parts = isp.split_user_input(line)
    parts_expected = (u'', u'', u'', line)
    nt.assert_equal(parts, parts_expected)
369 383
370 384
371 385 # Transformer tests
def transform_checker(tests, func):
    """Assert that ``func`` maps every raw input in ``tests`` to its pair."""
    for raw, expected in tests:
        nt.assert_equals(func(raw), expected)
376
390
# Data for all the syntax tests in the form of lists of pairs of
# raw/transformed input. We store it here as a global dict so that we can use
# it both within single-function tests and also to validate the behavior of the
# larger objects.
#
# Each key names one group of examples; the test functions below look the
# groups up by key.
# NOTE(review): leading whitespace inside some literals may have been
# collapsed in this copy -- confirm against the upstream file.

syntax = \
  dict(assign_system =
       [('a =! ls', 'a = get_ipython().magic("sc -l = ls")'),
        ('b = !ls', 'b = get_ipython().magic("sc -l = ls")'),
        ('x=1', 'x=1'), # normal input is unmodified
        (' ',' '), # blank lines are kept intact
        ],

       assign_magic =
       [('a =% who', 'a = get_ipython().magic("who")'),
        ('b = %who', 'b = get_ipython().magic("who")'),
        ('x=1', 'x=1'), # normal input is unmodified
        (' ',' '), # blank lines are kept intact
        ],

       classic_prompt =
       [('>>> x=1', 'x=1'),
        ('x=1', 'x=1'), # normal input is unmodified
        (' ',' '), # blank lines are kept intact
        ],

       ipy_prompt =
       [('In [1]: x=1', 'x=1'),
        ('x=1', 'x=1'), # normal input is unmodified
        (' ',' '), # blank lines are kept intact
        ],

       # Tests for the escape transformer to leave normal code alone
       escaped_noesc =
       [ (' ', ' '),
         ('x=1', 'x=1'),
         ],

       # System calls
       escaped_shell =
       [ ('!ls', 'get_ipython().system("ls")'),
         # Double-escape shell, this means to capture the output of the
         # subprocess and return it
         ('!!ls', 'get_ipython().getoutput("ls")'),
         ],

       # Help/object info
       escaped_help =
       [ ('?', 'get_ipython().show_usage()'),
         ('?x1', 'get_ipython().magic("pinfo x1")'),
         ('??x2', 'get_ipython().magic("pinfo2 x2")'),
         ('x3?', 'get_ipython().magic("pinfo x3")'),
         ('x4??', 'get_ipython().magic("pinfo2 x4")'),
         ],

       # Explicit magic calls
       escaped_magic =
       [ ('%cd', 'get_ipython().magic("cd")'),
         ('%cd /home', 'get_ipython().magic("cd /home")'),
         (' %magic', ' get_ipython().magic("magic")'),
         ],

       # Quoting with separate arguments
       escaped_quote =
       [ (',f', 'f("")'),
         (',f x', 'f("x")'),
         (' ,f y', ' f("y")'),
         (',f a b', 'f("a", "b")'),
         ],

       # Quoting with single argument
       escaped_quote2 =
       [ (';f', 'f("")'),
         (';f x', 'f("x")'),
         (' ;f y', ' f("y")'),
         (';f a b', 'f("a b")'),
         ],

       # Simply apply parens
       escaped_paren =
       [ ('/f', 'f()'),
         ('/f x', 'f(x)'),
         (' /f y', ' f(y)'),
         ('/f a b', 'f(a, b)'),
         ],

       # More complex multiline tests
       ## escaped_multiline =
       ## [()],
       )
481
# Multiline syntax examples. Each of these should be a list of lists, with
# each entry itself having pairs of raw/transformed input. The union (with
# '\n'.join()) of the transformed inputs is what the splitter should produce
# when fed the raw lines one at a time via push.
# NOTE(review): leading whitespace inside the continuation-line literals may
# have been collapsed in this copy -- confirm against the upstream file.
syntax_ml = \
  dict(classic_prompt =
       [ [('>>> for i in range(10):','for i in range(10):'),
          ('... print i',' print i'),
          ('... ', ''),
          ],
         ],

       ipy_prompt =
       [ [('In [24]: for i in range(10):','for i in range(10):'),
          (' ....: print i',' print i'),
          (' ....: ', ''),
          ],
         ],
       )
501
377 502
def test_assign_system():
    """Run transform_assign_system over the shared 'assign_system' pairs."""
    cases = syntax['assign_system']
    transform_checker(cases, isp.transform_assign_system)
383 505
384 506
def test_assign_magic():
    """Run transform_assign_magic over the shared 'assign_magic' pairs."""
    cases = syntax['assign_magic']
    transform_checker(cases, isp.transform_assign_magic)
390 509
391 510
def test_classic_prompt():
    """Run transform_classic_prompt over single- and multi-line examples."""
    strip_prompt = isp.transform_classic_prompt
    # The single-line pairs go first, then every multi-line example
    for cases in [syntax['classic_prompt']] + syntax_ml['classic_prompt']:
        transform_checker(cases, strip_prompt)
400 515
401 516
def test_ipy_prompt():
    """Run transform_ipy_prompt over single- and multi-line examples."""
    strip_prompt = isp.transform_ipy_prompt
    # The single-line pairs go first, then every multi-line example
    for cases in [syntax['ipy_prompt']] + syntax_ml['ipy_prompt']:
        transform_checker(cases, strip_prompt)
521
522
def test_escaped_noesc():
    """Run transform_escaped over the 'escaped_noesc' pairs."""
    cases = syntax['escaped_noesc']
    transform_checker(cases, isp.transform_escaped)
525
526
def test_escaped_shell():
    """Run transform_escaped over the 'escaped_shell' pairs."""
    cases = syntax['escaped_shell']
    transform_checker(cases, isp.transform_escaped)
529
530
def test_escaped_help():
    """Run transform_escaped over the 'escaped_help' pairs."""
    cases = syntax['escaped_help']
    transform_checker(cases, isp.transform_escaped)
533
534
def test_escaped_magic():
    """Run transform_escaped over the 'escaped_magic' pairs."""
    cases = syntax['escaped_magic']
    transform_checker(cases, isp.transform_escaped)
537
538
def test_escaped_quote():
    """Run transform_escaped over the 'escaped_quote' pairs."""
    cases = syntax['escaped_quote']
    transform_checker(cases, isp.transform_escaped)
541
542
def test_escaped_quote2():
    """Run transform_escaped over the 'escaped_quote2' pairs."""
    cases = syntax['escaped_quote2']
    transform_checker(cases, isp.transform_escaped)
545
546
def test_escaped_paren():
    """Run transform_escaped over the 'escaped_paren' pairs."""
    cases = syntax['escaped_paren']
    transform_checker(cases, isp.transform_escaped)
549
550
class IPythonInputTestCase(InputSplitterTestCase):
    """Re-run the inherited test battery against an IPythonInputSplitter.

    Only ``setUp`` changes which object ``self.isp`` holds.  On top of the
    inherited tests, the ``syntax`` and ``syntax_ml`` tables that the
    module-level functions check one transformer at a time are pushed
    through the splitter object itself.
    """
    def setUp(self):
        self.isp = isp.IPythonInputSplitter()

    def test_syntax(self):
        """Call all single-line syntax tests from the main object"""
        splitter = self.isp
        for cases in syntax.itervalues():
            for raw, expected in cases:
                # Inputs that start with a space are not exercised here
                if not raw.startswith(' '):
                    splitter.push(raw)
                    found = splitter.source_reset().rstrip()
                    self.assertEqual(found, expected)

    def test_syntax_multiline(self):
        splitter = self.isp
        for example in syntax_ml.itervalues():
            expected_parts = []
            for line_pairs in example:
                # Feed the raw lines one at a time, collecting what each
                # of them should turn into
                for raw, expected_part in line_pairs:
                    splitter.push(raw)
                    expected_parts.append(expected_part)

                found = splitter.source_reset().rstrip()
                expected = '\n'.join(expected_parts).rstrip()
                self.assertEqual(found, expected)
585
586
587 #-----------------------------------------------------------------------------
588 # Main - use as a script
589 #-----------------------------------------------------------------------------
590
591 if __name__ == '__main__':
592 # A simple demo for interactive experimentation. This code will not get
593 # picked up by any test suite. Useful mostly for illustration and during
594 # development.
595 from IPython.core.inputsplitter import InputSplitter, IPythonInputSplitter
411 596
597 #isp, start_prompt = InputSplitter(), '>>> '
598 isp, start_prompt = IPythonInputSplitter(), 'In> '
599
600 autoindent = True
601 #autoindent = False
602
603 # In practice, this input loop would be wrapped in an outside loop to read
604 # input indefinitely, until some exit/quit command was issued. Here we
605 # only illustrate the basic inner loop.
606 try:
607 while True:
608 prompt = start_prompt
609 while isp.push_accepts_more():
610 indent = ' '*isp.indent_spaces
611 if autoindent:
612 line = indent + raw_input(prompt+indent)
613 else:
614 line = raw_input(prompt)
615 isp.push(line)
616 prompt = '... '
617
618 # Here we just return input so we can use it in a test suite, but a
619 # real interpreter would instead send it for execution somewhere.
620 src = isp.source_reset()
621 print 'Input source was:\n', src # dbg
622 except EOFError:
623 print 'Bye'
@@ -1,473 +1,474 b''
1 1 # encoding: utf-8
2 2 """
3 3 Utilities for working with strings and text.
4 4 """
5 5
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2008-2009 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 #-----------------------------------------------------------------------------
14 14 # Imports
15 15 #-----------------------------------------------------------------------------
16 16
17 17 import __main__
18 18
19 19 import os
20 20 import re
21 21 import shutil
22 22 import types
23 23
24 24 from IPython.external.path import path
25 25
26 26 from IPython.utils.generics import result_display
27 27 from IPython.utils.io import nlprint
28 28 from IPython.utils.data import flatten
29 29
30 30 #-----------------------------------------------------------------------------
31 31 # Code
32 32 #-----------------------------------------------------------------------------
33 33
34 34 StringTypes = types.StringTypes
35 35
36 36
def unquote_ends(istr):
    """Strip one matching pair of quotes from the two ends of a string.

    Empty input, and strings whose first and last characters are not the
    same quote character (single or double), are returned unchanged.
    """
    if istr and istr[0] == istr[-1] and istr[0] in ('"', "'"):
        return istr[1:-1]
    return istr
47 47
48 48
class LSString(str):
    """A str that offers extra read-only views of its own value.

    These are normal strings, with these additional attributes:

        .l (or .list) : the value split on newlines, as a list.
        .n (or .nlstr): the string itself.
        .s (or .spstr): the value with newlines replaced by spaces.
        .p (or .paths): path objects for the lines naming existing paths.

    Views that have to be computed are built on first access and then kept
    on the instance.
    """

    def get_list(self):
        """Return the value split on newlines."""
        try:
            cached = self.__list
        except AttributeError:
            cached = self.__list = self.split('\n')
        return cached

    l = list = property(get_list)

    def get_spstr(self):
        """Return the value with every newline replaced by a space."""
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = self.replace('\n',' ')
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the lines that exist on the filesystem."""
        try:
            cached = self.__paths
        except AttributeError:
            existing = [p for p in self.split('\n') if os.path.exists(p)]
            cached = self.__paths = [path(p) for p in existing]
        return cached

    p = paths = property(get_paths)
96 96
97 97
98 98 def print_lsstring(arg):
99 99 """ Prettier (non-repr-like) and more informative printer for LSString """
100 100 print "LSString (.p, .n, .l, .s available). Value:"
101 101 print arg
102 102
103 103
104 104 print_lsstring = result_display.when_type(LSString)(print_lsstring)
105 105
106 106
class SList(list):
    """A list of strings that offers extra views and awk-like helpers.

    These are normal lists, with these additional attributes:

        .l (or .list) : the list itself.
        .n (or .nlstr): the items joined on newlines.
        .s (or .spstr): the items joined on spaces.
        .p (or .paths): path objects for the items naming existing paths.

    Views that have to be computed are built on first access and then kept
    on the instance.
    """

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces."""
        try:
            cached = self.__spstr
        except AttributeError:
            cached = self.__spstr = ' '.join(self)
        return cached

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines."""
        try:
            cached = self.__nlstr
        except AttributeError:
            cached = self.__nlstr = '\n'.join(self)
        return cached

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return path objects for the items that exist on the filesystem."""
        try:
            cached = self.__paths
        except AttributeError:
            existing = [p for p in self if os.path.exists(p)]
            cached = self.__paths = [path(p) for p in existing]
        return cached

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched for case-insensitively; a callable is
        used as the predicate directly.  If prune is true, return all items
        NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field (items lacking that field are tested
        against the empty string).

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # The text the predicate sees: the whole item, or one field
            if field is None:
                return s
            try:
                return s.split()[field]
            except IndexError:
                return ""

        if isinstance(pattern, basestring):
            def matches(text):
                return re.search(pattern, text, re.IGNORECASE)
        else:
            matches = pattern

        wanted = not prune
        return SList([el for el in self
                      if bool(matches(match_target(el))) == wanted])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::
            -rwxrwxrwx 1 ville None 18 Dec 14 2006 ChangeLog
            drwxrwxrwx+ 6 ville None 0 Oct 24 18:05 IPython

        a.fields(0) is ['-rwxrwxrwx', 'drwxrwxrwx+']
        a.fields(1,0) is ['1 -rwxrwxrwx', '6 drwxrwxrwx+']
        (note the joining by space).
        a.fields(-1) is ['ChangeLog', 'IPython']

        Fields missing from an item are skipped, and items that contribute
        no field at all are left out of the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if not fields:
            return [el.split() for el in self]

        picked = SList()
        for parts in [el.split() for el in self]:
            chosen = []
            for idx in fields:
                try:
                    chosen.append(parts[idx])
                except IndexError:
                    pass
            if chosen:
                picked.append(" ".join(chosen))

        return picked

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Example::
            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        A new SList is returned.
        """

        # decorate: pair every line with its sort key
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]

        if nums:
            # Replace each key by the integer made of its digits, or by 0
            # when it has none
            for pair in dsu:
                digits = "".join([ch for ch in pair[0] if ch.isdigit()])
                try:
                    pair[0] = int(digits)
                except ValueError:
                    pair[0] = 0

        # sort, then undecorate
        dsu.sort()
        return SList([pair[1] for pair in dsu])
250 250
251 251 def print_slist(arg):
252 252 """ Prettier (non-repr-like) and more informative printer for SList """
253 253 print "SList (.p, .n, .l, .s, .grep(), .fields(), sort() available):"
254 254 if hasattr(arg, 'hideonce') and arg.hideonce:
255 255 arg.hideonce = False
256 256 return
257 257
258 258 nlprint(arg)
259 259
260 260
261 261 print_slist = result_display.when_type(SList)(print_slist)
262 262
263 263
def esc_quotes(strng):
    """Return ``strng`` with a backslash put before every quote character."""

    escaped = strng
    for quote in ('"', "'"):
        escaped = escaped.replace(quote, '\\' + quote)
    return escaped
268 268
269 269
def make_quoted_expr(s):
    """Return string s in appropriate quotes, using raw string if possible.

    Leading and trailing whitespace of ``s`` is dropped; the returned text is
    Python source for a string literal whose value is the stripped string.

    The quoting is chosen so that no escaping is needed when possible: plain
    double or single quotes, then triple quotes, with a raw-string prefix
    when the text contains backslashes.  A trailing backslash is handled by
    padding the raw literal with one extra character and slicing it off
    again (``r"...\\_"[:-1]``).  When no quoting style fits, a
    backslash-escaped double-quoted literal is returned.

    XXX - example removed because it caused encoding errors in documentation
    generation. We need a new example that doesn't contain invalid chars.
    """

    # Strip first, so that every decision below is taken on exactly the text
    # that ends up between the quotes (a quote or backslash followed only by
    # whitespace is the last character once the whitespace is gone).
    body = s.strip()

    tail = ''
    tailpadding = ''
    raw = ''
    if "\\" in body:
        raw = 'r'
        if body.endswith('\\'):
            # A raw literal can't end in a backslash: pad it, slice it back
            tail = '[:-1]'
            tailpadding = '_'
    if '"' not in body:
        quote = '"'
    elif "'" not in body:
        quote = "'"
    elif '"""' not in body and not body.endswith('"'):
        quote = '"""'
    elif "'''" not in body and not body.endswith("'"):
        quote = "'''"
    else:
        # give up, backslash-escaped string will do.  Backslashes must be
        # escaped too (and first), since this literal is not a raw string.
        escaped = body.replace('\\', '\\\\')
        escaped = escaped.replace('"', '\\"').replace("'", "\\'")
        return '"%s"' % escaped
    res = raw + quote + body + tailpadding + quote + tail
    return res
301 302
302 303
def qw(words,flat=0,sep=None,maxsplit=-1):
    """Similar to Perl's qw() operator, but with some more options.

    qw(words,flat=0,sep=' ',maxsplit=-1) -> words.split(sep,maxsplit)

    A string (or instance of a string subclass) is split on ``sep``; empty
    and whitespace-only pieces are dropped and the rest are stripped.

    words can also be a list itself, and with flat=1, the output will be
    recursively flattened.  ``sep`` and ``maxsplit`` apply to the strings
    nested inside such a list as well.

    Examples:

    >>> qw('1 2')
    ['1', '2']

    >>> qw(['a b','1 2',['m n','p q']])
    [['a', 'b'], ['1', '2'], [['m', 'n'], ['p', 'q']]]

    >>> qw(['a b','1 2',['m n','p q']],flat=1)
    ['a', 'b', '1', '2', 'm', 'n', 'p', 'q']
    """

    # isinstance, not an exact type test, so string subclasses are split
    # like strings instead of being iterated character by character.
    if isinstance(words, StringTypes):
        return [word.strip() for word in words.split(sep,maxsplit)
                if word and not word.isspace() ]

    # Pass every option down, so nested strings are split the same way
    nested = [qw(item, flat, sep, maxsplit) for item in words]
    if flat:
        return flatten(nested)
    return nested
329 330
330 331
def qwflat(words,sep=None,maxsplit=-1):
    """Shorthand for qw() with flattening turned on."""
    return qw(words, flat=1, sep=sep, maxsplit=maxsplit)
334 335
335 336
def qw_lol(indata):
    """qw_lol('a b') -> [['a','b']],
    otherwise it's just a call to qw().

    We need this to make sure the modules_some keys *always* end up as a
    list of lists."""

    # isinstance, not an exact type test, so that instances of string
    # subclasses are wrapped like plain strings.
    if isinstance(indata, StringTypes):
        return [qw(indata)]
    return qw(indata)
347 348
348 349
def grep(pat,list,case=1):
    """Simple minded grep-like function.

    grep(pat,list) returns the items of list that contain pat, or None when
    there are none.

    Only plain substring matching is done, with no support for regexps.
    Pass case=0 for case-insensitive matching."""

    # Still crude: the matching items are collected into a new list.
    if case:
        out = [term for term in list if term.find(pat) > -1]
    else:
        lpat = pat.lower()
        out = [term for term in list if term.lower().find(lpat) > -1]

    return out or None
369 370
370 371
def dgrep(pat,*opts):
    """Run grep() over dir(__main__) + dir(__main__.__builtins__).

    Any extra positional arguments are forwarded to grep() after the list of
    names.  A very common use of grep() when working interactively."""

    names = dir(__main__) + dir(__main__.__builtins__)
    return grep(pat, names, *opts)
377 378
378 379
def idgrep(pat):
    """Case-insensitive dgrep()."""

    # The extra positional argument is forwarded by dgrep() to grep() as its
    # `case` flag; 0 turns case sensitivity off.
    case = 0
    return dgrep(pat, case)
383 384
384 385
def igrep(pat,list):
    """Case-insensitive grep(): same as grep(pat,list,case=0)."""

    return grep(pat, list, 0)
389 390
390 391
def indent(str,nspaces=4,ntabs=0):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    The prefix (ntabs tabs followed by nspaces spaces) is inserted at the
    start of the string and after every os.linesep found in it.  When the
    input ends with a line separator, no dangling prefix is left after it.

    Returns None if str is None.
    """
    if str is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if not ind:
        # Nothing to insert.  This has to be handled up front: with an empty
        # prefix the trim below would evaluate outstr[:-0], which is the empty
        # string, so text ending in a line separator would be lost entirely.
        return str
    # NOTE(review): lines are recognized by os.linesep only, so '\n'-separated
    # text on a platform with a different separator is indented only at its
    # start -- confirm whether that is intended.
    outstr = '%s%s' % (ind,str.replace(os.linesep,os.linesep+ind))
    if outstr.endswith(os.linesep+ind):
        # The input ended with a line separator: drop the prefix that was
        # appended after it.
        return outstr[:-len(ind)]
    else:
        return outstr
404 405
def native_line_ends(filename,backup=1):
    """Convert (in-place) a file to line-ends native to the current OS.

    The file is copied to a backup (the name plus a platform-dependent
    suffix), then rewritten with its lines joined by os.linesep and a final
    os.linesep appended.  If rewriting fails, the backup is renamed back over
    the original and the error is not propagated.

    If the optional backup argument is given as false, no backup of the
    original file is left. """

    backup_suffixes = {'posix':'~','dos':'.bak','nt':'.bak','mac':'.bak'}

    # Platforms missing from the table get '.bak' instead of a KeyError.
    bak_filename = filename + backup_suffixes.get(os.name, '.bak')

    # Read the raw bytes so the existing line endings are seen exactly as
    # stored; splitlines() below handles '\n', '\r\n' and '\r'.
    infile = open(filename,'rb')
    try:
        original = infile.read()
    finally:
        infile.close()
    shutil.copy2(filename,bak_filename)

    # The output file is opened in binary mode, so the separator must be a
    # byte string too.
    eol = os.linesep.encode('ascii')
    try:
        new = open(filename,'wb')
        try:
            new.write(eol.join(original.splitlines()))
            new.write(eol) # ALWAYS put an eol at the end of the file
        finally:
            # Close even on failure, so the handle is released before the
            # backup is moved back.
            new.close()
    except Exception:
        # Best effort: put the untouched copy back and carry on.
        os.rename(bak_filename,filename)
    if not backup:
        try:
            os.remove(bak_filename)
        except OSError:
            # The backup may already be gone (it was renamed back above).
            pass
429 430
430 431
def list_strings(arg):
    """Always return a list of strings, given a string or list of strings
    as input.

    A single string is wrapped in a one-element list; anything else is
    returned as-is (the same object, not a copy).

    :Examples:

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """

    return [arg] if isinstance(arg, basestring) else arg
449 450
450 451
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    The text is surrounded by one space and an equal number of repetitions of
    mark on each side, sized so the result does not exceed width when the
    text fits.  With empty txt, a line of marks cut to exactly width
    characters is returned.  Text too long for width is returned with just
    the surrounding spaces.

    :Examples:

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    if mark:
        # Explicit floor division: the count must stay an integer (it is used
        # as a repetition count) even where '/' means true division.
        nmark = max(0, (width-len(txt)-2)//len(mark)//2)
    else:
        # An empty mark has nothing to repeat; also avoids dividing by zero.
        nmark = 0
    marks = mark*nmark
    return '%s %s %s' % (marks,txt,marks)
472 473
473 474
General Comments 0
You need to be logged in to leave comments. Login now