##// END OF EJS Templates
Give input lines to tokenize one at a time...
Thomas Kluyver -
Show More
@@ -1,532 +1,525 b''
1 1 """Input transformer classes to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5 """
6 6 import abc
7 7 import functools
8 8 import re
9 9 from io import StringIO
10 10
11 11 from IPython.core.splitinput import LineInfo
12 12 from IPython.utils import tokenize2
13 13 from IPython.utils.tokenize2 import generate_tokens, untokenize, TokenError
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Globals
17 17 #-----------------------------------------------------------------------------
18 18
19 19 # The escape sequences that define the syntax transformations IPython will
20 20 # apply to user input. These can NOT be just changed here: many regular
21 21 # expressions and other parts of the code may use their hardcoded values, and
22 22 # for all intents and purposes they constitute the 'IPython syntax', so they
23 23 # should be considered fixed.
24 24
# Prefixes that mark a line as IPython special syntax.
ESC_SHELL = '!'    # Send line to underlying system shell
ESC_SH_CAP = '!!'  # Send line to system shell and capture output
ESC_HELP = '?'     # Find information about object
ESC_HELP2 = '??'   # Find extra-detailed information about object
ESC_MAGIC = '%'    # Call magic function
ESC_MAGIC2 = '%%'  # Call cell-magic function
ESC_QUOTE = ','    # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'   # Quote all args as a single string, call
ESC_PAREN = '/'    # Call first argument with rest of line as arguments

# Every escape sequence defined above, in declaration order.
ESC_SEQUENCES = [
    ESC_SHELL, ESC_SH_CAP, ESC_HELP,
    ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,
    ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
]
38 38
39 39
class InputTransformer(metaclass=abc.ABCMeta):
    """Interface implemented by every line-based input transformer."""

    @abc.abstractmethod
    def push(self, line):
        """Feed one line of input to the transformer.

        Subclasses must override this method.  It returns the transformed
        input, or None while the transformer is waiting for more input.

        Implementations may raise ``SyntaxError`` if the input is invalid. No
        other exceptions may be raised.
        """

    @abc.abstractmethod
    def reset(self):
        """Return (transformed) whatever lines have been accumulated so far
        and clear the transformer's internal state.

        Subclasses must override this method.
        """

    @classmethod
    def wrap(cls, func):
        """Decorator helper for subclasses.

        Returns a factory which, when called with keyword arguments, builds
        an instance of ``cls`` around the decorated object.  The factory
        keeps the decorated object's name and docstring.
        """
        def build(**kwargs):
            return cls(func, **kwargs)

        return functools.wraps(func)(build)
74 74
class StatelessInputTransformer(InputTransformer):
    """Adapter exposing a plain per-line function as an input transformer."""

    def __init__(self, func):
        # Callable invoked with each pushed line; its result is returned as is.
        self.func = func

    def __repr__(self):
        template = "StatelessInputTransformer(func={0!r})"
        return template.format(self.func)

    def push(self, line):
        """Pass ``line`` through the wrapped function and return the
        transformed input."""
        transformed = self.func(line)
        return transformed

    def reset(self):
        """Do nothing; kept so the transformer interface is complete."""
        return None
91 91
class CoroutineInputTransformer(InputTransformer):
    """Adapter driving an input transformer written as a coroutine."""

    def __init__(self, coro, **kwargs):
        # Create the generator, then advance it to its first ``yield`` so it
        # is ready to receive lines through ``send``.
        self.coro = coro(**kwargs)
        next(self.coro)

    def __repr__(self):
        template = "CoroutineInputTransformer(coro={0!r})"
        return template.format(self.coro)

    def push(self, line):
        """Send one line into the coroutine.

        Returns whatever the coroutine yields back: the transformed input,
        or None if it is waiting for more input.
        """
        result = self.coro.send(line)
        return result

    def reset(self):
        """Send None into the coroutine, which is its reset signal, and
        return whatever it yields (any accumulated, transformed lines).
        """
        flushed = self.coro.send(None)
        return flushed
114 114
class TokenInputTransformer(InputTransformer):
    """Wrapper for a token-based input transformer.

    func should accept a list of tokens (5-tuples, see tokenize docs), and
    return an iterable which can be passed to tokenize.untokenize().
    """
    def __init__(self, func):
        self.func = func
        # Lines received since the last completed statement, each stored
        # with a trailing newline appended.
        self.buf = []
        self.reset_tokenizer()

    def reset_tokenizer(self):
        """Create a fresh tokenizer that reads the buffered lines one at a
        time, starting again from the first buffered line."""
        it = iter(self.buf)
        self.tokenizer = generate_tokens(it.__next__)

    def push(self, line):
        """Buffer ``line`` and try to tokenize a complete statement.

        Returns the untokenized output of ``func`` once a full statement has
        been tokenized, the raw buffered text if only whitespace has been
        received so far, or None if more lines are needed.
        """
        self.buf.append(line + '\n')
        if all(buffered.isspace() for buffered in self.buf):
            return self.reset()

        tokens = []
        stop_at_NL = False
        try:
            for intok in self.tokenizer:
                tokens.append(intok)
                t = intok[0]
                if t == tokenize2.NEWLINE or (stop_at_NL and t == tokenize2.NL):
                    # Stop before we try to pull a line we don't have yet
                    break
                elif t == tokenize2.ERRORTOKEN:
                    # After an error token, also accept a non-logical
                    # newline (NL) as the end of the statement.
                    stop_at_NL = True
        except TokenError:
            # Multi-line statement - stop and try again with the next line
            self.reset_tokenizer()
            return None

        return self.output(tokens)

    def output(self, tokens):
        """Clear the buffer and return ``func(tokens)`` converted back to
        source text, without trailing newlines."""
        self.buf.clear()
        self.reset_tokenizer()
        return untokenize(self.func(tokens)).rstrip('\n')

    def reset(self):
        """Return the buffered text (trailing newlines stripped) and clear
        all state.  Returns None if nothing was buffered."""
        pending = ''.join(self.buf)
        self.buf.clear()
        self.reset_tokenizer()
        if pending:
            return pending.rstrip('\n')
171 164
class assemble_python_lines(TokenInputTransformer):
    """Token transformer that applies no token function: once a statement
    is complete, the buffered lines are returned via ``reset()``."""

    def __init__(self):
        # No token-level function is needed; ``output`` never calls it.
        super().__init__(None)

    def output(self, tokens):
        # The tokens are ignored; the buffered source text is returned.
        return self.reset()
178 171
@CoroutineInputTransformer.wrap
def assemble_logical_lines():
    r"""Join lines following explicit line continuations (\)

    Coroutine: each line is sent in; it yields None while a continuation is
    pending and the joined line once a line without a trailing backslash
    arrives.  Sending None while lines are pending flushes what has been
    collected so far.
    """
    line = ''
    while True:
        line = (yield line)
        if not line or line.isspace():
            # Empty, whitespace-only and reset (None) inputs are yielded
            # straight back on the next iteration.
            continue

        parts = []
        while line is not None:
            # A trailing backslash inside a comment is not a continuation.
            if line.endswith('\\') and (not has_comment(line)):
                parts.append(line[:-1])
                line = (yield None)  # Get another line
            else:
                parts.append(line)
                break

        # Output
        line = ''.join(parts)
199 192
200 193 # Utilities
201 194 def _make_help_call(target, esc, lspace, next_input=None):
202 195 """Prepares a pinfo(2)/psearch call from a target name and the escape
203 196 (i.e. ? or ??)"""
204 197 method = 'pinfo2' if esc == '??' \
205 198 else 'psearch' if '*' in target \
206 199 else 'pinfo'
207 200 arg = " ".join([method, target])
208 201 if next_input is None:
209 202 return '%sget_ipython().magic(%r)' % (lspace, arg)
210 203 else:
211 204 return '%sget_ipython().set_next_input(%r);get_ipython().magic(%r)' % \
212 205 (lspace, next_input, arg)
213 206
214 207 # These define the transformations for the different escape characters.
def _tr_system(line_info):
    """Translate lines escaped with: !"""
    unindented = line_info.line.lstrip()
    # Drop every leading shell-escape character from the command text.
    cmd = unindented.lstrip(ESC_SHELL)
    return '%sget_ipython().system(%r)' % (line_info.pre, cmd)
219 212
220 213 def _tr_system2(line_info):
221 214 "Translate lines escaped with: !!"
222 215 cmd = line_info.line.lstrip()[2:]
223 216 return '%sget_ipython().getoutput(%r)' % (line_info.pre, cmd)
224 217
def _tr_help(line_info):
    """Translate lines escaped with: ?/??"""
    if line_info.line[1:]:
        return _make_help_call(line_info.ifun, line_info.esc, line_info.pre)
    # Nothing follows the first character: a naked help line just fires the
    # intro help screen.
    return 'get_ipython().show_usage()'
232 225
def _tr_magic(line_info):
    """Translate lines escaped with: %"""
    if line_info.line.startswith(ESC_MAGIC2):
        # Lines starting with the cell-magic escape are returned untouched.
        return line_info.line
    pieces = [line_info.ifun, line_info.the_rest]
    cmd = ' '.join(pieces).strip()
    return '%sget_ipython().magic(%r)' % (line_info.pre, cmd)
240 233
241 234 def _tr_quote(line_info):
242 235 "Translate lines escaped with: ,"
243 236 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
244 237 '", "'.join(line_info.the_rest.split()) )
245 238
246 239 def _tr_quote2(line_info):
247 240 "Translate lines escaped with: ;"
248 241 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
249 242 line_info.the_rest)
250 243
251 244 def _tr_paren(line_info):
252 245 "Translate lines escaped with: /"
253 246 return '%s%s(%s)' % (line_info.pre, line_info.ifun,
254 247 ", ".join(line_info.the_rest.split()))
255 248
# Dispatch table mapping an escape sequence to the function translating it.
tr = {
    ESC_SHELL: _tr_system,
    ESC_SH_CAP: _tr_system2,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
264 257
@StatelessInputTransformer.wrap
def escaped_commands(line):
    """Transform escaped commands - %magic, !system, ?help + various autocalls.

    Lines that are empty, whitespace-only, or whose escape has no entry in
    the ``tr`` table are returned unchanged.
    """
    if not line or line.isspace():
        return line

    info = LineInfo(line)
    handler = tr.get(info.esc)
    if handler is None:
        return line
    return handler(info)
276 269
277 270 _initial_space_re = re.compile(r'\s*')
278 271
279 272 _help_end_re = re.compile(r"""(%{0,2}
280 273 [a-zA-Z_*][\w*]* # Variable name
281 274 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
282 275 )
283 276 (\?\??)$ # ? or ??
284 277 """,
285 278 re.VERBOSE)
286 279
287 280 # Extra pseudotokens for multiline strings and data structures
288 281 _MULTILINE_STRING = object()
289 282 _MULTILINE_STRUCTURE = object()
290 283
291 284 def _line_tokens(line):
292 285 """Helper for has_comment and ends_in_comment_or_string."""
293 286 readline = StringIO(line).readline
294 287 toktypes = set()
295 288 try:
296 289 for t in generate_tokens(readline):
297 290 toktypes.add(t[0])
298 291 except TokenError as e:
299 292 # There are only two cases where a TokenError is raised.
300 293 if 'multi-line string' in e.args[0]:
301 294 toktypes.add(_MULTILINE_STRING)
302 295 else:
303 296 toktypes.add(_MULTILINE_STRUCTURE)
304 297 return toktypes
305 298
def has_comment(src):
    """Tell whether an input line has (i.e. ends in, or is) a comment.

    The line is tokenized, so a ``#`` inside a string is not mistaken for a
    comment.

    Parameters
    ----------
    src : string
        A single line input string.

    Returns
    -------
    comment : bool
        True if source has a comment.
    """
    token_types = _line_tokens(src)
    return tokenize2.COMMENT in token_types
322 315
def ends_in_comment_or_string(src):
    """Tell whether an input line ends in a comment or inside a multiline
    string.

    Parameters
    ----------
    src : string
        A single line input string.

    Returns
    -------
    comment : bool
        True if source ends in a comment or multiline string.
    """
    token_types = _line_tokens(src)
    if tokenize2.COMMENT in token_types:
        return True
    return _MULTILINE_STRING in token_types
339 332
340 333
@StatelessInputTransformer.wrap
def help_end(line):
    """Translate lines with ?/?? at the end"""
    match = _help_end_re.search(line)
    if match is None or ends_in_comment_or_string(line):
        return line

    indent = _initial_space_re.match(line).group(0)

    if line.strip() == match.group(0):
        # The help query is the whole line; nothing to restore afterwards.
        next_input = None
    else:
        # If we're mid-command, put it back on the next prompt for the user.
        next_input = line.rstrip('?')

    return _make_help_call(match.group(1), match.group(3), indent, next_input)
355 348
356 349
@CoroutineInputTransformer.wrap
def cellmagic(end_on_blank_line=False):
    """Captures & transforms cell magics.

    After a cell magic is started, this stores up any lines it gets until it is
    reset (sent None).

    Parameters
    ----------
    end_on_blank_line : bool, optional
        If true, a blank (whitespace-only) line also ends the cell body.
    """
    tpl = 'get_ipython().run_cell_magic(%r, %r, %r)'
    # Raw string so that \w and \? reach the regex engine as written.
    cellmagic_help_re = re.compile(r'%%\w+\?')
    line = ''
    while True:
        line = (yield line)
        # consume leading empty lines
        while not line:
            line = (yield line)

        if not line.startswith(ESC_MAGIC2):
            # This isn't a cell magic, idle waiting for reset then start over
            while line is not None:
                line = (yield line)
            continue

        if cellmagic_help_re.match(line):
            # This case will be handled by help_end
            continue

        first = line
        body = []
        line = (yield None)
        # Collect body lines until reset, or until a blank line when
        # end_on_blank_line is set.
        while (line is not None) and \
                ((line.strip() != '') or not end_on_blank_line):
            body.append(line)
            line = (yield None)

        # Output
        magic_name, _, first = first.partition(' ')
        magic_name = magic_name.lstrip(ESC_MAGIC2)
        line = tpl % (magic_name, first, u'\n'.join(body))
395 388
396 389
397 390 def _strip_prompts(prompt_re, initial_re=None, turnoff_re=None):
398 391 """Remove matching input prompts from a block of input.
399 392
400 393 Parameters
401 394 ----------
402 395 prompt_re : regular expression
403 396 A regular expression matching any input prompt (including continuation)
404 397 initial_re : regular expression, optional
405 398 A regular expression matching only the initial prompt, but not continuation.
406 399 If no initial expression is given, prompt_re will be used everywhere.
407 400 Used mainly for plain Python prompts, where the continuation prompt
408 401 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
409 402
410 403 If initial_re and prompt_re differ,
411 404 only initial_re will be tested against the first line.
412 405 If any prompt is found on the first two lines,
413 406 prompts will be stripped from the rest of the block.
414 407 """
415 408 if initial_re is None:
416 409 initial_re = prompt_re
417 410 line = ''
418 411 while True:
419 412 line = (yield line)
420 413
421 414 # First line of cell
422 415 if line is None:
423 416 continue
424 417 out, n1 = initial_re.subn('', line, count=1)
425 418 if turnoff_re and not n1:
426 419 if turnoff_re.match(line):
427 420 # We're in e.g. a cell magic; disable this transformer for
428 421 # the rest of the cell.
429 422 while line is not None:
430 423 line = (yield line)
431 424 continue
432 425
433 426 line = (yield out)
434 427
435 428 if line is None:
436 429 continue
437 430 # check for any prompt on the second line of the cell,
438 431 # because people often copy from just after the first prompt,
439 432 # so we might not see it in the first line.
440 433 out, n2 = prompt_re.subn('', line, count=1)
441 434 line = (yield out)
442 435
443 436 if n1 or n2:
444 437 # Found a prompt in the first two lines - check for it in
445 438 # the rest of the cell as well.
446 439 while line is not None:
447 440 line = (yield prompt_re.sub('', line, count=1))
448 441
449 442 else:
450 443 # Prompts not in input - wait for reset
451 444 while line is not None:
452 445 line = (yield line)
453 446
@CoroutineInputTransformer.wrap
def classic_prompt():
    """Strip the >>>/... prompts of the Python interactive shell."""
    # FIXME: non-capturing version (?:...) usable?
    return _strip_prompts(
        prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
        initial_re=re.compile(r'^>>>( |$)'),
        # Any %magic/!system is IPython syntax, so we needn't look for >>>
        # prompts
        turnoff_re=re.compile(r'^[%!]'),
    )
463 456
@CoroutineInputTransformer.wrap
def ipy_prompt():
    """Strip IPython's In [1]:/...: prompts."""
    # FIXME: non-capturing version (?:...) usable?
    return _strip_prompts(
        re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
        # Disable prompt stripping inside cell magics
        turnoff_re=re.compile(r'^%%'),
    )
472 465
473 466
@CoroutineInputTransformer.wrap
def leading_indent():
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line until it is reset.
    """
    space_re = re.compile(r'^[ \t]+')
    line = ''
    while True:
        line = (yield line)

        if line is None:
            continue

        found = space_re.match(line)
        # None means the first line had no leading whitespace; in that case
        # every line passes through unchanged until reset.
        prefix = found.group(0) if found else None
        while line is not None:
            if prefix is not None and line.startswith(prefix):
                line = line[len(prefix):]
            line = (yield line)
500 493
501 494
502 495 _assign_pat = \
503 496 r'''(?P<lhs>(\s*)
504 497 ([\w\.]+) # Initial identifier
505 498 (\s*,\s*
506 499 \*?[\w\.]+)* # Further identifiers for unpacking
507 500 \s*?,? # Trailing comma
508 501 )
509 502 \s*=\s*
510 503 '''
511 504
# Assignment whose right-hand side is a ``!`` system command.
assign_system_re = re.compile(
    r'{}!\s*(?P<cmd>.*)'.format(_assign_pat),
    re.VERBOSE,
)
assign_system_template = '%s = get_ipython().getoutput(%r)'


@StatelessInputTransformer.wrap
def assign_from_system(line):
    """Transform assignment from system commands (e.g. files = !ls)"""
    match = assign_system_re.match(line)
    if match is None:
        # Not an assignment from a system command: leave the line alone.
        return line

    lhs, cmd = match.group('lhs', 'cmd')
    return assign_system_template % (lhs, cmd)
522 515
# Assignment whose right-hand side is a ``%`` magic command.
assign_magic_re = re.compile(
    r'{}%\s*(?P<cmd>.*)'.format(_assign_pat),
    re.VERBOSE,
)
assign_magic_template = '%s = get_ipython().magic(%r)'


@StatelessInputTransformer.wrap
def assign_from_magic(line):
    """Transform assignment from magic commands (e.g. a = %who_ls)"""
    match = assign_magic_re.match(line)
    if match is None:
        # Not an assignment from a magic command: leave the line alone.
        return line

    lhs, cmd = match.group('lhs', 'cmd')
    return assign_magic_template % (lhs, cmd)
General Comments 0
You need to be logged in to leave comments. Login now