Merge pull request #12482 from meeseeksmachine/auto-backport-of-pr-12475-on-7.x
Matthias Bussonnier
r25950:16a0884d merge
@@ -1,723 +1,723 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Union
17 17 import warnings
18 18
19 19 _indent_re = re.compile(r'^[ \t]+')
20 20
21 21 def leading_empty_lines(lines):
22 22 """Remove leading empty lines
23 23
24 24 If the leading lines are empty or contain only whitespace, they will be
25 25 removed.
26 26 """
27 27 if not lines:
28 28 return lines
29 29 for i, line in enumerate(lines):
30 30 if line and not line.isspace():
31 31 return lines[i:]
32 32 return lines
33 33
34 34 def leading_indent(lines):
35 35 """Remove leading indentation.
36 36
37 37 If the first line starts with spaces or tabs, the same whitespace will be
38 38 removed from each following line in the cell.
39 39 """
40 40 if not lines:
41 41 return lines
42 42 m = _indent_re.match(lines[0])
43 43 if not m:
44 44 return lines
45 45 space = m.group(0)
46 46 n = len(space)
47 47 return [l[n:] if l.startswith(space) else l
48 48 for l in lines]
49 49
50 50 class PromptStripper:
51 51 """Remove matching input prompts from a block of input.
52 52
53 53 Parameters
54 54 ----------
55 55 prompt_re : regular expression
56 56 A regular expression matching any input prompt (including continuation,
57 57 e.g. ``...``)
58 58 initial_re : regular expression, optional
59 59 A regular expression matching only the initial prompt, but not continuation.
60 60 If no initial expression is given, prompt_re will be used everywhere.
61 61 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
62 62 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
63 63
64 64 If initial_re and prompt_re differ,
65 65 only initial_re will be tested against the first line.
66 66 If any prompt is found on the first two lines,
67 67 prompts will be stripped from the rest of the block.
68 68 """
69 69 def __init__(self, prompt_re, initial_re=None):
70 70 self.prompt_re = prompt_re
71 71 self.initial_re = initial_re or prompt_re
72 72
73 73 def _strip(self, lines):
74 74 return [self.prompt_re.sub('', l, count=1) for l in lines]
75 75
76 76 def __call__(self, lines):
77 77 if not lines:
78 78 return lines
79 79 if self.initial_re.match(lines[0]) or \
80 80 (len(lines) > 1 and self.prompt_re.match(lines[1])):
81 81 return self._strip(lines)
82 82 return lines
83 83
84 84 classic_prompt = PromptStripper(
85 85 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
86 86 initial_re=re.compile(r'^>>>( |$)')
87 87 )
88 88
89 89 ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
90 90
91 91 def cell_magic(lines):
92 92 if not lines or not lines[0].startswith('%%'):
93 93 return lines
94 94 if re.match(r'%%\w+\?', lines[0]):
95 95 # This case will be handled by help_end
96 96 return lines
97 magic_name, _, first_line = lines[0][2:-1].partition(' ')
97 magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
98 98 body = ''.join(lines[1:])
99 99 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
100 100 % (magic_name, first_line, body)]
101 101
102 102
103 103 def _find_assign_op(token_line) -> Union[int, None]:
104 104 """Get the index of the first assignment in the line ('=' not inside brackets)
105 105
106 106 Note: We don't try to support multiple special assignments (a = b = %foo)
107 107 """
108 108 paren_level = 0
109 109 for i, ti in enumerate(token_line):
110 110 s = ti.string
111 111 if s == '=' and paren_level == 0:
112 112 return i
113 113 if s in {'(','[','{'}:
114 114 paren_level += 1
115 115 elif s in {')', ']', '}'}:
116 116 if paren_level > 0:
117 117 paren_level -= 1
118 118
119 119 def find_end_of_continued_line(lines, start_line: int):
120 120 """Find the last line of a line explicitly extended using backslashes.
121 121
122 122 Uses 0-indexed line numbers.
123 123 """
124 124 end_line = start_line
125 125 while lines[end_line].endswith('\\\n'):
126 126 end_line += 1
127 127 if end_line >= len(lines):
128 128 break
129 129 return end_line
130 130
131 131 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
132 132 r"""Assemble a single line from multiple continued line pieces
133 133
134 134 Continued lines are lines ending in ``\``, and the line following the last
135 135 ``\`` in the block.
136 136
137 137 For example, this code continues over multiple lines::
138 138
139 139 if (assign_ix is not None) \
140 140 and (len(line) >= assign_ix + 2) \
141 141 and (line[assign_ix+1].string == '%') \
142 142 and (line[assign_ix+2].type == tokenize.NAME):
143 143
144 144 This statement contains four continued line pieces.
145 145 Assembling these pieces into a single line would give::
146 146
147 147 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
148 148
149 149 This uses 0-indexed line numbers. *start* is (lineno, colno).
150 150
151 151 Used to allow ``%magic`` and ``!system`` commands to be continued over
152 152 multiple lines.
153 153 """
154 154 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
155 return ' '.join([p[:-2] for p in parts[:-1]] # Strip backslash+newline
156 + [parts[-1][:-1]]) # Strip newline from last line
155 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
156 + [parts[-1].rstrip()]) # Strip newline from last line
157 157
158 158 class TokenTransformBase:
159 159 """Base class for transformations which examine tokens.
160 160
161 161 Special syntax should not be transformed when it occurs inside strings or
162 162 comments. This is hard to reliably avoid with regexes. The solution is to
163 163 tokenise the code as Python, and recognise the special syntax in the tokens.
164 164
165 165 IPython's special syntax is not valid Python syntax, so tokenising may go
166 166 wrong after the special syntax starts. These classes therefore find and
167 167 transform *one* instance of special syntax at a time into regular Python
168 168 syntax. After each transformation, tokens are regenerated to find the next
169 169 piece of special syntax.
170 170
171 171 Subclasses need to implement one class method (find)
172 172 and one regular method (transform).
173 173
174 174 The priority attribute can select which transformation to apply if multiple
175 175 transformers match in the same place. Lower numbers have higher priority.
176 176 This allows "%magic?" to be turned into a help call rather than a magic call.
177 177 """
178 178 # Lower numbers -> higher priority (for matches in the same location)
179 179 priority = 10
180 180
181 181 def sortby(self):
182 182 return self.start_line, self.start_col, self.priority
183 183
184 184 def __init__(self, start):
185 185 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
186 186 self.start_col = start[1]
187 187
188 188 @classmethod
189 189 def find(cls, tokens_by_line):
190 190 """Find one instance of special syntax in the provided tokens.
191 191
192 192 Tokens are grouped into logical lines for convenience,
193 193 so it is easy to e.g. look at the first token of each line.
194 194 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
195 195
196 196 This should return an instance of its class, pointing to the start
197 197 position it has found, or None if it found no match.
198 198 """
199 199 raise NotImplementedError
200 200
201 201 def transform(self, lines: List[str]):
202 202 """Transform one instance of special syntax found by ``find()``
203 203
204 204 Takes a list of strings representing physical lines,
205 205 returns a similar list of transformed lines.
206 206 """
207 207 raise NotImplementedError
208 208
209 209 class MagicAssign(TokenTransformBase):
210 210 """Transformer for assignments from magics (a = %foo)"""
211 211 @classmethod
212 212 def find(cls, tokens_by_line):
213 213 """Find the first magic assignment (a = %foo) in the cell.
214 214 """
215 215 for line in tokens_by_line:
216 216 assign_ix = _find_assign_op(line)
217 217 if (assign_ix is not None) \
218 218 and (len(line) >= assign_ix + 2) \
219 219 and (line[assign_ix+1].string == '%') \
220 220 and (line[assign_ix+2].type == tokenize.NAME):
221 221 return cls(line[assign_ix+1].start)
222 222
223 223 def transform(self, lines: List[str]):
224 224 """Transform a magic assignment found by the ``find()`` classmethod.
225 225 """
226 226 start_line, start_col = self.start_line, self.start_col
227 227 lhs = lines[start_line][:start_col]
228 228 end_line = find_end_of_continued_line(lines, start_line)
229 229 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
230 230 assert rhs.startswith('%'), rhs
231 231 magic_name, _, args = rhs[1:].partition(' ')
232 232
233 233 lines_before = lines[:start_line]
234 234 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
235 235 new_line = lhs + call + '\n'
236 236 lines_after = lines[end_line+1:]
237 237
238 238 return lines_before + [new_line] + lines_after
239 239
240 240
241 241 class SystemAssign(TokenTransformBase):
242 242 """Transformer for assignments from system commands (a = !foo)"""
243 243 @classmethod
244 244 def find(cls, tokens_by_line):
245 245 """Find the first system assignment (a = !foo) in the cell.
246 246 """
247 247 for line in tokens_by_line:
248 248 assign_ix = _find_assign_op(line)
249 249 if (assign_ix is not None) \
250 250 and not line[assign_ix].line.strip().startswith('=') \
251 251 and (len(line) >= assign_ix + 2) \
252 252 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
253 253 ix = assign_ix + 1
254 254
255 255 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
256 256 if line[ix].string == '!':
257 257 return cls(line[ix].start)
258 258 elif not line[ix].string.isspace():
259 259 break
260 260 ix += 1
261 261
262 262 def transform(self, lines: List[str]):
263 263 """Transform a system assignment found by the ``find()`` classmethod.
264 264 """
265 265 start_line, start_col = self.start_line, self.start_col
266 266
267 267 lhs = lines[start_line][:start_col]
268 268 end_line = find_end_of_continued_line(lines, start_line)
269 269 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
270 270 assert rhs.startswith('!'), rhs
271 271 cmd = rhs[1:]
272 272
273 273 lines_before = lines[:start_line]
274 274 call = "get_ipython().getoutput({!r})".format(cmd)
275 275 new_line = lhs + call + '\n'
276 276 lines_after = lines[end_line + 1:]
277 277
278 278 return lines_before + [new_line] + lines_after
279 279
280 280 # The escape sequences that define the syntax transformations IPython will
281 281 # apply to user input. These can NOT be just changed here: many regular
282 282 # expressions and other parts of the code may use their hardcoded values, and
283 283 # for all intents and purposes they constitute the 'IPython syntax', so they
284 284 # should be considered fixed.
285 285
286 286 ESC_SHELL = '!' # Send line to underlying system shell
287 287 ESC_SH_CAP = '!!' # Send line to system shell and capture output
288 288 ESC_HELP = '?' # Find information about object
289 289 ESC_HELP2 = '??' # Find extra-detailed information about object
290 290 ESC_MAGIC = '%' # Call magic function
291 291 ESC_MAGIC2 = '%%' # Call cell-magic function
292 292 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
293 293 ESC_QUOTE2 = ';' # Quote all args as a single string, call
294 294 ESC_PAREN = '/' # Call first argument with rest of line as arguments
295 295
296 296 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
297 297 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
298 298
299 299 def _make_help_call(target, esc, next_input=None):
300 300 """Prepares a pinfo(2)/psearch call from a target name and the escape
301 301 (i.e. ? or ??)"""
302 302 method = 'pinfo2' if esc == '??' \
303 303 else 'psearch' if '*' in target \
304 304 else 'pinfo'
305 305 arg = " ".join([method, target])
306 306 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
307 307 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
308 308 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
309 309 if next_input is None:
310 310 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
311 311 else:
312 312 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
313 313 (next_input, t_magic_name, t_magic_arg_s)
314 314
315 315 def _tr_help(content):
316 316 """Translate lines escaped with: ?
317 317
318 318 A naked help line should fire the intro help screen (shell.show_usage())
319 319 """
320 320 if not content:
321 321 return 'get_ipython().show_usage()'
322 322
323 323 return _make_help_call(content, '?')
324 324
325 325 def _tr_help2(content):
326 326 """Translate lines escaped with: ??
327 327
328 328 A naked help line should fire the intro help screen (shell.show_usage())
329 329 """
330 330 if not content:
331 331 return 'get_ipython().show_usage()'
332 332
333 333 return _make_help_call(content, '??')
334 334
335 335 def _tr_magic(content):
336 336 "Translate lines escaped with a percent sign: %"
337 337 name, _, args = content.partition(' ')
338 338 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
339 339
340 340 def _tr_quote(content):
341 341 "Translate lines escaped with a comma: ,"
342 342 name, _, args = content.partition(' ')
343 343 return '%s("%s")' % (name, '", "'.join(args.split()) )
344 344
345 345 def _tr_quote2(content):
346 346 "Translate lines escaped with a semicolon: ;"
347 347 name, _, args = content.partition(' ')
348 348 return '%s("%s")' % (name, args)
349 349
350 350 def _tr_paren(content):
351 351 "Translate lines escaped with a slash: /"
352 352 name, _, args = content.partition(' ')
353 353 return '%s(%s)' % (name, ", ".join(args.split()))
354 354
355 355 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
356 356 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
357 357 ESC_HELP : _tr_help,
358 358 ESC_HELP2 : _tr_help2,
359 359 ESC_MAGIC : _tr_magic,
360 360 ESC_QUOTE : _tr_quote,
361 361 ESC_QUOTE2 : _tr_quote2,
362 362 ESC_PAREN : _tr_paren }
363 363
364 364 class EscapedCommand(TokenTransformBase):
365 365 """Transformer for escaped commands like %foo, !foo, or /foo"""
366 366 @classmethod
367 367 def find(cls, tokens_by_line):
368 368 """Find the first escaped command (%foo, !foo, etc.) in the cell.
369 369 """
370 370 for line in tokens_by_line:
371 371 if not line:
372 372 continue
373 373 ix = 0
374 374 ll = len(line)
375 375 while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
376 376 ix += 1
377 377 if ix >= ll:
378 378 continue
379 379 if line[ix].string in ESCAPE_SINGLES:
380 380 return cls(line[ix].start)
381 381
382 382 def transform(self, lines):
383 383 """Transform an escaped line found by the ``find()`` classmethod.
384 384 """
385 385 start_line, start_col = self.start_line, self.start_col
386 386
387 387 indent = lines[start_line][:start_col]
388 388 end_line = find_end_of_continued_line(lines, start_line)
389 389 line = assemble_continued_line(lines, (start_line, start_col), end_line)
390 390
391 391 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
392 392 escape, content = line[:2], line[2:]
393 393 else:
394 394 escape, content = line[:1], line[1:]
395 395
396 396 if escape in tr:
397 397 call = tr[escape](content)
398 398 else:
399 399 call = ''
400 400
401 401 lines_before = lines[:start_line]
402 402 new_line = indent + call + '\n'
403 403 lines_after = lines[end_line + 1:]
404 404
405 405 return lines_before + [new_line] + lines_after
406 406
407 407 _help_end_re = re.compile(r"""(%{0,2}
408 408 (?!\d)[\w*]+ # Variable name
409 409 (\.(?!\d)[\w*]+)* # .etc.etc
410 410 )
411 411 (\?\??)$ # ? or ??
412 412 """,
413 413 re.VERBOSE)
414 414
415 415 class HelpEnd(TokenTransformBase):
416 416 """Transformer for help syntax: obj? and obj??"""
417 417 # This needs to be higher priority (lower number) than EscapedCommand so
418 418 # that inspecting magics (%foo?) works.
419 419 priority = 5
420 420
421 421 def __init__(self, start, q_locn):
422 422 super().__init__(start)
423 423 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
424 424 self.q_col = q_locn[1]
425 425
426 426 @classmethod
427 427 def find(cls, tokens_by_line):
428 428 """Find the first help command (foo?) in the cell.
429 429 """
430 430 for line in tokens_by_line:
431 431 # Last token is NEWLINE; look at last but one
432 432 if len(line) > 2 and line[-2].string == '?':
433 433 # Find the first token that's not INDENT/DEDENT
434 434 ix = 0
435 435 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
436 436 ix += 1
437 437 return cls(line[ix].start, line[-2].start)
438 438
439 439 def transform(self, lines):
440 440 """Transform a help command found by the ``find()`` classmethod.
441 441 """
442 442 piece = ''.join(lines[self.start_line:self.q_line+1])
443 443 indent, content = piece[:self.start_col], piece[self.start_col:]
444 444 lines_before = lines[:self.start_line]
445 445 lines_after = lines[self.q_line + 1:]
446 446
447 447 m = _help_end_re.search(content)
448 448 if not m:
449 449 raise SyntaxError(content)
450 450 assert m is not None, content
451 451 target = m.group(1)
452 452 esc = m.group(3)
453 453
454 454 # If we're mid-command, put it back on the next prompt for the user.
455 455 next_input = None
456 456 if (not lines_before) and (not lines_after) \
457 457 and content.strip() != m.group(0):
458 458 next_input = content.rstrip('?\n')
459 459
460 460 call = _make_help_call(target, esc, next_input=next_input)
461 461 new_line = indent + call + '\n'
462 462
463 463 return lines_before + [new_line] + lines_after
464 464
465 465 def make_tokens_by_line(lines:List[str]):
466 466 """Tokenize a series of lines and group tokens by line.
467 467
468 468 The tokens for a multiline Python string or expression are grouped as one
469 469 line. All lines except the last lines should keep their line ending ('\\n',
470 470 '\\r\\n') for this to properly work. Use `.splitlines(keeplineending=True)`
471 471 for example when passing block of text to this function.
472 472
473 473 """
474 474 # NL tokens are used inside multiline expressions, but also after blank
475 475 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
476 476 # We want to group the former case together but split the latter, so we
477 477 # track parentheses level, similar to the internals of tokenize.
478 478 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
479 479 tokens_by_line = [[]]
480 480 if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
481 481 warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
482 482 parenlev = 0
483 483 try:
484 484 for token in tokenize.generate_tokens(iter(lines).__next__):
485 485 tokens_by_line[-1].append(token)
486 486 if (token.type == NEWLINE) \
487 487 or ((token.type == NL) and (parenlev <= 0)):
488 488 tokens_by_line.append([])
489 489 elif token.string in {'(', '[', '{'}:
490 490 parenlev += 1
491 491 elif token.string in {')', ']', '}'}:
492 492 if parenlev > 0:
493 493 parenlev -= 1
494 494 except tokenize.TokenError:
495 495 # Input ended in a multiline string or expression. That's OK for us.
496 496 pass
497 497
498 498
499 499 if not tokens_by_line[-1]:
500 500 tokens_by_line.pop()
501 501
502 502
503 503 return tokens_by_line
504 504
505 505 def show_linewise_tokens(s: str):
506 506 """For investigation and debugging"""
507 507 if not s.endswith('\n'):
508 508 s += '\n'
509 509 lines = s.splitlines(keepends=True)
510 510 for line in make_tokens_by_line(lines):
511 511 print("Line -------")
512 512 for tokinfo in line:
513 513 print(" ", tokinfo)
514 514
515 515 # Arbitrary limit to prevent getting stuck in infinite loops
516 516 TRANSFORM_LOOP_LIMIT = 500
517 517
518 518 class TransformerManager:
519 519 """Applies various transformations to a cell or code block.
520 520
521 521 The key methods for external use are ``transform_cell()``
522 522 and ``check_complete()``.
523 523 """
524 524 def __init__(self):
525 525 self.cleanup_transforms = [
526 526 leading_empty_lines,
527 527 leading_indent,
528 528 classic_prompt,
529 529 ipython_prompt,
530 530 ]
531 531 self.line_transforms = [
532 532 cell_magic,
533 533 ]
534 534 self.token_transformers = [
535 535 MagicAssign,
536 536 SystemAssign,
537 537 EscapedCommand,
538 538 HelpEnd,
539 539 ]
540 540
541 541 def do_one_token_transform(self, lines):
542 542 """Find and run the transform earliest in the code.
543 543
544 544 Returns (changed, lines).
545 545
546 546 This method is called repeatedly until changed is False, indicating
547 547 that all available transformations are complete.
548 548
549 549 The tokens following IPython special syntax might not be valid, so
550 550 the transformed code is retokenised every time to identify the next
551 551 piece of special syntax. Hopefully long code cells are mostly valid
552 552 Python, not using lots of IPython special syntax, so this shouldn't be
553 553 a performance issue.
554 554 """
555 555 tokens_by_line = make_tokens_by_line(lines)
556 556 candidates = []
557 557 for transformer_cls in self.token_transformers:
558 558 transformer = transformer_cls.find(tokens_by_line)
559 559 if transformer:
560 560 candidates.append(transformer)
561 561
562 562 if not candidates:
563 563 # Nothing to transform
564 564 return False, lines
565 565 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
566 566 for transformer in ordered_transformers:
567 567 try:
568 568 return True, transformer.transform(lines)
569 569 except SyntaxError:
570 570 pass
571 571 return False, lines
572 572
573 573 def do_token_transforms(self, lines):
574 574 for _ in range(TRANSFORM_LOOP_LIMIT):
575 575 changed, lines = self.do_one_token_transform(lines)
576 576 if not changed:
577 577 return lines
578 578
579 579 raise RuntimeError("Input transformation still changing after "
580 580 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
581 581
582 582 def transform_cell(self, cell: str) -> str:
583 583 """Transforms a cell of input code"""
584 584 if not cell.endswith('\n'):
585 585 cell += '\n' # Ensure the cell has a trailing newline
586 586 lines = cell.splitlines(keepends=True)
587 587 for transform in self.cleanup_transforms + self.line_transforms:
588 588 lines = transform(lines)
589 589
590 590 lines = self.do_token_transforms(lines)
591 591 return ''.join(lines)
592 592
593 593 def check_complete(self, cell: str):
594 594 """Return whether a block of code is ready to execute, or should be continued
595 595
596 596 Parameters
597 597 ----------
598 598 cell : str
599 599 Python input code, which can be multiline.
600 600
601 601 Returns
602 602 -------
603 603 status : str
604 604 One of 'complete', 'incomplete', or 'invalid' if source is not a
605 605 prefix of valid code.
606 606 indent_spaces : int or None
607 607 The number of spaces by which to indent the next line of code. If
608 608 status is not 'incomplete', this is None.
609 609 """
610 610 # Remember whether the cell ends in a newline.
611 611 ends_with_newline = False
612 612 for character in reversed(cell):
613 613 if character == '\n':
614 614 ends_with_newline = True
615 615 break
616 616 elif character.strip():
617 617 break
618 618 else:
619 619 continue
620 620
621 621 if not ends_with_newline:
622 622 # Append a newline for consistent tokenization
623 623 # See https://bugs.python.org/issue33899
624 624 cell += '\n'
625 625
626 626 lines = cell.splitlines(keepends=True)
627 627
628 628 if not lines:
629 629 return 'complete', None
630 630
631 631 if lines[-1].endswith('\\'):
632 632 # Explicit backslash continuation
633 633 return 'incomplete', find_last_indent(lines)
634 634
635 635 try:
636 636 for transform in self.cleanup_transforms:
637 637 if not getattr(transform, 'has_side_effects', False):
638 638 lines = transform(lines)
639 639 except SyntaxError:
640 640 return 'invalid', None
641 641
642 642 if lines[0].startswith('%%'):
643 643 # Special case for cell magics - completion marked by blank line
644 644 if lines[-1].strip():
645 645 return 'incomplete', find_last_indent(lines)
646 646 else:
647 647 return 'complete', None
648 648
649 649 try:
650 650 for transform in self.line_transforms:
651 651 if not getattr(transform, 'has_side_effects', False):
652 652 lines = transform(lines)
653 653 lines = self.do_token_transforms(lines)
654 654 except SyntaxError:
655 655 return 'invalid', None
656 656
657 657 tokens_by_line = make_tokens_by_line(lines)
658 658
659 659 if not tokens_by_line:
660 660 return 'incomplete', find_last_indent(lines)
661 661
662 662 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
663 663 # We're in a multiline string or expression
664 664 return 'incomplete', find_last_indent(lines)
665 665
666 666 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}
667 667
668 668 # Pop the last line which only contains DEDENTs and ENDMARKER
669 669 last_token_line = None
670 670 if {t.type for t in tokens_by_line[-1]} in [
671 671 {tokenize.DEDENT, tokenize.ENDMARKER},
672 672 {tokenize.ENDMARKER}
673 673 ] and len(tokens_by_line) > 1:
674 674 last_token_line = tokens_by_line.pop()
675 675
676 676 while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
677 677 tokens_by_line[-1].pop()
678 678
679 679 if not tokens_by_line[-1]:
680 680 return 'incomplete', find_last_indent(lines)
681 681
682 682 if tokens_by_line[-1][-1].string == ':':
683 683 # The last line starts a block (e.g. 'if foo:')
684 684 ix = 0
685 685 while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
686 686 ix += 1
687 687
688 688 indent = tokens_by_line[-1][ix].start[1]
689 689 return 'incomplete', indent + 4
690 690
691 691 if tokens_by_line[-1][0].line.endswith('\\'):
692 692 return 'incomplete', None
693 693
694 694 # At this point, our checks think the code is complete (or invalid).
695 695 # We'll use codeop.compile_command to check this with the real parser
696 696 try:
697 697 with warnings.catch_warnings():
698 698 warnings.simplefilter('error', SyntaxWarning)
699 699 res = compile_command(''.join(lines), symbol='exec')
700 700 except (SyntaxError, OverflowError, ValueError, TypeError,
701 701 MemoryError, SyntaxWarning):
702 702 return 'invalid', None
703 703 else:
704 704 if res is None:
705 705 return 'incomplete', find_last_indent(lines)
706 706
707 707 if last_token_line and last_token_line[0].type == tokenize.DEDENT:
708 708 if ends_with_newline:
709 709 return 'complete', None
710 710 return 'incomplete', find_last_indent(lines)
711 711
712 712 # If there's a blank line at the end, assume we're ready to execute
713 713 if not lines[-1].strip():
714 714 return 'complete', None
715 715
716 716 return 'complete', None
717 717
718 718
719 719 def find_last_indent(lines):
720 720 m = _indent_re.match(lines[-1])
721 721 if not m:
722 722 return 0
723 723 return len(m.group(0).replace('\t', ' '*4))
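For reviewers skimming the hunk above, a minimal usage sketch of the machinery it touches (assuming a stock IPython 7.x checkout; return values abridged in the comments):

    from IPython.core.inputtransformer2 import TransformerManager

    tm = TransformerManager()
    # Cell magics collapse to a single run_cell_magic call (see cell_magic above).
    tm.transform_cell("%%bash\necho hi\n")
    # -> "get_ipython().run_cell_magic('bash', '', 'echo hi\\n')\n"
    # System assignments become getoutput calls (see SystemAssign above).
    tm.transform_cell("files = !ls\n")
    # -> "files = get_ipython().getoutput('ls')\n"
    # check_complete reports readiness plus a suggested indent for the next line.
    tm.check_complete("for i in range(3):")
    # -> ('incomplete', 4)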
@@ -1,326 +1,337 b''
1 1 """Tests for the token-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2_line for tests for line-based
5 5 transformations.
6 6 """
7 7 import nose.tools as nt
8 8 import string
9 9
10 10 from IPython.core import inputtransformer2 as ipt2
11 11 from IPython.core.inputtransformer2 import make_tokens_by_line, _find_assign_op
12 12
13 13 from textwrap import dedent
14 14
15 15 MULTILINE_MAGIC = ("""\
16 16 a = f()
17 17 %foo \\
18 18 bar
19 19 g()
20 20 """.splitlines(keepends=True), (2, 0), """\
21 21 a = f()
22 22 get_ipython().run_line_magic('foo', ' bar')
23 23 g()
24 24 """.splitlines(keepends=True))
25 25
26 26 INDENTED_MAGIC = ("""\
27 27 for a in range(5):
28 28 %ls
29 29 """.splitlines(keepends=True), (2, 4), """\
30 30 for a in range(5):
31 31 get_ipython().run_line_magic('ls', '')
32 32 """.splitlines(keepends=True))
33 33
34 CRLF_MAGIC = ([
35 "a = f()\n",
36 "%ls\r\n",
37 "g()\n"
38 ], (2, 0), [
39 "a = f()\n",
40 "get_ipython().run_line_magic('ls', '')\n",
41 "g()\n"
42 ])
43
34 44 MULTILINE_MAGIC_ASSIGN = ("""\
35 45 a = f()
36 46 b = %foo \\
37 47 bar
38 48 g()
39 49 """.splitlines(keepends=True), (2, 4), """\
40 50 a = f()
41 51 b = get_ipython().run_line_magic('foo', ' bar')
42 52 g()
43 53 """.splitlines(keepends=True))
44 54
45 55 MULTILINE_SYSTEM_ASSIGN = ("""\
46 56 a = f()
47 57 b = !foo \\
48 58 bar
49 59 g()
50 60 """.splitlines(keepends=True), (2, 4), """\
51 61 a = f()
52 62 b = get_ipython().getoutput('foo bar')
53 63 g()
54 64 """.splitlines(keepends=True))
55 65
56 66 #####
57 67
58 68 MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT = ("""\
59 69 def test():
60 70 for i in range(1):
61 71 print(i)
62 72 res =! ls
63 73 """.splitlines(keepends=True), (4, 7), '''\
64 74 def test():
65 75 for i in range(1):
66 76 print(i)
67 77 res =get_ipython().getoutput(\' ls\')
68 78 '''.splitlines(keepends=True))
69 79
70 80 ######
71 81
72 82 AUTOCALL_QUOTE = (
73 83 [",f 1 2 3\n"], (1, 0),
74 84 ['f("1", "2", "3")\n']
75 85 )
76 86
77 87 AUTOCALL_QUOTE2 = (
78 88 [";f 1 2 3\n"], (1, 0),
79 89 ['f("1 2 3")\n']
80 90 )
81 91
82 92 AUTOCALL_PAREN = (
83 93 ["/f 1 2 3\n"], (1, 0),
84 94 ['f(1, 2, 3)\n']
85 95 )
86 96
87 97 SIMPLE_HELP = (
88 98 ["foo?\n"], (1, 0),
89 99 ["get_ipython().run_line_magic('pinfo', 'foo')\n"]
90 100 )
91 101
92 102 DETAILED_HELP = (
93 103 ["foo??\n"], (1, 0),
94 104 ["get_ipython().run_line_magic('pinfo2', 'foo')\n"]
95 105 )
96 106
97 107 MAGIC_HELP = (
98 108 ["%foo?\n"], (1, 0),
99 109 ["get_ipython().run_line_magic('pinfo', '%foo')\n"]
100 110 )
101 111
102 112 HELP_IN_EXPR = (
103 113 ["a = b + c?\n"], (1, 0),
104 114 ["get_ipython().set_next_input('a = b + c');"
105 115 "get_ipython().run_line_magic('pinfo', 'c')\n"]
106 116 )
107 117
108 118 HELP_CONTINUED_LINE = ("""\
109 119 a = \\
110 120 zip?
111 121 """.splitlines(keepends=True), (1, 0),
112 122 [r"get_ipython().set_next_input('a = \\\nzip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
113 123 )
114 124
115 125 HELP_MULTILINE = ("""\
116 126 (a,
117 127 b) = zip?
118 128 """.splitlines(keepends=True), (1, 0),
119 129 [r"get_ipython().set_next_input('(a,\nb) = zip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
120 130 )
121 131
122 132 HELP_UNICODE = (
123 133 ["Ο€.foo?\n"], (1, 0),
124 134 ["get_ipython().run_line_magic('pinfo', 'Ο€.foo')\n"]
125 135 )
126 136
127 137
128 138 def null_cleanup_transformer(lines):
129 139 """
130 140 A cleanup transform that returns an empty list.
131 141 """
132 142 return []
133 143
134 144 def check_make_token_by_line_never_ends_empty():
135 145 """
136 146 Check that no sequence of one or two printable characters ends up producing an empty list of tokens
137 147 """
138 148 from string import printable
139 149 for c in printable:
140 150 nt.assert_not_equal(make_tokens_by_line(c)[-1], [])
141 151 for k in printable:
142 152 nt.assert_not_equal(make_tokens_by_line(c+k)[-1], [])
143 153
144 154 def check_find(transformer, case, match=True):
145 155 sample, expected_start, _ = case
146 156 tbl = make_tokens_by_line(sample)
147 157 res = transformer.find(tbl)
148 158 if match:
149 159 # start_line is stored 0-indexed, expected values are 1-indexed
150 160 nt.assert_equal((res.start_line+1, res.start_col), expected_start)
151 161 return res
152 162 else:
153 163 nt.assert_is(res, None)
154 164
155 165 def check_transform(transformer_cls, case):
156 166 lines, start, expected = case
157 167 transformer = transformer_cls(start)
158 168 nt.assert_equal(transformer.transform(lines), expected)
159 169
160 170 def test_continued_line():
161 171 lines = MULTILINE_MAGIC_ASSIGN[0]
162 172 nt.assert_equal(ipt2.find_end_of_continued_line(lines, 1), 2)
163 173
164 174 nt.assert_equal(ipt2.assemble_continued_line(lines, (1, 5), 2), "foo bar")
165 175
166 176 def test_find_assign_magic():
167 177 check_find(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
168 178 check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN, match=False)
169 179 check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT, match=False)
170 180
171 181 def test_transform_assign_magic():
172 182 check_transform(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
173 183
174 184 def test_find_assign_system():
175 185 check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
176 186 check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT)
177 187 check_find(ipt2.SystemAssign, (["a = !ls\n"], (1, 5), None))
178 188 check_find(ipt2.SystemAssign, (["a=!ls\n"], (1, 2), None))
179 189 check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False)
180 190
181 191 def test_transform_assign_system():
182 192 check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
183 193 check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT)
184 194
185 195 def test_find_magic_escape():
186 196 check_find(ipt2.EscapedCommand, MULTILINE_MAGIC)
187 197 check_find(ipt2.EscapedCommand, INDENTED_MAGIC)
188 198 check_find(ipt2.EscapedCommand, MULTILINE_MAGIC_ASSIGN, match=False)
189 199
190 200 def test_transform_magic_escape():
191 201 check_transform(ipt2.EscapedCommand, MULTILINE_MAGIC)
192 202 check_transform(ipt2.EscapedCommand, INDENTED_MAGIC)
203 check_transform(ipt2.EscapedCommand, CRLF_MAGIC)
193 204
194 205 def test_find_autocalls():
195 206 for case in [AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN]:
196 207 print("Testing %r" % case[0])
197 208 check_find(ipt2.EscapedCommand, case)
198 209
199 210 def test_transform_autocall():
200 211 for case in [AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN]:
201 212 print("Testing %r" % case[0])
202 213 check_transform(ipt2.EscapedCommand, case)
203 214
204 215 def test_find_help():
205 216 for case in [SIMPLE_HELP, DETAILED_HELP, MAGIC_HELP, HELP_IN_EXPR]:
206 217 check_find(ipt2.HelpEnd, case)
207 218
208 219 tf = check_find(ipt2.HelpEnd, HELP_CONTINUED_LINE)
209 220 nt.assert_equal(tf.q_line, 1)
210 221 nt.assert_equal(tf.q_col, 3)
211 222
212 223 tf = check_find(ipt2.HelpEnd, HELP_MULTILINE)
213 224 nt.assert_equal(tf.q_line, 1)
214 225 nt.assert_equal(tf.q_col, 8)
215 226
216 227 # ? in a comment does not trigger help
217 228 check_find(ipt2.HelpEnd, (["foo # bar?\n"], None, None), match=False)
218 229 # Nor in a string
219 230 check_find(ipt2.HelpEnd, (["foo = '''bar?\n"], None, None), match=False)
220 231
221 232 def test_transform_help():
222 233 tf = ipt2.HelpEnd((1, 0), (1, 9))
223 234 nt.assert_equal(tf.transform(HELP_IN_EXPR[0]), HELP_IN_EXPR[2])
224 235
225 236 tf = ipt2.HelpEnd((1, 0), (2, 3))
226 237 nt.assert_equal(tf.transform(HELP_CONTINUED_LINE[0]), HELP_CONTINUED_LINE[2])
227 238
228 239 tf = ipt2.HelpEnd((1, 0), (2, 8))
229 240 nt.assert_equal(tf.transform(HELP_MULTILINE[0]), HELP_MULTILINE[2])
230 241
231 242 tf = ipt2.HelpEnd((1, 0), (1, 0))
232 243 nt.assert_equal(tf.transform(HELP_UNICODE[0]), HELP_UNICODE[2])
233 244
234 245 def test_find_assign_op_dedent():
235 246 """
236 247 be careful that empty token like dedent are not counted as parens
237 248 """
238 249 class Tk:
239 250 def __init__(self, s):
240 251 self.string = s
241 252
242 253 nt.assert_equal(_find_assign_op([Tk(s) for s in ('','a','=','b')]), 2)
243 254 nt.assert_equal(_find_assign_op([Tk(s) for s in ('','(', 'a','=','b', ')', '=' ,'5')]), 6)
244 255
245 256 def test_check_complete():
246 257 cc = ipt2.TransformerManager().check_complete
247 258 nt.assert_equal(cc("a = 1"), ('complete', None))
248 259 nt.assert_equal(cc("for a in range(5):"), ('incomplete', 4))
249 260 nt.assert_equal(cc("for a in range(5):\n if a > 0:"), ('incomplete', 8))
250 261 nt.assert_equal(cc("raise = 2"), ('invalid', None))
251 262 nt.assert_equal(cc("a = [1,\n2,"), ('incomplete', 0))
252 263 nt.assert_equal(cc(")"), ('incomplete', 0))
253 264 nt.assert_equal(cc("\\\r\n"), ('incomplete', 0))
254 265 nt.assert_equal(cc("a = '''\n hi"), ('incomplete', 3))
255 266 nt.assert_equal(cc("def a():\n x=1\n global x"), ('invalid', None))
256 267 nt.assert_equal(cc("a \\ "), ('invalid', None)) # Nothing allowed after backslash
257 268 nt.assert_equal(cc("1\\\n+2"), ('complete', None))
258 269 nt.assert_equal(cc("exit"), ('complete', None))
259 270
260 271 example = dedent("""
261 272 if True:
262 273 a=1""" )
263 274
264 275 nt.assert_equal(cc(example), ('incomplete', 4))
265 276 nt.assert_equal(cc(example+'\n'), ('complete', None))
266 277 nt.assert_equal(cc(example+'\n '), ('complete', None))
267 278
268 279 # no need to loop on all the letters/numbers.
269 280 short = '12abAB'+string.printable[62:]
270 281 for c in short:
271 282 # test does not raise:
272 283 cc(c)
273 284 for k in short:
274 285 cc(c+k)
275 286
276 287 nt.assert_equal(cc("def f():\n x=0\n \\\n "), ('incomplete', 2))
277 288
278 289 def test_check_complete_II():
279 290 """
280 291 Test that multiple line strings are properly handled.
281 292
282 293 Separate test function for convenience
283 294
284 295 """
285 296 cc = ipt2.TransformerManager().check_complete
286 297 nt.assert_equal(cc('''def foo():\n """'''), ('incomplete', 4))
287 298
288 299
289 300 def test_null_cleanup_transformer():
290 301 manager = ipt2.TransformerManager()
291 302 manager.cleanup_transforms.insert(0, null_cleanup_transformer)
292 303 assert manager.transform_cell("") == ""
293 304
294 305
295 306
296 307
297 308 def test_side_effects_I():
298 309 count = 0
299 310 def counter(lines):
300 311 nonlocal count
301 312 count += 1
302 313 return lines
303 314
304 315 counter.has_side_effects = True
305 316
306 317 manager = ipt2.TransformerManager()
307 318 manager.cleanup_transforms.insert(0, counter)
308 319 assert manager.check_complete("a=1\n") == ('complete', None)
309 320 assert count == 0
310 321
311 322
312 323
313 324
314 325 def test_side_effects_II():
315 326 count = 0
316 327 def counter(lines):
317 328 nonlocal count
318 329 count += 1
319 330 return lines
320 331
321 332 counter.has_side_effects = True
322 333
323 334 manager = ipt2.TransformerManager()
324 335 manager.line_transforms.insert(0, counter)
325 336 assert manager.check_complete("b=1\n") == ('complete', None)
326 337 assert count == 0
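The CRLF_MAGIC case added above feeds "%ls\r\n" through EscapedCommand, which relies on the assemble_continued_line change in the first hunk. A quick plain-Python illustration of that change (values shown as comments):

    last_piece = "%ls\r\n"     # a magic line pasted with Windows line endings
    last_piece[:-1]            # old code: strips only the '\n', leaving '%ls\r'
    last_piece.rstrip()        # new code: also drops the '\r', leaving '%ls'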
@@ -1,116 +1,126 b''
1 1 """Tests for the line-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2 for tests for token-based transformers.
5 5 """
6 6 import nose.tools as nt
7 7
8 8 from IPython.core import inputtransformer2 as ipt2
9 9
10 10 CELL_MAGIC = ("""\
11 11 %%foo arg
12 12 body 1
13 13 body 2
14 14 """, """\
15 15 get_ipython().run_cell_magic('foo', 'arg', 'body 1\\nbody 2\\n')
16 16 """)
17 17
18 18 def test_cell_magic():
19 19 for sample, expected in [CELL_MAGIC]:
20 20 nt.assert_equal(ipt2.cell_magic(sample.splitlines(keepends=True)),
21 21 expected.splitlines(keepends=True))
22 22
23 23 CLASSIC_PROMPT = ("""\
24 24 >>> for a in range(5):
25 25 ... print(a)
26 26 """, """\
27 27 for a in range(5):
28 28 print(a)
29 29 """)
30 30
31 31 CLASSIC_PROMPT_L2 = ("""\
32 32 for a in range(5):
33 33 ... print(a)
34 34 ... print(a ** 2)
35 35 """, """\
36 36 for a in range(5):
37 37 print(a)
38 38 print(a ** 2)
39 39 """)
40 40
41 41 def test_classic_prompt():
42 42 for sample, expected in [CLASSIC_PROMPT, CLASSIC_PROMPT_L2]:
43 43 nt.assert_equal(ipt2.classic_prompt(sample.splitlines(keepends=True)),
44 44 expected.splitlines(keepends=True))
45 45
46 46 IPYTHON_PROMPT = ("""\
47 47 In [1]: for a in range(5):
48 48 ...: print(a)
49 49 """, """\
50 50 for a in range(5):
51 51 print(a)
52 52 """)
53 53
54 54 IPYTHON_PROMPT_L2 = ("""\
55 55 for a in range(5):
56 56 ...: print(a)
57 57 ...: print(a ** 2)
58 58 """, """\
59 59 for a in range(5):
60 60 print(a)
61 61 print(a ** 2)
62 62 """)
63 63
64 64 def test_ipython_prompt():
65 65 for sample, expected in [IPYTHON_PROMPT, IPYTHON_PROMPT_L2]:
66 66 nt.assert_equal(ipt2.ipython_prompt(sample.splitlines(keepends=True)),
67 67 expected.splitlines(keepends=True))
68 68
69 69 INDENT_SPACES = ("""\
70 70 if True:
71 71 a = 3
72 72 """, """\
73 73 if True:
74 74 a = 3
75 75 """)
76 76
77 77 INDENT_TABS = ("""\
78 78 \tif True:
79 79 \t\tb = 4
80 80 """, """\
81 81 if True:
82 82 \tb = 4
83 83 """)
84 84
85 85 def test_leading_indent():
86 86 for sample, expected in [INDENT_SPACES, INDENT_TABS]:
87 87 nt.assert_equal(ipt2.leading_indent(sample.splitlines(keepends=True)),
88 88 expected.splitlines(keepends=True))
89 89
90 90 LEADING_EMPTY_LINES = ("""\
91 91 \t
92 92
93 93 if True:
94 94 a = 3
95 95
96 96 b = 4
97 97 """, """\
98 98 if True:
99 99 a = 3
100 100
101 101 b = 4
102 102 """)
103 103
104 104 ONLY_EMPTY_LINES = ("""\
105 105 \t
106 106
107 107 """, """\
108 108 \t
109 109
110 110 """)
111 111
112 112 def test_leading_empty_lines():
113 113 for sample, expected in [LEADING_EMPTY_LINES, ONLY_EMPTY_LINES]:
114 114 nt.assert_equal(
115 115 ipt2.leading_empty_lines(sample.splitlines(keepends=True)),
116 116 expected.splitlines(keepends=True))
117
118 CRLF_MAGIC = ([
119 "%%ls\r\n"
120 ], [
121 "get_ipython().run_cell_magic('ls', '', '')\n"
122 ])
123
124 def test_crlf_magic():
125 for sample, expected in [CRLF_MAGIC]:
126 nt.assert_equal(ipt2.cell_magic(sample), expected)
\ No newline at end of file
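test_crlf_magic above pins down the cell_magic change from the first hunk; the slicing difference can be checked without IPython at all (values shown as comments):

    header = "%%ls\r\n"
    header[2:-1]           # old code: 'ls\r' -- the stray '\r' ends up in the magic name
    header[2:].rstrip()    # new code: 'ls'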