##// END OF EJS Templates
Merge pull request #11425 from haivp3010/master...
Matthias Bussonnier -
r24761:00810c00 merge
parent child Browse files
Show More
@@ -1,704 +1,707
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Union
17 17 import warnings
18 18
# Matches the run of spaces and/or tabs at the very start of a line.
_indent_re = re.compile(r'^[ \t]+')

def leading_indent(lines):
    """Remove the first line's leading indentation from the whole cell.

    If the first line starts with spaces or tabs, that exact whitespace
    prefix is removed from every line that begins with it. Lines that do not
    begin with the prefix are passed through unchanged, as is the whole list
    when it is empty or the first line is not indented.
    """
    if not lines:
        return lines
    match = _indent_re.match(lines[0])
    if match is None:
        return lines
    prefix = match.group(0)
    width = len(prefix)
    dedented = []
    for line in lines:
        dedented.append(line[width:] if line.startswith(prefix) else line)
    return dedented
36 36
class PromptStripper:
    """Callable that removes matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A compiled pattern matching any input prompt (including continuation,
        e.g. ``...``)
    initial_re : regular expression, optional
        A compiled pattern matching only the initial prompt, but not
        continuation. When omitted, prompt_re is used everywhere.
        Used mainly for plain Python prompts (``>>>``), where the continuation
        prompt ``...`` is a valid Python expression in Python 3, so shouldn't
        be stripped.

    Only initial_re is tested against the first line and only prompt_re
    against the second line. If either matches, the first prompt_re match is
    removed from every line of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re if initial_re else prompt_re

    def _strip(self, lines):
        # Remove at most one prompt per line.
        stripped = []
        for line in lines:
            stripped.append(self.prompt_re.sub('', line, count=1))
        return stripped

    def __call__(self, lines):
        if not lines:
            return lines
        if self.initial_re.match(lines[0]):
            return self._strip(lines)
        if len(lines) > 1 and self.prompt_re.match(lines[1]):
            return self._strip(lines)
        return lines
70 70
# Strips plain Python prompts: ``>>>`` on the first line, ``>>>`` or ``...``
# on the following ones.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
)

# Strips IPython prompts (``In [N]: `` and ``...: `` continuations); the same
# pattern is used for the first line and for the rest.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
77 77
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a single ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        Physical lines of the cell, normally each with its line ending.

    Returns
    -------
    list of str
        *lines* unchanged when the cell is empty, does not start with ``%%``,
        or starts with a ``%%name?`` help request; otherwise a one-element
        list holding the ``get_ipython().run_cell_magic(...)`` call.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    # Raw string: ``\w`` and ``\?`` are regex escapes, not string escapes.
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    header = lines[0]
    # Drop the trailing newline only if there is one, so a final line
    # without a line ending does not lose a real character.
    if header.endswith('\n'):
        header = header[:-1]
    magic_name, _, first_line = header[2:].partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
88 88
89 89
def _find_assign_op(token_line) -> Union[int, None]:
    """Return the index of the first top-level ``=`` token, or None.

    An ``=`` only counts when it is not nested inside ``()``, ``[]`` or
    ``{}``. Unbalanced closing brackets never push the nesting depth below
    zero.

    Note: We don't try to support multiple special assignment (a = b = %foo)
    """
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}
    depth = 0
    for index, tok in enumerate(token_line):
        text = tok.string
        if depth == 0 and text == '=':
            return index
        if text in openers:
            depth += 1
        elif text in closers and depth > 0:
            depth -= 1
    return None
105 105
def find_end_of_continued_line(lines, start_line: int):
    """Find where a line explicitly extended with backslashes ends.

    Starting at *start_line*, advances past every line ending in a backslash
    followed by a newline. If the final line of *lines* itself ends that way,
    the returned index is ``len(lines)``.

    Uses 0-indexed line numbers.
    """
    last_index = len(lines) - 1
    cursor = start_line
    while lines[cursor].endswith('\\\n'):
        if cursor == last_index:
            # Ran off the end: the continuation is never terminated.
            return cursor + 1
        cursor += 1
    return cursor
117 117
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    The last two characters (backslash and newline) are dropped from every
    piece but the last, the final character is dropped from the last piece,
    and the pieces are joined with single spaces.

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_line = start[0]
    first_col = start[1]
    parts = [lines[first_line][first_col:]] + lines[first_line + 1:end_line + 1]
    pieces = [p[:-2] for p in parts[:-1]]   # Strip backslash+newline
    pieces.append(parts[-1][:-1])           # Strip newline from last line
    return ' '.join(pieces)
144 144
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments. This is hard to reliably avoid with regexes. The solution is to
    tokenise the code as Python, and recognise the special syntax in the tokens.

    IPython's special syntax is not valid Python syntax, so tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax. After each transformation, tokens are regenerated to find the next
    piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute can select which transformation to apply if multiple
    transformers match in the same place. Lower numbers have higher priority.
    This allows "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a (1-indexed line, column) pair; lines are stored 0-indexed.
        lineno = start[0]
        self.start_line = lineno - 1
        self.start_col = start[1]

    def sortby(self):
        """Sort key: earliest position first, then lowest priority number."""
        return (self.start_line, self.start_col, self.priority)

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError
195 195
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` token, or None when no
        line has a top-level ``=`` immediately followed by ``%`` and a name.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after '=' are inspected ('%' and the magic name), so
            # index assign_ix + 2 must exist before it is read.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        Everything before the ``%`` is kept as the left-hand side; the magic
        (including any backslash-continued lines) is replaced by a
        ``get_ipython().run_line_magic(name, args)`` call on a single line.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
226 226
227 227
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance positioned at the ``!`` token, or None. Lines
        whose stripped source text starts with ``=`` are ignored, and only
        whitespace error tokens may sit between the ``=`` and the ``!``.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            if line[assign_ix].line.strip().startswith('='):
                continue
            if len(line) < assign_ix + 2:
                continue
            if line[assign_ix + 1].type != tokenize.ERRORTOKEN:
                continue

            # Walk the run of error tokens following '=' looking for '!'.
            ix = assign_ix + 1
            while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                text = line[ix].string
                if text == '!':
                    return cls(line[ix].start)
                if not text.isspace():
                    break
                ix += 1
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The command after ``!`` (including backslash-continued lines) is
        replaced by a ``get_ipython().getoutput(cmd)`` call on one line.
        """
        row, col = self.start_line, self.start_col

        lhs = lines[row][:col]
        end_line = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        call = "get_ipython().getoutput({!r})".format(cmd)
        replacement = [lhs + call + '\n']
        return lines[:row] + replacement + lines[end_line + 1:]
266 266
# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Single-character escapes recognised at the start of a line.
ESCAPE_SINGLES = {
    ESC_SHELL, ESC_HELP, ESC_MAGIC, ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
285 285
def _make_help_call(target, esc, next_input=None):
    """Prepare a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)

    Parameters
    ----------
    target : str
        The name being inspected; a ``*`` in it selects ``psearch``.
    esc : str
        ``'??'`` selects ``pinfo2``; any other value selects ``psearch`` or
        ``pinfo`` depending on *target*.
    next_input : str, optional
        When given, a ``set_next_input`` call with this text is emitted
        before the magic call.

    Returns
    -------
    str
        Source text calling ``get_ipython().run_line_magic(method, target)``.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'
    # The method names are fixed, space-free and never start with '%', so
    # they can be passed straight through as the magic name.
    magic_call = 'get_ipython().run_line_magic(%r, %r)' % (method, target)
    if next_input is None:
        return magic_call
    return 'get_ipython().set_next_input(%r);' % (next_input,) + magic_call
301 301
def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line should fire the intro help screen (shell.show_usage());
    anything else becomes a help call for *content*.
    """
    if content:
        return _make_help_call(content, '?')
    return 'get_ipython().show_usage()'
311 311
def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line should fire the intro help screen (shell.show_usage());
    anything else becomes a detailed help call for *content*.
    """
    if content:
        return _make_help_call(content, '??')
    return 'get_ipython().show_usage()'
321 321
def _tr_magic(content):
    "Translate lines escaped with a percent sign: %"
    # Everything up to the first space is the magic name; the rest is its
    # argument string, passed through verbatim.
    pieces = content.partition(' ')
    return 'get_ipython().run_line_magic({!r}, {!r})'.format(pieces[0], pieces[2])
326 326
def _tr_quote(content):
    "Translate lines escaped with a comma: ,"
    # Each whitespace-separated argument becomes its own double-quoted string.
    name, _, args = content.partition(' ')
    quoted_args = '", "'.join(args.split())
    return name + '("' + quoted_args + '")'
331 331
def _tr_quote2(content):
    "Translate lines escaped with a semicolon: ;"
    # The whole remainder of the line is passed as one double-quoted string.
    name, _, args = content.partition(' ')
    return name + '("' + args + '")'
336 336
def _tr_paren(content):
    "Translate lines escaped with a slash: /"
    # Whitespace-separated arguments are passed through unquoted.
    name, _, args = content.partition(' ')
    call_args = ", ".join(args.split())
    return '{}({})'.format(name, call_args)
341 341
# Dispatch table: escape prefix -> callable taking the text after the escape
# and returning the replacement Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
350 350
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Only the first token of each line that is not an INDENT/DEDENT is
        examined; returns an instance positioned there, or None.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            first = next(
                (tok for tok in line if tok.type not in layout_types), None)
            if first is not None and first.string in ESCAPE_SINGLES:
                return cls(first.start)
        return None

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escaped command (including backslash-continued lines) is replaced
        by one line holding the original indentation plus the translated
        call. An escape with no entry in ``tr`` yields an empty call.
        """
        row, col = self.start_line, self.start_col

        indent = lines[row][:col]
        end_line = find_end_of_continued_line(lines, row)
        line = assemble_continued_line(lines, (row, col), end_line)

        # Prefer a two-character escape (!!, ??) over a single-character one.
        prefix = line[:2]
        escape = prefix if prefix in ESCAPE_DOUBLES else line[:1]
        content = line[len(escape):]

        handler = tr.get(escape)
        call = handler(content) if handler is not None else ''

        return lines[:row] + [indent + call + '\n'] + lines[end_line + 1:]
393 393
# Matches an optionally %/%%-prefixed dotted name (``*`` wildcards allowed)
# followed by ? or ?? at the very end of the text.
_help_end_re = re.compile(r"""(%{0,2}
                              [a-zA-Z_*][\w*]*        # Variable name
                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)

class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        Returns an instance recording where the line's first real token
        starts and where the trailing ``?`` is, or None.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)
        return None

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises
        ------
        SyntaxError
            If the text before the ``?`` does not end in an inspectable name.
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
451 451
def make_tokens_by_line(lines:List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. All lines except the last lines should keep their line ending ('\\n',
    '\\r\\n') for this to properly work. Use `.splitlines(keepends=True)`
    for example when passing block of text to this function.

    Returns a list of lists of tokenize.TokenInfo objects. Tokenizing stops
    quietly if the input ends inside a multiline string or expression; the
    tokens produced up to that point are still returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
    tokens_by_line = [[]]
    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        # stacklevel=2 so the warning is attributed to the caller that passed
        # the badly formed lines.
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified",
                      stacklevel=2)
    parenlev = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            tokens_by_line[-1].append(token)
            if (token.type == NEWLINE) \
                    or ((token.type == NL) and (parenlev <= 0)):
                tokens_by_line.append([])
            elif token.string in {'(', '[', '{'}:
                parenlev += 1
            elif token.string in {')', ']', '}'}:
                if parenlev > 0:
                    parenlev -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # Drop the empty group opened after the final NEWLINE/NL, if any.
    if not tokens_by_line[-1]:
        tokens_by_line.pop()

    return tokens_by_line
491 491
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints the tokens of *s* grouped by logical line; a trailing newline is
    added first when *s* lacks one.
    """
    text = s if s.endswith('\n') else s + '\n'
    for token_group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in token_group:
            print(" ", tokinfo)
501 501
# Arbitrary limit to prevent getting stuck in infinite loops
TRANSFORM_LOOP_LIMIT = 500

class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Applied first, to strip indentation and pasted prompts.
        self.cleanup_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Applied to the whole list of physical lines.
        self.line_transforms = [
            cell_magic,
        ]
        # TokenTransformBase subclasses, applied one match at a time.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate could not be applied; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms until nothing changes.

        Raises RuntimeError if the input is still changing after
        TRANSFORM_LOOP_LIMIT rounds.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        # Remember if the lines ends in a new line.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break
            else:
                continue

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if len(tokens_by_line) == 1 and not tokens_by_line[-1]:
            return 'incomplete', 0

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        # A dedent at the very end only completes the block once the user has
        # entered a trailing newline.
        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # Everything else is ready to execute, whether or not there is a
        # blank line at the end.
        return 'complete', None
698 701
699 702
def find_last_indent(lines):
    """Return the indentation width of the last line, counting a tab as 4.

    Returns 0 when the last line does not start with spaces or tabs.
    """
    match = _indent_re.match(lines[-1])
    if match is None:
        return 0
    whitespace = match.group(0)
    # Each tab counts as four columns, i.e. three more than its one character.
    return len(whitespace) + 3 * whitespace.count('\t')
@@ -1,281 +1,282
1 1 """Tests for the token-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2_line for tests for line-based
5 5 transformations.
6 6 """
7 7 import nose.tools as nt
8 8 import string
9 9
10 10 from IPython.core import inputtransformer2 as ipt2
11 11 from IPython.core.inputtransformer2 import make_tokens_by_line, _find_assign_op
12 12
13 13 from textwrap import dedent
14 14
# Test case layout: (input lines, expected (line, col) start of the escaped
# command, expected transformed lines).
# Here a line magic is continued onto the next line with a backslash.
MULTILINE_MAGIC = (
    [
        "a = f()\n",
        "%foo \\\n",
        "bar\n",
        "g()\n",
    ],
    (2, 0),
    [
        "a = f()\n",
        "get_ipython().run_line_magic('foo', ' bar')\n",
        "g()\n",
    ],
)
25 25
# A line magic nested inside a ``for`` block.
# Layout: (input lines, expected (line, col) start, expected transformed lines).
# The expected start (2, 4) pins ``%ls`` to column 4 of the second line, and
# the loop body has to be indented for the cell to be valid Python, so the
# indentation inside both literals is significant.
INDENTED_MAGIC = ("""\
for a in range(5):
    %ls
""".splitlines(keepends=True), (2, 4), """\
for a in range(5):
    get_ipython().run_line_magic('ls', '')
""".splitlines(keepends=True))
33 33
# Assignment from a line magic that is continued with a backslash.
# Layout: (input lines, expected (line, col) start, expected transformed lines).
# NOTE(review): runs of spaces (indent of the continuation line, padding in
# the expected magic argument) may have been collapsed in this copy of the
# file -- confirm against the upstream test data.
MULTILINE_MAGIC_ASSIGN = (
    [
        "a = f()\n",
        "b = %foo \\\n",
        "bar\n",
        "g()\n",
    ],
    (2, 4),
    [
        "a = f()\n",
        "b = get_ipython().run_line_magic('foo', ' bar')\n",
        "g()\n",
    ],
)
44 44
# Assignment from a ``!system`` command that is continued with a backslash.
# Layout: (input lines, expected (line, col) start, expected transformed lines).
# NOTE(review): runs of spaces (indent of the continuation line, spacing in
# the expected command string) may have been collapsed in this copy of the
# file -- confirm against the upstream test data.
MULTILINE_SYSTEM_ASSIGN = (
    [
        "a = f()\n",
        "b = !foo \\\n",
        "bar\n",
        "g()\n",
    ],
    (2, 4),
    [
        "a = f()\n",
        "b = get_ipython().getoutput('foo bar')\n",
        "g()\n",
    ],
)
55 55
56 56 #####
57 57
# A ``!system`` assignment that follows a dedent inside a function body.
# Layout: (input lines, expected (line, col) start, expected transformed lines).
# The block uses two-space indentation: the expected start (4, 7) puts the
# ``!`` of ``res =! ls`` at column 7, which places ``res`` (and therefore the
# ``for`` it is aligned with) at column 2.
MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT = ("""\
def test():
  for i in range(1):
    print(i)
  res =! ls
""".splitlines(keepends=True), (4, 7), '''\
def test():
  for i in range(1):
    print(i)
  res =get_ipython().getoutput(\' ls\')
'''.splitlines(keepends=True))
69 69
70 70 ######
71 71
# Autocall escapes.  Each case is
# (input lines, expected (line, col) start, expected transformed lines).

# ``,`` prefix: each whitespace-separated argument becomes its own string.
AUTOCALL_QUOTE = ([",f 1 2 3\n"], (1, 0), ['f("1", "2", "3")\n'])

# ``;`` prefix: the whole argument text becomes a single string.
AUTOCALL_QUOTE2 = ([";f 1 2 3\n"], (1, 0), ['f("1 2 3")\n'])

# ``/`` prefix: the arguments are passed unquoted, separated by commas.
AUTOCALL_PAREN = (["/f 1 2 3\n"], (1, 0), ['f(1, 2, 3)\n'])
86 86
# ``?`` / ``??`` help queries.  Each case is
# (input lines, expected (line, col) start, expected transformed lines).

# A single ``?`` maps to the ``pinfo`` magic.
SIMPLE_HELP = (["foo?\n"], (1, 0), ["get_ipython().run_line_magic('pinfo', 'foo')\n"])

# A double ``??`` maps to the ``pinfo2`` magic.
DETAILED_HELP = (["foo??\n"], (1, 0), ["get_ipython().run_line_magic('pinfo2', 'foo')\n"])

# Help on a magic keeps the ``%`` in the queried name.
MAGIC_HELP = (["%foo?\n"], (1, 0), ["get_ipython().run_line_magic('pinfo', '%foo')\n"])

# Help at the end of an expression: the expression (without ``?``) is handed
# to set_next_input and only the trailing name is queried.
HELP_IN_EXPR = (
    ["a = b + c?\n"],
    (1, 0),
    ["get_ipython().set_next_input('a = b + c');get_ipython().run_line_magic('pinfo', 'c')\n"],
)

# Help at the end of a backslash-continued statement.
HELP_CONTINUED_LINE = (
    ["a = \\\n", "zip?\n"],
    (1, 0),
    [r"get_ipython().set_next_input('a = \\\nzip');"
     r"get_ipython().run_line_magic('pinfo', 'zip')"
     "\n"],
)

# Help at the end of a statement that spans lines inside parentheses.
HELP_MULTILINE = (
    ["(a,\n", "b) = zip?\n"],
    (1, 0),
    [r"get_ipython().set_next_input('(a,\nb) = zip');"
     r"get_ipython().run_line_magic('pinfo', 'zip')"
     "\n"],
)
121 121
122 122
def null_cleanup_transformer(lines):
    """Cleanup transform that ignores *lines* and returns a new empty list."""
    return list()
128 128
def check_make_token_by_line_never_ends_empty():
    """
    Check that no sequence of one or two printable characters leads to an
    empty final list of tokens.
    """
    from string import printable
    for first in printable:
        # The single character on its own, then every two-character
        # combination that starts with it.
        candidates = [first] + [first + second for second in printable]
        for text in candidates:
            nt.assert_not_equal(make_tokens_by_line(text)[-1], [])
138 138
def check_find(transformer, case, match=True):
    """Run ``transformer.find`` on a test case and verify what it locates.

    *case* is a 3-tuple ``(sample lines, expected start, ignored)``.  With
    ``match=True`` the found start position must equal the expected one and
    the found object is returned; with ``match=False`` ``find`` must return
    None.
    """
    source_lines, want_start, _unused = case
    found = transformer.find(make_tokens_by_line(source_lines))
    if not match:
        nt.assert_is(found, None)
        return None
    # start_line is stored 0-indexed, expected values are 1-indexed
    nt.assert_equal((found.start_line + 1, found.start_col), want_start)
    return found
149 149
def check_transform(transformer_cls, case):
    """Build a transformer at the case's start position and compare its output.

    *case* is a 3-tuple ``(input lines, start, expected lines)``.
    """
    source_lines, start_pos, want = case
    got = transformer_cls(start_pos).transform(source_lines)
    nt.assert_equal(got, want)
154 154
def test_continued_line():
    """Find the end of a backslash-continued line and join its pieces."""
    source_lines = MULTILINE_MAGIC_ASSIGN[0]

    end_line = ipt2.find_end_of_continued_line(source_lines, 1)
    nt.assert_equal(end_line, 2)

    # NOTE(review): the spacing inside the expected string may have been
    # collapsed in this copy of the file -- confirm against upstream.
    joined = ipt2.assemble_continued_line(source_lines, (1, 5), 2)
    nt.assert_equal(joined, "foo bar")
160 160
def test_find_assign_magic():
    """``MagicAssign`` matches magic assignments but not system assignments."""
    expectations = (
        (MULTILINE_MAGIC_ASSIGN, True),
        (MULTILINE_SYSTEM_ASSIGN, False),
        (MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT, False),
    )
    for case, should_match in expectations:
        check_find(ipt2.MagicAssign, case, match=should_match)
165 165
def test_transform_assign_magic():
    """``MagicAssign`` rewrites the multi-line magic assignment case."""
    case = MULTILINE_MAGIC_ASSIGN
    check_transform(ipt2.MagicAssign, case)
168 168
def test_find_assign_system():
    """``SystemAssign`` matches ``!`` assignments but not magic assignments.

    In these cases the expected start column is the column of the ``!``
    (e.g. ``"a=!ls"`` expects ``(1, 2)``).
    """
    check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
    check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT)
    # Two spaces after ``=`` place the ``!`` at column 5, which is what the
    # expected start says; with a single space it would be at column 4.
    check_find(ipt2.SystemAssign, (["a =  !ls\n"], (1, 5), None))
    check_find(ipt2.SystemAssign, (["a=!ls\n"], (1, 2), None))
    check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False)
175 175
def test_transform_assign_system():
    """``SystemAssign`` rewrites both system-assignment cases."""
    for case in (MULTILINE_SYSTEM_ASSIGN, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT):
        check_transform(ipt2.SystemAssign, case)
179 179
def test_find_magic_escape():
    """``EscapedCommand`` finds bare magics but not magic assignments."""
    for case in (MULTILINE_MAGIC, INDENTED_MAGIC):
        check_find(ipt2.EscapedCommand, case)
    check_find(ipt2.EscapedCommand, MULTILINE_MAGIC_ASSIGN, match=False)
184 184
def test_transform_magic_escape():
    """``EscapedCommand`` rewrites the multi-line and indented magic cases."""
    for case in (MULTILINE_MAGIC, INDENTED_MAGIC):
        check_transform(ipt2.EscapedCommand, case)
188 188
def test_find_autocalls():
    """``EscapedCommand`` finds each of the autocall escape forms."""
    autocall_cases = (AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN)
    for case in autocall_cases:
        source_lines = case[0]
        print("Testing %r" % source_lines)
        check_find(ipt2.EscapedCommand, case)
193 193
def test_transform_autocall():
    """``EscapedCommand`` rewrites each of the autocall escape forms."""
    autocall_cases = (AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN)
    for case in autocall_cases:
        source_lines = case[0]
        print("Testing %r" % source_lines)
        check_transform(ipt2.EscapedCommand, case)
198 198
def test_find_help():
    """``HelpEnd.find`` locates trailing ``?`` queries and their position."""
    single_line_cases = (SIMPLE_HELP, DETAILED_HELP, MAGIC_HELP, HELP_IN_EXPR)
    for case in single_line_cases:
        check_find(ipt2.HelpEnd, case)

    # Statements spanning two lines: also check where the ``?`` was found.
    multi_line_cases = (
        (HELP_CONTINUED_LINE, 1, 3),
        (HELP_MULTILINE, 1, 8),
    )
    for case, want_q_line, want_q_col in multi_line_cases:
        found = check_find(ipt2.HelpEnd, case)
        nt.assert_equal(found.q_line, want_q_line)
        nt.assert_equal(found.q_col, want_q_col)

    # ? in a comment does not trigger help
    in_comment = (["foo # bar?\n"], None, None)
    check_find(ipt2.HelpEnd, in_comment, match=False)
    # Nor in a string
    in_string = (["foo = '''bar?\n"], None, None)
    check_find(ipt2.HelpEnd, in_string, match=False)
215 215
def test_transform_help():
    """``HelpEnd.transform`` rewrites each help case given its ``?`` position."""
    # (second position passed to HelpEnd, test case)
    scenarios = (
        ((1, 9), HELP_IN_EXPR),
        ((2, 3), HELP_CONTINUED_LINE),
        ((2, 8), HELP_MULTILINE),
    )
    for q_pos, case in scenarios:
        transformer = ipt2.HelpEnd((1, 0), q_pos)
        source_lines, want = case[0], case[2]
        nt.assert_equal(transformer.transform(source_lines), want)
225 225
def test_find_assign_op_dedent():
    """
    Be careful that empty tokens, like dedent, are not counted as parentheses.
    """
    class FakeToken:
        # Minimal stand-in exposing only the ``string`` attribute.
        def __init__(self, text):
            self.string = text

    def as_tokens(*texts):
        return [FakeToken(text) for text in texts]

    plain = as_tokens('', 'a', '=', 'b')
    nt.assert_equal(_find_assign_op(plain), 2)

    parenthesised = as_tokens('', '(', 'a', '=', 'b', ')', '=', '5')
    nt.assert_equal(_find_assign_op(parenthesised), 6)
236 236
def test_check_complete():
    """Check the ``(status, indent)`` pairs returned by ``check_complete``.

    Several inputs depend on exact leading whitespace: the asserted indent
    values (8, 3 and 4) only hold when the corresponding source lines are
    indented by 4, 3 and 4 columns respectively.
    """
    cc = ipt2.TransformerManager().check_complete
    nt.assert_equal(cc("a = 1"), ('complete', None))
    nt.assert_equal(cc("for a in range(5):"), ('incomplete', 4))
    # Nested block opener at column 4 -> next line is expected at column 8.
    nt.assert_equal(cc("for a in range(5):\n    if a > 0:"), ('incomplete', 8))
    nt.assert_equal(cc("raise = 2"), ('invalid', None))
    nt.assert_equal(cc("a = [1,\n2,"), ('incomplete', 0))
    nt.assert_equal(cc(")"), ('incomplete', 0))
    nt.assert_equal(cc("\\\r\n"), ('incomplete', 0))
    # Inside an open string the last line's own indent (3 columns) is reported.
    nt.assert_equal(cc("a = '''\n   hi"), ('incomplete', 3))
    nt.assert_equal(cc("def a():\n x=1\n global x"), ('invalid', None))
    nt.assert_equal(cc("a \\ "), ('invalid', None))  # Nothing allowed after backslash
    nt.assert_equal(cc("1\\\n+2"), ('complete', None))
    nt.assert_equal(cc("exit"), ('complete', None))

    # After dedent() the body line keeps a 4-column indent relative to ``if``.
    example = dedent("""
        if True:
            a=1""" )

    nt.assert_equal(cc(example), ('incomplete', 4))
    nt.assert_equal(cc(example+'\n'), ('complete', None))
    nt.assert_equal(cc(example+'\n '), ('complete', None))

    # no need to loop on all the letters/numbers.
    short = '12abAB'+string.printable[62:]
    for c in short:
        # test does not raise:
        cc(c)
        for k in short:
            cc(c+k)
266 267
def test_check_complete_II():
    """
    Test that multiple line strings are properly handled.

    Separate test function for convenience

    """
    cc = ipt2.TransformerManager().check_complete
    # The unterminated docstring opens at column 4, matching the asserted
    # indent of 4.
    nt.assert_equal(cc('''def foo():\n    """'''), ('incomplete', 4))
276 277
277 278
def test_null_cleanup_transformer():
    """An empty cell transforms to "" when a cleanup step returns no lines."""
    tm = ipt2.TransformerManager()
    tm.cleanup_transforms.insert(0, null_cleanup_transformer)
    result = tm.transform_cell("")
    nt.assert_is(result, "")
General Comments 0
You need to be logged in to leave comments. Login now