Bail on singleline cells with sunken brackets...
Blazej Michalik
@@ -1,729 +1,748 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Optional, Any
17 17 import warnings
18 18
19 19 _indent_re = re.compile(r'^[ \t]+')
20 20
21 21 def leading_empty_lines(lines):
22 22 """Remove leading empty lines
23 23
24 24 If the leading lines are empty or contain only whitespace, they will be
25 25 removed.
26 26 """
27 27 if not lines:
28 28 return lines
29 29 for i, line in enumerate(lines):
30 30 if line and not line.isspace():
31 31 return lines[i:]
32 32 return lines
33 33
34 34 def leading_indent(lines):
35 35 """Remove leading indentation.
36 36
37 37 If the first line starts with spaces or tabs, the same whitespace will be
38 38 removed from each following line in the cell.
39 39 """
40 40 if not lines:
41 41 return lines
42 42 m = _indent_re.match(lines[0])
43 43 if not m:
44 44 return lines
45 45 space = m.group(0)
46 46 n = len(space)
47 47 return [l[n:] if l.startswith(space) else l
48 48 for l in lines]
49 49
50 50 class PromptStripper:
51 51 """Remove matching input prompts from a block of input.
52 52
53 53 Parameters
54 54 ----------
55 55 prompt_re : regular expression
56 56 A regular expression matching any input prompt (including continuation,
57 57 e.g. ``...``)
58 58 initial_re : regular expression, optional
59 59 A regular expression matching only the initial prompt, but not continuation.
60 60 If no initial expression is given, prompt_re will be used everywhere.
61 61 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
62 62 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
63 63
64 64 Notes
65 65 -----
66 66
67 67 If initial_re and prompt_re differ,
68 68 only initial_re will be tested against the first line.
69 69 If any prompt is found on the first two lines,
70 70 prompts will be stripped from the rest of the block.
71 71 """
72 72 def __init__(self, prompt_re, initial_re=None):
73 73 self.prompt_re = prompt_re
74 74 self.initial_re = initial_re or prompt_re
75 75
76 76 def _strip(self, lines):
77 77 return [self.prompt_re.sub('', l, count=1) for l in lines]
78 78
79 79 def __call__(self, lines):
80 80 if not lines:
81 81 return lines
82 82 if self.initial_re.match(lines[0]) or \
83 83 (len(lines) > 1 and self.prompt_re.match(lines[1])):
84 84 return self._strip(lines)
85 85 return lines
86 86
87 87 classic_prompt = PromptStripper(
88 88 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
89 89 initial_re=re.compile(r'^>>>( |$)')
90 90 )
91 91
92 92 ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
93 93
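An illustrative sketch (editorial addition, not part of the change itself) of what the two strippers defined above do; it assumes IPython >= 7.0, where this module ships:

    from IPython.core.inputtransformer2 import classic_prompt, ipython_prompt

    # Classic ">>>"/"..." prompts are removed from every line once the first
    # line is recognised as a prompt.
    classic_prompt([">>> a = 1\n", "... b = a + 1\n"])
    # -> ['a = 1\n', 'b = a + 1\n']

    # IPython-style prompts are handled the same way.
    ipython_prompt(["In [1]: x = 2\n"])
    # -> ['x = 2\n']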
94 94 def cell_magic(lines):
95 95 if not lines or not lines[0].startswith('%%'):
96 96 return lines
97 97 if re.match(r'%%\w+\?', lines[0]):
98 98 # This case will be handled by help_end
99 99 return lines
100 100 magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
101 101 body = ''.join(lines[1:])
102 102 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
103 103 % (magic_name, first_line, body)]
104 104
105 105
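A sketch of the rewrite cell_magic produces (editorial example; the quoting in the result comes from the ``%r`` formatting above):

    from IPython.core.inputtransformer2 import cell_magic

    cell_magic(["%%timeit -n 100\n", "x = 1\n", "y = x + 1\n"])
    # -> ["get_ipython().run_cell_magic('timeit', '-n 100', 'x = 1\\ny = x + 1\\n')\n"]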
106 106 def _find_assign_op(token_line) -> Optional[int]:
107 107 """Get the index of the first assignment in the line ('=' not inside brackets)
108 108
109 109 Note: We don't try to support multiple special assignments (a = b = %foo)
110 110 """
111 111 paren_level = 0
112 112 for i, ti in enumerate(token_line):
113 113 s = ti.string
114 114 if s == '=' and paren_level == 0:
115 115 return i
116 116 if s in {'(','[','{'}:
117 117 paren_level += 1
118 118 elif s in {')', ']', '}'}:
119 119 if paren_level > 0:
120 120 paren_level -= 1
121 121 return None
122 122
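A small illustration of the bracket tracking (editorial sketch; ``make_tokens_by_line`` is defined further down in this file):

    from IPython.core.inputtransformer2 import make_tokens_by_line, _find_assign_op

    _find_assign_op(make_tokens_by_line(["d = {'k': 1}\n"])[0])
    # -> 1    (the top-level '='; the ':' inside the dict literal is ignored)
    _find_assign_op(make_tokens_by_line(["f(x=1)\n"])[0])
    # -> None (the '=' sits inside brackets, so it is not an assignment)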
123 123 def find_end_of_continued_line(lines, start_line: int):
124 124 """Find the last line of a line explicitly extended using backslashes.
125 125
126 126 Uses 0-indexed line numbers.
127 127 """
128 128 end_line = start_line
129 129 while lines[end_line].endswith('\\\n'):
130 130 end_line += 1
131 131 if end_line >= len(lines):
132 132 break
133 133 return end_line
134 134
135 135 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
136 136 r"""Assemble a single line from multiple continued line pieces
137 137
138 138 Continued lines are lines ending in ``\``, and the line following the last
139 139 ``\`` in the block.
140 140
141 141 For example, this code continues over multiple lines::
142 142
143 143 if (assign_ix is not None) \
144 144 and (len(line) >= assign_ix + 2) \
145 145 and (line[assign_ix+1].string == '%') \
146 146 and (line[assign_ix+2].type == tokenize.NAME):
147 147
148 148 This statement contains four continued line pieces.
149 149 Assembling these pieces into a single line would give::
150 150
151 151 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
152 152
153 153 This uses 0-indexed line numbers. *start* is (lineno, colno).
154 154
155 155 Used to allow ``%magic`` and ``!system`` commands to be continued over
156 156 multiple lines.
157 157 """
158 158 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
159 159 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
160 160 + [parts[-1].rstrip()]) # Strip newline from last line
161 161
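A minimal sketch of how the two helpers above cooperate (editorial example; the input is chosen so the joined result is unambiguous):

    from IPython.core.inputtransformer2 import (
        find_end_of_continued_line, assemble_continued_line)

    lines = ["%ls -l\\\n", "/tmp\n"]             # a magic continued with a backslash
    end = find_end_of_continued_line(lines, 0)   # -> 1 (0-indexed last piece)
    assemble_continued_line(lines, (0, 0), end)  # -> '%ls -l /tmp'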
162 162 class TokenTransformBase:
163 163 """Base class for transformations which examine tokens.
164 164
165 165 Special syntax should not be transformed when it occurs inside strings or
166 166 comments. This is hard to reliably avoid with regexes. The solution is to
167 167 tokenise the code as Python, and recognise the special syntax in the tokens.
168 168
169 169 IPython's special syntax is not valid Python syntax, so tokenising may go
170 170 wrong after the special syntax starts. These classes therefore find and
171 171 transform *one* instance of special syntax at a time into regular Python
172 172 syntax. After each transformation, tokens are regenerated to find the next
173 173 piece of special syntax.
174 174
175 175 Subclasses need to implement one class method (find)
176 176 and one regular method (transform).
177 177
178 178 The priority attribute can select which transformation to apply if multiple
179 179 transformers match in the same place. Lower numbers have higher priority.
180 180 This allows "%magic?" to be turned into a help call rather than a magic call.
181 181 """
182 182 # Lower numbers -> higher priority (for matches in the same location)
183 183 priority = 10
184 184
185 185 def sortby(self):
186 186 return self.start_line, self.start_col, self.priority
187 187
188 188 def __init__(self, start):
189 189 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
190 190 self.start_col = start[1]
191 191
192 192 @classmethod
193 193 def find(cls, tokens_by_line):
194 194 """Find one instance of special syntax in the provided tokens.
195 195
196 196 Tokens are grouped into logical lines for convenience,
197 197 so it is easy to e.g. look at the first token of each line.
198 198 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
199 199
200 200 This should return an instance of its class, pointing to the start
201 201 position it has found, or None if it found no match.
202 202 """
203 203 raise NotImplementedError
204 204
205 205 def transform(self, lines: List[str]):
206 206 """Transform one instance of special syntax found by ``find()``
207 207
208 208 Takes a list of strings representing physical lines,
209 209 returns a similar list of transformed lines.
210 210 """
211 211 raise NotImplementedError
212 212
213 213 class MagicAssign(TokenTransformBase):
214 214 """Transformer for assignments from magics (a = %foo)"""
215 215 @classmethod
216 216 def find(cls, tokens_by_line):
217 217 """Find the first magic assignment (a = %foo) in the cell.
218 218 """
219 219 for line in tokens_by_line:
220 220 assign_ix = _find_assign_op(line)
221 221 if (assign_ix is not None) \
222 222 and (len(line) >= assign_ix + 2) \
223 223 and (line[assign_ix+1].string == '%') \
224 224 and (line[assign_ix+2].type == tokenize.NAME):
225 225 return cls(line[assign_ix+1].start)
226 226
227 227 def transform(self, lines: List[str]):
228 228 """Transform a magic assignment found by the ``find()`` classmethod.
229 229 """
230 230 start_line, start_col = self.start_line, self.start_col
231 231 lhs = lines[start_line][:start_col]
232 232 end_line = find_end_of_continued_line(lines, start_line)
233 233 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
234 234 assert rhs.startswith('%'), rhs
235 235 magic_name, _, args = rhs[1:].partition(' ')
236 236
237 237 lines_before = lines[:start_line]
238 238 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
239 239 new_line = lhs + call + '\n'
240 240 lines_after = lines[end_line+1:]
241 241
242 242 return lines_before + [new_line] + lines_after
243 243
244 244
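A sketch of the find/transform round trip for a magic assignment (editorial example, a simpler variant of the MULTILINE_MAGIC_ASSIGN test case in the second file of this diff):

    from IPython.core.inputtransformer2 import MagicAssign, make_tokens_by_line

    lines = ["a = %ls\n"]
    tf = MagicAssign.find(make_tokens_by_line(lines))
    tf.transform(lines)
    # -> ["a = get_ipython().run_line_magic('ls', '')\n"]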
245 245 class SystemAssign(TokenTransformBase):
246 246 """Transformer for assignments from system commands (a = !foo)"""
247 247 @classmethod
248 248 def find(cls, tokens_by_line):
249 249 """Find the first system assignment (a = !foo) in the cell.
250 250 """
251 251 for line in tokens_by_line:
252 252 assign_ix = _find_assign_op(line)
253 253 if (assign_ix is not None) \
254 254 and not line[assign_ix].line.strip().startswith('=') \
255 255 and (len(line) >= assign_ix + 2) \
256 256 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
257 257 ix = assign_ix + 1
258 258
259 259 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
260 260 if line[ix].string == '!':
261 261 return cls(line[ix].start)
262 262 elif not line[ix].string.isspace():
263 263 break
264 264 ix += 1
265 265
266 266 def transform(self, lines: List[str]):
267 267 """Transform a system assignment found by the ``find()`` classmethod.
268 268 """
269 269 start_line, start_col = self.start_line, self.start_col
270 270
271 271 lhs = lines[start_line][:start_col]
272 272 end_line = find_end_of_continued_line(lines, start_line)
273 273 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
274 274 assert rhs.startswith('!'), rhs
275 275 cmd = rhs[1:]
276 276
277 277 lines_before = lines[:start_line]
278 278 call = "get_ipython().getoutput({!r})".format(cmd)
279 279 new_line = lhs + call + '\n'
280 280 lines_after = lines[end_line + 1:]
281 281
282 282 return lines_before + [new_line] + lines_after
283 283
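The analogous sketch for a system assignment (editorial example; the ``!`` arrives as an ERRORTOKEN, which is what ``find()`` looks for):

    from IPython.core.inputtransformer2 import SystemAssign, make_tokens_by_line

    lines = ["a = !ls\n"]
    tf = SystemAssign.find(make_tokens_by_line(lines))
    tf.transform(lines)
    # -> ["a = get_ipython().getoutput('ls')\n"]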
284 284 # The escape sequences that define the syntax transformations IPython will
285 285 # apply to user input. These can NOT be just changed here: many regular
286 286 # expressions and other parts of the code may use their hardcoded values, and
287 287 # for all intents and purposes they constitute the 'IPython syntax', so they
288 288 # should be considered fixed.
289 289
290 290 ESC_SHELL = '!' # Send line to underlying system shell
291 291 ESC_SH_CAP = '!!' # Send line to system shell and capture output
292 292 ESC_HELP = '?' # Find information about object
293 293 ESC_HELP2 = '??' # Find extra-detailed information about object
294 294 ESC_MAGIC = '%' # Call magic function
295 295 ESC_MAGIC2 = '%%' # Call cell-magic function
296 296 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
297 297 ESC_QUOTE2 = ';' # Quote all args as a single string, call
298 298 ESC_PAREN = '/' # Call first argument with rest of line as arguments
299 299
300 300 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
301 301 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
302 302
303 303 def _make_help_call(target, esc, next_input=None):
304 304 """Prepares a pinfo(2)/psearch call from a target name and the escape
305 305 (i.e. ? or ??)"""
306 306 method = 'pinfo2' if esc == '??' \
307 307 else 'psearch' if '*' in target \
308 308 else 'pinfo'
309 309 arg = " ".join([method, target])
310 310 # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
311 311 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
312 312 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
313 313 if next_input is None:
314 314 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
315 315 else:
316 316 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
317 317 (next_input, t_magic_name, t_magic_arg_s)
318 318
319 319 def _tr_help(content):
320 320 """Translate lines escaped with: ?
321 321
322 322 A naked help line should fire the intro help screen (shell.show_usage())
323 323 """
324 324 if not content:
325 325 return 'get_ipython().show_usage()'
326 326
327 327 return _make_help_call(content, '?')
328 328
329 329 def _tr_help2(content):
330 330 """Translate lines escaped with: ??
331 331
332 332 A naked help line should fire the intro help screen (shell.show_usage())
333 333 """
334 334 if not content:
335 335 return 'get_ipython().show_usage()'
336 336
337 337 return _make_help_call(content, '??')
338 338
339 339 def _tr_magic(content):
340 340 "Translate lines escaped with a percent sign: %"
341 341 name, _, args = content.partition(' ')
342 342 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
343 343
344 344 def _tr_quote(content):
345 345 "Translate lines escaped with a comma: ,"
346 346 name, _, args = content.partition(' ')
347 347 return '%s("%s")' % (name, '", "'.join(args.split()) )
348 348
349 349 def _tr_quote2(content):
350 350 "Translate lines escaped with a semicolon: ;"
351 351 name, _, args = content.partition(' ')
352 352 return '%s("%s")' % (name, args)
353 353
354 354 def _tr_paren(content):
355 355 "Translate lines escaped with a slash: /"
356 356 name, _, args = content.partition(' ')
357 357 return '%s(%s)' % (name, ", ".join(args.split()))
358 358
359 359 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
360 360 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
361 361 ESC_HELP : _tr_help,
362 362 ESC_HELP2 : _tr_help2,
363 363 ESC_MAGIC : _tr_magic,
364 364 ESC_QUOTE : _tr_quote,
365 365 ESC_QUOTE2 : _tr_quote2,
366 366 ESC_PAREN : _tr_paren }
367 367
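The translation helpers are plain string-to-string functions, so they are easy to probe directly (editorial sketch; the outputs follow from the format strings above):

    from IPython.core.inputtransformer2 import tr, _tr_quote, _tr_quote2, _tr_paren

    _tr_quote("f 1 2 3")    # -> 'f("1", "2", "3")'
    _tr_quote2("f 1 2 3")   # -> 'f("1 2 3")'
    _tr_paren("f 1 2 3")    # -> 'f(1, 2, 3)'
    tr['!']("ls -l")        # -> "get_ipython().system('ls -l')"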
368 368 class EscapedCommand(TokenTransformBase):
369 369 """Transformer for escaped commands like %foo, !foo, or /foo"""
370 370 @classmethod
371 371 def find(cls, tokens_by_line):
372 372 """Find the first escaped command (%foo, !foo, etc.) in the cell.
373 373 """
374 374 for line in tokens_by_line:
375 375 if not line:
376 376 continue
377 377 ix = 0
378 378 ll = len(line)
379 379 while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
380 380 ix += 1
381 381 if ix >= ll:
382 382 continue
383 383 if line[ix].string in ESCAPE_SINGLES:
384 384 return cls(line[ix].start)
385 385
386 386 def transform(self, lines):
387 387 """Transform an escaped line found by the ``find()`` classmethod.
388 388 """
389 389 start_line, start_col = self.start_line, self.start_col
390 390
391 391 indent = lines[start_line][:start_col]
392 392 end_line = find_end_of_continued_line(lines, start_line)
393 393 line = assemble_continued_line(lines, (start_line, start_col), end_line)
394 394
395 395 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
396 396 escape, content = line[:2], line[2:]
397 397 else:
398 398 escape, content = line[:1], line[1:]
399 399
400 400 if escape in tr:
401 401 call = tr[escape](content)
402 402 else:
403 403 call = ''
404 404
405 405 lines_before = lines[:start_line]
406 406 new_line = indent + call + '\n'
407 407 lines_after = lines[end_line + 1:]
408 408
409 409 return lines_before + [new_line] + lines_after
410 410
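A sketch of EscapedCommand on an autocall line (editorial example, matching the AUTOCALL_PAREN test case later in this diff):

    from IPython.core.inputtransformer2 import EscapedCommand, make_tokens_by_line

    lines = ["/f 1 2 3\n"]
    tf = EscapedCommand.find(make_tokens_by_line(lines))
    tf.transform(lines)   # -> ['f(1, 2, 3)\n']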
411 411 _help_end_re = re.compile(r"""(%{0,2}
412 412 (?!\d)[\w*]+ # Variable name
413 413 (\.(?!\d)[\w*]+)* # .etc.etc
414 414 )
415 415 (\?\??)$ # ? or ??
416 416 """,
417 417 re.VERBOSE)
418 418
419 419 class HelpEnd(TokenTransformBase):
420 420 """Transformer for help syntax: obj? and obj??"""
421 421 # This needs to be higher priority (lower number) than EscapedCommand so
422 422 # that inspecting magics (%foo?) works.
423 423 priority = 5
424 424
425 425 def __init__(self, start, q_locn):
426 426 super().__init__(start)
427 427 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
428 428 self.q_col = q_locn[1]
429 429
430 430 @classmethod
431 431 def find(cls, tokens_by_line):
432 432 """Find the first help command (foo?) in the cell.
433 433 """
434 434 for line in tokens_by_line:
435 435 # Last token is NEWLINE; look at last but one
436 436 if len(line) > 2 and line[-2].string == '?':
437 437 # Find the first token that's not INDENT/DEDENT
438 438 ix = 0
439 439 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
440 440 ix += 1
441 441 return cls(line[ix].start, line[-2].start)
442 442
443 443 def transform(self, lines):
444 444 """Transform a help command found by the ``find()`` classmethod.
445 445 """
446 446 piece = ''.join(lines[self.start_line:self.q_line+1])
447 447 indent, content = piece[:self.start_col], piece[self.start_col:]
448 448 lines_before = lines[:self.start_line]
449 449 lines_after = lines[self.q_line + 1:]
450 450
451 451 m = _help_end_re.search(content)
452 452 if not m:
453 453 raise SyntaxError(content)
454 454 assert m is not None, content
455 455 target = m.group(1)
456 456 esc = m.group(3)
457 457
458 458 # If we're mid-command, put it back on the next prompt for the user.
459 459 next_input = None
460 460 if (not lines_before) and (not lines_after) \
461 461 and content.strip() != m.group(0):
462 462 next_input = content.rstrip('?\n')
463 463
464 464 call = _make_help_call(target, esc, next_input=next_input)
465 465 new_line = indent + call + '\n'
466 466
467 467 return lines_before + [new_line] + lines_after
468 468
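A sketch of the help-at-end transform, including the set_next_input behaviour for mid-expression queries (editorial example, matching the HELP_IN_EXPR test case below):

    from IPython.core.inputtransformer2 import HelpEnd

    HelpEnd((1, 0), (1, 9)).transform(["a = b + c?\n"])
    # -> ["get_ipython().set_next_input('a = b + c');"
    #     "get_ipython().run_line_magic('pinfo', 'c')\n"]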
469 469 def make_tokens_by_line(lines:List[str]):
470 470 """Tokenize a series of lines and group tokens by line.
471 471
472 472 The tokens for a multiline Python string or expression are grouped as one
473 473 line. All lines except the last should keep their line ending ('\\n',
474 474 '\\r\\n') for this to work properly. Use `.splitlines(keepends=True)`,
475 475 for example, when passing a block of text to this function.
476 476
477 477 """
478 478 # NL tokens are used inside multiline expressions, but also after blank
479 479 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
480 480 # We want to group the former case together but split the latter, so we
481 481 # track parentheses level, similar to the internals of tokenize.
482 482
483 483 # reexported from token on 3.7+
484 484 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL # type: ignore
485 485 tokens_by_line:List[List[Any]] = [[]]
486 486 if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
487 487 warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
488 488 parenlev = 0
489 489 try:
490 490 for token in tokenize.generate_tokens(iter(lines).__next__):
491 491 tokens_by_line[-1].append(token)
492 492 if (token.type == NEWLINE) \
493 493 or ((token.type == NL) and (parenlev <= 0)):
494 494 tokens_by_line.append([])
495 495 elif token.string in {'(', '[', '{'}:
496 496 parenlev += 1
497 497 elif token.string in {')', ']', '}'}:
498 498 if parenlev > 0:
499 499 parenlev -= 1
500 500 except tokenize.TokenError:
501 501 # Input ended in a multiline string or expression. That's OK for us.
502 502 pass
503 503
504 504
505 505 if not tokens_by_line[-1]:
506 506 tokens_by_line.pop()
507 507
508 508
509 509 return tokens_by_line
510 510
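A short sketch of the grouping behaviour (editorial example): physical lines joined by an open bracket end up in a single logical-line group.

    from IPython.core.inputtransformer2 import make_tokens_by_line

    lines = ["a = (1 +\n", "     2)\n", "b = 3\n"]
    tbl = make_tokens_by_line(lines)
    tbl[0][0].string   # -> 'a'  (the whole bracketed statement is one group)
    tbl[1][0].string   # -> 'b'  (the next logical line starts a new group)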
511
512 def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
513 """Check if the depth of brackets in the list of tokens drops below 0"""
514 parenlev = 0
515 for token in tokens:
516 if token.string in {"(", "[", "{"}:
517 parenlev += 1
518 elif token.string in {")", "]", "}"}:
519 parenlev -= 1
520 if parenlev < 0:
521 return True
522 return False
523
524
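A sketch of the new helper added in this commit (editorial example); ``check_complete`` below uses it to reject single-line cells whose bracket depth goes negative, which is what the new test at the bottom of this diff exercises:

    from IPython.core.inputtransformer2 import has_sunken_brackets, make_tokens_by_line

    has_sunken_brackets(make_tokens_by_line(["(1 + 2)\n"])[0])  # -> False
    has_sunken_brackets(make_tokens_by_line([")(\n"])[0])       # -> True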
511 525 def show_linewise_tokens(s: str):
512 526 """For investigation and debugging"""
513 527 if not s.endswith('\n'):
514 528 s += '\n'
515 529 lines = s.splitlines(keepends=True)
516 530 for line in make_tokens_by_line(lines):
517 531 print("Line -------")
518 532 for tokinfo in line:
519 533 print(" ", tokinfo)
520 534
521 535 # Arbitrary limit to prevent getting stuck in infinite loops
522 536 TRANSFORM_LOOP_LIMIT = 500
523 537
524 538 class TransformerManager:
525 539 """Applies various transformations to a cell or code block.
526 540
527 541 The key methods for external use are ``transform_cell()``
528 542 and ``check_complete()``.
529 543 """
530 544 def __init__(self):
531 545 self.cleanup_transforms = [
532 546 leading_empty_lines,
533 547 leading_indent,
534 548 classic_prompt,
535 549 ipython_prompt,
536 550 ]
537 551 self.line_transforms = [
538 552 cell_magic,
539 553 ]
540 554 self.token_transformers = [
541 555 MagicAssign,
542 556 SystemAssign,
543 557 EscapedCommand,
544 558 HelpEnd,
545 559 ]
546 560
547 561 def do_one_token_transform(self, lines):
548 562 """Find and run the transform earliest in the code.
549 563
550 564 Returns (changed, lines).
551 565
552 566 This method is called repeatedly until changed is False, indicating
553 567 that all available transformations are complete.
554 568
555 569 The tokens following IPython special syntax might not be valid, so
556 570 the transformed code is retokenised every time to identify the next
557 571 piece of special syntax. Hopefully long code cells are mostly valid
558 572 Python, not using lots of IPython special syntax, so this shouldn't be
559 573 a performance issue.
560 574 """
561 575 tokens_by_line = make_tokens_by_line(lines)
562 576 candidates = []
563 577 for transformer_cls in self.token_transformers:
564 578 transformer = transformer_cls.find(tokens_by_line)
565 579 if transformer:
566 580 candidates.append(transformer)
567 581
568 582 if not candidates:
569 583 # Nothing to transform
570 584 return False, lines
571 585 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
572 586 for transformer in ordered_transformers:
573 587 try:
574 588 return True, transformer.transform(lines)
575 589 except SyntaxError:
576 590 pass
577 591 return False, lines
578 592
579 593 def do_token_transforms(self, lines):
580 594 for _ in range(TRANSFORM_LOOP_LIMIT):
581 595 changed, lines = self.do_one_token_transform(lines)
582 596 if not changed:
583 597 return lines
584 598
585 599 raise RuntimeError("Input transformation still changing after "
586 600 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
587 601
588 602 def transform_cell(self, cell: str) -> str:
589 603 """Transforms a cell of input code"""
590 604 if not cell.endswith('\n'):
591 605 cell += '\n' # Ensure the cell has a trailing newline
592 606 lines = cell.splitlines(keepends=True)
593 607 for transform in self.cleanup_transforms + self.line_transforms:
594 608 lines = transform(lines)
595 609
596 610 lines = self.do_token_transforms(lines)
597 611 return ''.join(lines)
598 612
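A sketch of transform_cell end to end (editorial example; the outputs match the line-magic and system-assignment rewrites shown earlier in this file):

    from IPython.core.inputtransformer2 import TransformerManager

    mgr = TransformerManager()
    mgr.transform_cell("%ls\n")      # -> "get_ipython().run_line_magic('ls', '')\n"
    mgr.transform_cell("a = !ls\n")  # -> "a = get_ipython().getoutput('ls')\n"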
599 613 def check_complete(self, cell: str):
600 614 """Return whether a block of code is ready to execute, or should be continued
601 615
602 616 Parameters
603 617 ----------
604 618 cell : str
605 619 Python input code, which can be multiline.
606 620
607 621 Returns
608 622 -------
609 623 status : str
610 624 One of 'complete', 'incomplete', or 'invalid' if source is not a
611 625 prefix of valid code.
612 626 indent_spaces : int or None
613 627 The number of spaces by which to indent the next line of code. If
614 628 status is not 'incomplete', this is None.
615 629 """
616 630 # Remember whether the cell ends in a newline.
617 631 ends_with_newline = False
618 632 for character in reversed(cell):
619 633 if character == '\n':
620 634 ends_with_newline = True
621 635 break
622 636 elif character.strip():
623 637 break
624 638 else:
625 639 continue
626 640
627 641 if not ends_with_newline:
628 642 # Append a newline for consistent tokenization
629 643 # See https://bugs.python.org/issue33899
630 644 cell += '\n'
631 645
632 646 lines = cell.splitlines(keepends=True)
633 647
634 648 if not lines:
635 649 return 'complete', None
636 650
637 651 if lines[-1].endswith('\\'):
638 652 # Explicit backslash continuation
639 653 return 'incomplete', find_last_indent(lines)
640 654
641 655 try:
642 656 for transform in self.cleanup_transforms:
643 657 if not getattr(transform, 'has_side_effects', False):
644 658 lines = transform(lines)
645 659 except SyntaxError:
646 660 return 'invalid', None
647 661
648 662 if lines[0].startswith('%%'):
649 663 # Special case for cell magics - completion marked by blank line
650 664 if lines[-1].strip():
651 665 return 'incomplete', find_last_indent(lines)
652 666 else:
653 667 return 'complete', None
654 668
655 669 try:
656 670 for transform in self.line_transforms:
657 671 if not getattr(transform, 'has_side_effects', False):
658 672 lines = transform(lines)
659 673 lines = self.do_token_transforms(lines)
660 674 except SyntaxError:
661 675 return 'invalid', None
662 676
663 677 tokens_by_line = make_tokens_by_line(lines)
664 678
679 # Bail if we got a single line and it contains more closing brackets
680 # than opening ones
681 if len(lines) == 1 and has_sunken_brackets(tokens_by_line[0]):
682 return "invalid", None
683
665 684 if not tokens_by_line:
666 685 return 'incomplete', find_last_indent(lines)
667 686
668 687 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
669 688 # We're in a multiline string or expression
670 689 return 'incomplete', find_last_indent(lines)
671 690
672 691 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER} # type: ignore
673 692
674 693 # Pop the last line which only contains DEDENTs and ENDMARKER
675 694 last_token_line = None
676 695 if {t.type for t in tokens_by_line[-1]} in [
677 696 {tokenize.DEDENT, tokenize.ENDMARKER},
678 697 {tokenize.ENDMARKER}
679 698 ] and len(tokens_by_line) > 1:
680 699 last_token_line = tokens_by_line.pop()
681 700
682 701 while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
683 702 tokens_by_line[-1].pop()
684 703
685 704 if not tokens_by_line[-1]:
686 705 return 'incomplete', find_last_indent(lines)
687 706
688 707 if tokens_by_line[-1][-1].string == ':':
689 708 # The last line starts a block (e.g. 'if foo:')
690 709 ix = 0
691 710 while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
692 711 ix += 1
693 712
694 713 indent = tokens_by_line[-1][ix].start[1]
695 714 return 'incomplete', indent + 4
696 715
697 716 if tokens_by_line[-1][0].line.endswith('\\'):
698 717 return 'incomplete', None
699 718
700 719 # At this point, our checks think the code is complete (or invalid).
701 720 # We'll use codeop.compile_command to check this with the real parser
702 721 try:
703 722 with warnings.catch_warnings():
704 723 warnings.simplefilter('error', SyntaxWarning)
705 724 res = compile_command(''.join(lines), symbol='exec')
706 725 except (SyntaxError, OverflowError, ValueError, TypeError,
707 726 MemoryError, SyntaxWarning):
708 727 return 'invalid', None
709 728 else:
710 729 if res is None:
711 730 return 'incomplete', find_last_indent(lines)
712 731
713 732 if last_token_line and last_token_line[0].type == tokenize.DEDENT:
714 733 if ends_with_newline:
715 734 return 'complete', None
716 735 return 'incomplete', find_last_indent(lines)
717 736
718 737 # If there's a blank line at the end, assume we're ready to execute
719 738 if not lines[-1].strip():
720 739 return 'complete', None
721 740
722 741 return 'complete', None
723 742
724 743
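A sketch of check_complete, drawn from the assertions in test_check_complete and the new sunken-bracket test in this diff (editorial example):

    from IPython.core.inputtransformer2 import TransformerManager

    cc = TransformerManager().check_complete
    cc("for a in range(5):")   # -> ('incomplete', 4)
    cc("a = [1,\n2,")          # -> ('incomplete', 0)
    cc("raise = 2")            # -> ('invalid', None)
    cc(")(")                   # -> ('invalid', None)   # the new bail-out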
725 744 def find_last_indent(lines):
726 745 m = _indent_re.match(lines[-1])
727 746 if not m:
728 747 return 0
729 748 return len(m.group(0).replace('\t', ' '*4))
@@ -1,337 +1,355 b''
1 1 """Tests for the token-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2_line for tests for line-based
5 5 transformations.
6 6 """
7 7 import nose.tools as nt
8 8 import string
9 9
10 10 from IPython.core import inputtransformer2 as ipt2
11 11 from IPython.core.inputtransformer2 import make_tokens_by_line, _find_assign_op
12 12
13 13 from textwrap import dedent
14 14
15 15 MULTILINE_MAGIC = ("""\
16 16 a = f()
17 17 %foo \\
18 18 bar
19 19 g()
20 20 """.splitlines(keepends=True), (2, 0), """\
21 21 a = f()
22 22 get_ipython().run_line_magic('foo', ' bar')
23 23 g()
24 24 """.splitlines(keepends=True))
25 25
26 26 INDENTED_MAGIC = ("""\
27 27 for a in range(5):
28 28 %ls
29 29 """.splitlines(keepends=True), (2, 4), """\
30 30 for a in range(5):
31 31 get_ipython().run_line_magic('ls', '')
32 32 """.splitlines(keepends=True))
33 33
34 34 CRLF_MAGIC = ([
35 35 "a = f()\n",
36 36 "%ls\r\n",
37 37 "g()\n"
38 38 ], (2, 0), [
39 39 "a = f()\n",
40 40 "get_ipython().run_line_magic('ls', '')\n",
41 41 "g()\n"
42 42 ])
43 43
44 44 MULTILINE_MAGIC_ASSIGN = ("""\
45 45 a = f()
46 46 b = %foo \\
47 47 bar
48 48 g()
49 49 """.splitlines(keepends=True), (2, 4), """\
50 50 a = f()
51 51 b = get_ipython().run_line_magic('foo', ' bar')
52 52 g()
53 53 """.splitlines(keepends=True))
54 54
55 55 MULTILINE_SYSTEM_ASSIGN = ("""\
56 56 a = f()
57 57 b = !foo \\
58 58 bar
59 59 g()
60 60 """.splitlines(keepends=True), (2, 4), """\
61 61 a = f()
62 62 b = get_ipython().getoutput('foo bar')
63 63 g()
64 64 """.splitlines(keepends=True))
65 65
66 66 #####
67 67
68 68 MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT = ("""\
69 69 def test():
70 70 for i in range(1):
71 71 print(i)
72 72 res =! ls
73 73 """.splitlines(keepends=True), (4, 7), '''\
74 74 def test():
75 75 for i in range(1):
76 76 print(i)
77 77 res =get_ipython().getoutput(\' ls\')
78 78 '''.splitlines(keepends=True))
79 79
80 80 ######
81 81
82 82 AUTOCALL_QUOTE = (
83 83 [",f 1 2 3\n"], (1, 0),
84 84 ['f("1", "2", "3")\n']
85 85 )
86 86
87 87 AUTOCALL_QUOTE2 = (
88 88 [";f 1 2 3\n"], (1, 0),
89 89 ['f("1 2 3")\n']
90 90 )
91 91
92 92 AUTOCALL_PAREN = (
93 93 ["/f 1 2 3\n"], (1, 0),
94 94 ['f(1, 2, 3)\n']
95 95 )
96 96
97 97 SIMPLE_HELP = (
98 98 ["foo?\n"], (1, 0),
99 99 ["get_ipython().run_line_magic('pinfo', 'foo')\n"]
100 100 )
101 101
102 102 DETAILED_HELP = (
103 103 ["foo??\n"], (1, 0),
104 104 ["get_ipython().run_line_magic('pinfo2', 'foo')\n"]
105 105 )
106 106
107 107 MAGIC_HELP = (
108 108 ["%foo?\n"], (1, 0),
109 109 ["get_ipython().run_line_magic('pinfo', '%foo')\n"]
110 110 )
111 111
112 112 HELP_IN_EXPR = (
113 113 ["a = b + c?\n"], (1, 0),
114 114 ["get_ipython().set_next_input('a = b + c');"
115 115 "get_ipython().run_line_magic('pinfo', 'c')\n"]
116 116 )
117 117
118 118 HELP_CONTINUED_LINE = ("""\
119 119 a = \\
120 120 zip?
121 121 """.splitlines(keepends=True), (1, 0),
122 122 [r"get_ipython().set_next_input('a = \\\nzip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
123 123 )
124 124
125 125 HELP_MULTILINE = ("""\
126 126 (a,
127 127 b) = zip?
128 128 """.splitlines(keepends=True), (1, 0),
129 129 [r"get_ipython().set_next_input('(a,\nb) = zip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
130 130 )
131 131
132 132 HELP_UNICODE = (
133 133 ["Ο€.foo?\n"], (1, 0),
134 134 ["get_ipython().run_line_magic('pinfo', 'Ο€.foo')\n"]
135 135 )
136 136
137 137
138 138 def null_cleanup_transformer(lines):
139 139 """
140 140 A cleanup transform that returns an empty list.
141 141 """
142 142 return []
143 143
144 144 def check_make_token_by_line_never_ends_empty():
145 145 """
146 146 Check that no sequence of one or two printable characters ends up producing an empty list of tokens
147 147 """
148 148 from string import printable
149 149 for c in printable:
150 150 nt.assert_not_equal(make_tokens_by_line(c)[-1], [])
151 151 for k in printable:
152 152 nt.assert_not_equal(make_tokens_by_line(c+k)[-1], [])
153 153
154 154 def check_find(transformer, case, match=True):
155 155 sample, expected_start, _ = case
156 156 tbl = make_tokens_by_line(sample)
157 157 res = transformer.find(tbl)
158 158 if match:
159 159 # start_line is stored 0-indexed, expected values are 1-indexed
160 160 nt.assert_equal((res.start_line+1, res.start_col), expected_start)
161 161 return res
162 162 else:
163 163 nt.assert_is(res, None)
164 164
165 165 def check_transform(transformer_cls, case):
166 166 lines, start, expected = case
167 167 transformer = transformer_cls(start)
168 168 nt.assert_equal(transformer.transform(lines), expected)
169 169
170 170 def test_continued_line():
171 171 lines = MULTILINE_MAGIC_ASSIGN[0]
172 172 nt.assert_equal(ipt2.find_end_of_continued_line(lines, 1), 2)
173 173
174 174 nt.assert_equal(ipt2.assemble_continued_line(lines, (1, 5), 2), "foo bar")
175 175
176 176 def test_find_assign_magic():
177 177 check_find(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
178 178 check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN, match=False)
179 179 check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT, match=False)
180 180
181 181 def test_transform_assign_magic():
182 182 check_transform(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
183 183
184 184 def test_find_assign_system():
185 185 check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
186 186 check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT)
187 187 check_find(ipt2.SystemAssign, (["a = !ls\n"], (1, 5), None))
188 188 check_find(ipt2.SystemAssign, (["a=!ls\n"], (1, 2), None))
189 189 check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False)
190 190
191 191 def test_transform_assign_system():
192 192 check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
193 193 check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT)
194 194
195 195 def test_find_magic_escape():
196 196 check_find(ipt2.EscapedCommand, MULTILINE_MAGIC)
197 197 check_find(ipt2.EscapedCommand, INDENTED_MAGIC)
198 198 check_find(ipt2.EscapedCommand, MULTILINE_MAGIC_ASSIGN, match=False)
199 199
200 200 def test_transform_magic_escape():
201 201 check_transform(ipt2.EscapedCommand, MULTILINE_MAGIC)
202 202 check_transform(ipt2.EscapedCommand, INDENTED_MAGIC)
203 203 check_transform(ipt2.EscapedCommand, CRLF_MAGIC)
204 204
205 205 def test_find_autocalls():
206 206 for case in [AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN]:
207 207 print("Testing %r" % case[0])
208 208 check_find(ipt2.EscapedCommand, case)
209 209
210 210 def test_transform_autocall():
211 211 for case in [AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN]:
212 212 print("Testing %r" % case[0])
213 213 check_transform(ipt2.EscapedCommand, case)
214 214
215 215 def test_find_help():
216 216 for case in [SIMPLE_HELP, DETAILED_HELP, MAGIC_HELP, HELP_IN_EXPR]:
217 217 check_find(ipt2.HelpEnd, case)
218 218
219 219 tf = check_find(ipt2.HelpEnd, HELP_CONTINUED_LINE)
220 220 nt.assert_equal(tf.q_line, 1)
221 221 nt.assert_equal(tf.q_col, 3)
222 222
223 223 tf = check_find(ipt2.HelpEnd, HELP_MULTILINE)
224 224 nt.assert_equal(tf.q_line, 1)
225 225 nt.assert_equal(tf.q_col, 8)
226 226
227 227 # ? in a comment does not trigger help
228 228 check_find(ipt2.HelpEnd, (["foo # bar?\n"], None, None), match=False)
229 229 # Nor in a string
230 230 check_find(ipt2.HelpEnd, (["foo = '''bar?\n"], None, None), match=False)
231 231
232 232 def test_transform_help():
233 233 tf = ipt2.HelpEnd((1, 0), (1, 9))
234 234 nt.assert_equal(tf.transform(HELP_IN_EXPR[0]), HELP_IN_EXPR[2])
235 235
236 236 tf = ipt2.HelpEnd((1, 0), (2, 3))
237 237 nt.assert_equal(tf.transform(HELP_CONTINUED_LINE[0]), HELP_CONTINUED_LINE[2])
238 238
239 239 tf = ipt2.HelpEnd((1, 0), (2, 8))
240 240 nt.assert_equal(tf.transform(HELP_MULTILINE[0]), HELP_MULTILINE[2])
241 241
242 242 tf = ipt2.HelpEnd((1, 0), (1, 0))
243 243 nt.assert_equal(tf.transform(HELP_UNICODE[0]), HELP_UNICODE[2])
244 244
245 245 def test_find_assign_op_dedent():
246 246 """
247 247 Check that empty tokens like DEDENT are not counted as parens
248 248 """
249 249 class Tk:
250 250 def __init__(self, s):
251 251 self.string = s
252 252
253 253 nt.assert_equal(_find_assign_op([Tk(s) for s in ('','a','=','b')]), 2)
254 254 nt.assert_equal(_find_assign_op([Tk(s) for s in ('','(', 'a','=','b', ')', '=' ,'5')]), 6)
255 255
256 256 def test_check_complete():
257 257 cc = ipt2.TransformerManager().check_complete
258 258 nt.assert_equal(cc("a = 1"), ('complete', None))
259 259 nt.assert_equal(cc("for a in range(5):"), ('incomplete', 4))
260 260 nt.assert_equal(cc("for a in range(5):\n if a > 0:"), ('incomplete', 8))
261 261 nt.assert_equal(cc("raise = 2"), ('invalid', None))
262 262 nt.assert_equal(cc("a = [1,\n2,"), ('incomplete', 0))
263 263 nt.assert_equal(cc(")"), ('incomplete', 0))
264 264 nt.assert_equal(cc("\\\r\n"), ('incomplete', 0))
265 265 nt.assert_equal(cc("a = '''\n hi"), ('incomplete', 3))
266 266 nt.assert_equal(cc("def a():\n x=1\n global x"), ('invalid', None))
267 267 nt.assert_equal(cc("a \\ "), ('invalid', None)) # Nothing allowed after backslash
268 268 nt.assert_equal(cc("1\\\n+2"), ('complete', None))
269 269 nt.assert_equal(cc("exit"), ('complete', None))
270 270
271 271 example = dedent("""
272 272 if True:
273 273 a=1""" )
274 274
275 275 nt.assert_equal(cc(example), ('incomplete', 4))
276 276 nt.assert_equal(cc(example+'\n'), ('complete', None))
277 277 nt.assert_equal(cc(example+'\n '), ('complete', None))
278 278
279 279 # no need to loop on all the letters/numbers.
280 280 short = '12abAB'+string.printable[62:]
281 281 for c in short:
282 282 # test does not raise:
283 283 cc(c)
284 284 for k in short:
285 285 cc(c+k)
286 286
287 287 nt.assert_equal(cc("def f():\n x=0\n \\\n "), ('incomplete', 2))
288 288
289 289 def test_check_complete_II():
290 290 """
291 291 Test that multiple line strings are properly handled.
292 292
293 293 Separate test function for convenience
294 294
295 295 """
296 296 cc = ipt2.TransformerManager().check_complete
297 297 nt.assert_equal(cc('''def foo():\n """'''), ('incomplete', 4))
298 298
299 299
300 def test_check_complete_invalidates_sunken_brackets():
301 """
302 Test that a single line with more closing brackets than opening ones is
303 interpreted as invalid
304 """
305 cc = ipt2.TransformerManager().check_complete
306 nt.assert_equal(cc(")"), ("invalid", None))
307 nt.assert_equal(cc("]"), ("invalid", None))
308 nt.assert_equal(cc("}"), ("invalid", None))
309 nt.assert_equal(cc(")("), ("invalid", None))
310 nt.assert_equal(cc("]["), ("invalid", None))
311 nt.assert_equal(cc("}{"), ("invalid", None))
312 nt.assert_equal(cc("[()("), ("invalid", None))
313 nt.assert_equal(cc("())("), ("invalid", None))
314 nt.assert_equal(cc(")[]("), ("invalid", None))
315 nt.assert_equal(cc("()]("), ("invalid", None))
316
317
300 318 def test_null_cleanup_transformer():
301 319 manager = ipt2.TransformerManager()
302 320 manager.cleanup_transforms.insert(0, null_cleanup_transformer)
303 321 assert manager.transform_cell("") == ""
304 322
305 323
306 324
307 325
308 326 def test_side_effects_I():
309 327 count = 0
310 328 def counter(lines):
311 329 nonlocal count
312 330 count += 1
313 331 return lines
314 332
315 333 counter.has_side_effects = True
316 334
317 335 manager = ipt2.TransformerManager()
318 336 manager.cleanup_transforms.insert(0, counter)
319 337 assert manager.check_complete("a=1\n") == ('complete', None)
320 338 assert count == 0
321 339
322 340
323 341
324 342
325 343 def test_side_effects_II():
326 344 count = 0
327 345 def counter(lines):
328 346 nonlocal count
329 347 count += 1
330 348 return lines
331 349
332 350 counter.has_side_effects = True
333 351
334 352 manager = ipt2.TransformerManager()
335 353 manager.line_transforms.insert(0, counter)
336 354 assert manager.check_complete("b=1\n") == ('complete', None)
337 355 assert count == 0