##// END OF EJS Templates
Prevent infinite loops in input transformation
Thomas Kluyver -
Show More
@@ -1,558 +1,564 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5 """
6 6
7 7 # Copyright (c) IPython Development Team.
8 8 # Distributed under the terms of the Modified BSD License.
9 9
10 10 from codeop import compile_command
11 11 import re
12 12 import tokenize
13 13 from typing import List, Tuple
14 14 import warnings
15 15
# Matches a run of spaces and/or tabs at the very start of a string.
_indent_re = re.compile(r'^[ \t]+')

def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace prefix
    is removed from every line that starts with it. Lines that do not start
    with that exact prefix are returned unchanged.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        The same list object if there is nothing to strip (empty input or an
        unindented first line), otherwise a new list of dedented lines.
    """
    if not lines:
        # No first line to inspect; avoid IndexError on lines[0].
        return lines
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
31 31
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return *lines* with at most one prompt match removed from each."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Strip prompts from *lines* if the first or second line has one.

        Returns the input list unchanged when it is empty or when no prompt
        is detected on the first two lines.
        """
        if not lines:
            # Nothing to test against; avoid IndexError on lines[0].
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines

# Plain Python prompts: '>>> ' and '... ' (or the bare prompt at end of line).
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# IPython prompts: 'In [N]: ' and the '...: ' continuation prompt.
ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
69 69
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a single ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        Lines of the cell, each ending with a newline.

    Returns
    -------
    list of str
        *lines* unchanged if the cell is empty, does not start with ``%%``,
        or is a help request such as ``%%foo?``; otherwise a one-element list
        holding the ``get_ipython().run_cell_magic(...)`` call.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    # Raw string: '\w' and '\?' are regex escapes, not string escapes.
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # [2:-1] drops the leading '%%' and the trailing newline.
    magic_name, _, first_line = lines[0][2:-1].partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
80 80
81 81 # -----
82 82
83 83 def _find_assign_op(token_line):
84 84 # Get the index of the first assignment in the line ('=' not inside brackets)
85 85 # We don't try to support multiple special assignment (a = b = %foo)
86 86 paren_level = 0
87 87 for i, ti in enumerate(token_line):
88 88 s = ti.string
89 89 if s == '=' and paren_level == 0:
90 90 return i
91 91 if s in '([{':
92 92 paren_level += 1
93 93 elif s in ')]}':
94 94 if paren_level > 0:
95 95 paren_level -= 1
96 96
def find_end_of_continued_line(lines, start_line: int):
    """Locate where a backslash-continued line stops.

    Uses 0-indexed line numbers. Scanning starts at *start_line* and moves
    forward while the current line ends with a backslash and newline. If
    every line up to the end of *lines* is continued, the value returned is
    ``len(lines)``.
    """
    total = len(lines)
    current = start_line
    while True:
        if not lines[current].endswith('\\\n'):
            return current
        current += 1
        if current >= total:
            return current
108 108
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Join the pieces of a backslash-continued line into one string.

    Uses 0-indexed line numbers. *start* is (lineno, colno); text before
    *colno* on the first line is dropped. Pieces are joined with a single
    space, and the result has no trailing newline.
    """
    first_lineno = start[0]
    pieces = [lines[first_lineno][start[1]:]]
    pieces.extend(lines[first_lineno + 1:end_line + 1])

    # Every piece but the last ends in backslash + newline: drop both chars.
    trimmed = [piece[:-2] for piece in pieces[:-1]]
    # The last piece only loses its newline.
    trimmed.append(pieces[-1][:-1])
    return ' '.join(trimmed)
117 117
class TokenTransformBase:
    """Common base for transformations anchored at a token position.

    Instances record where the special syntax starts; subclasses supply
    ``transform`` to rewrite the lines.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a (row, col) pair with a 1-based row; store it 0-based.
        row, col = start[0], start[1]
        self.start_line = row - 1
        self.start_col = col

    def sortby(self):
        """Sort key: position first, then priority as the tie-breaker."""
        key = (self.start_line, self.start_col, self.priority)
        return key

    def transform(self, lines: List[str]):
        """Rewrite *lines*; must be provided by subclasses."""
        raise NotImplementedError
131 131
class MagicAssign(TokenTransformBase):
    """Transformer for assignment from a line magic (``a = %foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` token if one is found,
        or None otherwise.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after the '=' are inspected, so the line needs at
            # least assign_ix + 3 tokens; '>' keeps line[assign_ix+2] in range.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by find.

        The text from the ``%`` to the end of the (possibly
        backslash-continued) line is replaced with a
        ``get_ipython().run_line_magic(name, args)`` call; the text before
        the ``%`` is kept as the left-hand side.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
163 163
164 164
class SystemAssign(TokenTransformBase):
    """Transformer for assignment from a system command (``a = !foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance positioned at the ``!`` token, or None when no
        such assignment exists.
        """
        for tokens in tokens_by_line:
            eq_pos = _find_assign_op(tokens)
            if eq_pos is None:
                continue
            if len(tokens) < eq_pos + 2:
                continue
            if tokens[eq_pos + 1].type != tokenize.ERRORTOKEN:
                continue

            # Walk the run of error tokens after '='; only whitespace may
            # come before the '!'.
            for tok in tokens[eq_pos + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break

    def transform(self, lines: List[str]):
        """Transform a system assignment found by find.

        Everything from the ``!`` to the end of the (possibly continued)
        line becomes the argument of ``get_ipython().getoutput``.
        """
        first, col = self.start_line, self.start_col

        last = find_end_of_continued_line(lines, first)
        rhs = assemble_continued_line(lines, (first, col), last)
        assert rhs.startswith('!'), rhs

        prefix = lines[first][:col]
        replacement = prefix + "get_ipython().getoutput({!r})".format(rhs[1:]) + '\n'

        return lines[:first] + [replacement] + lines[last + 1:]
203 203
204 204 # The escape sequences that define the syntax transformations IPython will
205 205 # apply to user input. These can NOT be just changed here: many regular
206 206 # expressions and other parts of the code may use their hardcoded values, and
207 207 # for all intents and purposes they constitute the 'IPython syntax', so they
208 208 # should be considered fixed.
209 209
ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# All one-character escapes.
ESCAPE_SINGLES = {
    ESC_SHELL,
    ESC_HELP,
    ESC_MAGIC,
    ESC_QUOTE,
    ESC_QUOTE2,
    ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
222 222
223 223 def _make_help_call(target, esc, next_input=None):
224 224 """Prepares a pinfo(2)/psearch call from a target name and the escape
225 225 (i.e. ? or ??)"""
226 226 method = 'pinfo2' if esc == '??' \
227 227 else 'psearch' if '*' in target \
228 228 else 'pinfo'
229 229 arg = " ".join([method, target])
230 230 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
231 231 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
232 232 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
233 233 if next_input is None:
234 234 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
235 235 else:
236 236 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
237 237 (next_input, t_magic_name, t_magic_arg_s)
238 238
239 239 def _tr_help(content):
240 240 "Translate lines escaped with: ?"
241 241 # A naked help line should just fire the intro help screen
242 242 if not content:
243 243 return 'get_ipython().show_usage()'
244 244
245 245 return _make_help_call(content, '?')
246 246
247 247 def _tr_help2(content):
248 248 "Translate lines escaped with: ??"
249 249 # A naked help line should just fire the intro help screen
250 250 if not content:
251 251 return 'get_ipython().show_usage()'
252 252
253 253 return _make_help_call(content, '??')
254 254
255 255 def _tr_magic(content):
256 256 "Translate lines escaped with: %"
257 257 name, _, args = content.partition(' ')
258 258 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
259 259
260 260 def _tr_quote(content):
261 261 "Translate lines escaped with: ,"
262 262 name, _, args = content.partition(' ')
263 263 return '%s("%s")' % (name, '", "'.join(args.split()) )
264 264
265 265 def _tr_quote2(content):
266 266 "Translate lines escaped with: ;"
267 267 name, _, args = content.partition(' ')
268 268 return '%s("%s")' % (name, args)
269 269
270 270 def _tr_paren(content):
271 271 "Translate lines escaped with: /"
272 272 name, _, args = content.partition(' ')
273 273 return '%s(%s)' % (name, ", ".join(args.split()))
274 274
# Dispatch table: escape prefix -> callable that takes the text following
# the prefix and returns the Python source to run in its place.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
283 283
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands (``%foo``, ``!foo``, etc.)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns an instance positioned at the escape character if one is
        found, or None otherwise.
        """
        for line in tokens_by_line:
            # Skip INDENT/DEDENT tokens, staying in bounds: a token group can
            # be empty (tokenizing stopped early) or hold only such tokens.
            ix = 0
            n_tokens = len(line)
            while ix < n_tokens \
                    and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= n_tokens:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)
        return None

    def transform(self, lines):
        """Replace the escaped command with the equivalent Python call.

        The escape (two characters if the line starts with one of
        ESCAPE_DOUBLES, otherwise one) selects the translator from ``tr``;
        the rest of the (possibly backslash-continued) line is its content.
        Text before the escape on its line is kept as indentation.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
316 316
# Matches a (possibly dotted, possibly %-prefixed) name followed by ? or ??
# at the end of the string. Group 1 is the name, group 3 the ?/??.
_help_end_re = re.compile(r"""(%{0,2}
                              [a-zA-Z_*][\w*]*        # Variable name
                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)

class HelpEnd(TokenTransformBase):
    """Transformer for help requests written as a trailing ``?`` or ``??``."""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first line whose last-but-one token is ``?``.

        Returns an instance recording the position of the line's first
        non-INDENT/DEDENT token and of that ``?``, or None if no line matches.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)
        return None

    def transform(self, lines):
        """Replace the help request with a pinfo/pinfo2/psearch call.

        Raises
        ------
        SyntaxError
            If the text before the ``?`` is not something help can be
            requested on (no name immediately before the question mark).
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if m is None:
            # Raise rather than assert: this is a check on user input, must
            # survive -O, and SyntaxError is what check_complete handles.
            raise SyntaxError(content)
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
367 367
def make_tokens_by_line(lines):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are
    grouped as one line.

    Parameters
    ----------
    lines : list of str
        Source lines, each ending with a newline.

    Returns
    -------
    list of list
        One list of tokens per logical line. If tokenizing stops early with
        ``tokenize.TokenError``, the tokens produced so far are returned.
        A trailing empty group is never returned, so the result may be an
        empty list.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
    tokens_by_line = [[]]
    parenlev = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            tokens_by_line[-1].append(token)
            if (token.type == NEWLINE) \
                    or ((token.type == NL) and (parenlev <= 0)):
                tokens_by_line.append([])
            elif token.string in {'(', '[', '{'}:
                parenlev += 1
            elif token.string in {')', ']', '}'}:
                if parenlev > 0:
                    parenlev -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # If tokenizing stopped right after a line break, the group opened for
    # the next line never received a token. Drop it so callers can index
    # into every group safely.
    if not tokens_by_line[-1]:
        tokens_by_line.pop()

    return tokens_by_line
397 397
def show_linewise_tokens(s: str):
    """For investigation: print the tokens of *s*, grouped by logical line."""
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tok in group:
            print(" ", tok)
407 407
# Arbitrary limit to prevent getting stuck in infinite loops
TRANSFORM_LOOP_LIMIT = 500

class TransformerManager:
    """Applies the cleanup, line-based and token-based transformations."""

    def __init__(self):
        # Each cleanup/line transform is a callable taking and returning a
        # list of lines; they run in the order listed.
        self.cleanup_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        self.line_transforms = [
            cell_magic,
        ]
        # Classes whose ``find`` classmethod locates special syntax in the
        # token stream.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines

        # Earliest position wins; priority breaks ties at the same position.
        transformer = min(candidates, key=TokenTransformBase.sortby)
        return True, transformer.transform(lines)

    def do_token_transforms(self, lines):
        """Apply token transforms until none applies any more.

        Raises
        ------
        RuntimeError
            If the code is still changing after TRANSFORM_LOOP_LIMIT rounds.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str):
        """Run every transformation over *cell* and return the new source."""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        if lines[-1][:-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)
        # No tokens at all, or an empty final group, means tokenizing stopped
        # before the end of the input. Treat that like a missing ENDMARKER
        # instead of failing on the index.
        if (not tokens_by_line) or (not tokens_by_line[-1]) \
                or tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        # Find the last token on the previous line that's not NEWLINE or COMMENT
        toks_last_line = tokens_by_line[-2]
        ix = len(toks_last_line) - 1
        while ix >= 0 and toks_last_line[ix].type in {tokenize.NEWLINE,
                                                      tokenize.COMMENT}:
            ix -= 1

        if toks_last_line[ix].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            indent = toks_last_line[ix].start[1]
            return 'incomplete', indent + 4

        # If there's a blank line at the end, assume we're ready to execute.
        if not lines[-1].strip():
            return 'complete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser.

        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)
            return 'complete', None
552 558
553 559
def find_last_indent(lines):
    """Return the indentation width of the last line.

    Each tab in the leading whitespace counts as four spaces; a line with
    no leading whitespace gives 0.
    """
    match = _indent_re.match(lines[-1])
    if match is None:
        return 0
    leading = match.group(0)
    return len(leading.replace('\t', ' ' * 4))
General Comments 0
You need to be logged in to leave comments. Login now