##// END OF EJS Templates
Don't let parentheses level go below 0
Thomas Kluyver -
Show More
@@ -1,556 +1,558 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5 """
6 6
7 7 # Copyright (c) IPython Development Team.
8 8 # Distributed under the terms of the Modified BSD License.
9 9
10 10 from codeop import compile_command
11 11 import re
12 12 import tokenize
13 13 from typing import List, Tuple
14 14 import warnings
15 15
# Matches a run of leading spaces and/or tabs at the start of a line.
_indent_re = re.compile(r'^[ \t]+')


def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace is
    removed from every line that starts with exactly that prefix. Lines that
    do not start with the prefix are left untouched.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        The input list itself when there is nothing to strip (including when
        it is empty), otherwise a new list with the prefix removed.
    """
    if not lines:
        # Nothing to inspect; indexing lines[0] would raise IndexError.
        return lines
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
31 31
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return *lines* with at most one prompt match removed from each."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Strip prompts from *lines* if the first or second line has one.

        An empty list is returned unchanged rather than raising IndexError.
        """
        if not lines:
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
62 62
# Strips plain Python prompts. The initial pattern only accepts ``>>>`` so
# that a leading ``...`` on the first line is not treated as a prompt.
classic_prompt = PromptStripper(
    re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
)

# Strips IPython prompts: ``In [N]: `` and the ``...: `` continuation form.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
69 69
def cell_magic(lines):
    """Rewrite a cell starting with ``%%name`` into a ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell. The first line is expected to end with a
        newline, because its final character is dropped when the magic name
        and arguments are extracted.

    Returns
    -------
    list of str
        *lines* unchanged when the cell is empty, does not start with ``%%``,
        or is a help request such as ``%%name?``. Otherwise a one-element
        list holding the generated ``get_ipython().run_cell_magic(...)`` line.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    # Raw string: '\w' and '\?' are not valid escapes in a normal str literal.
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = lines[0][2:-1].partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
80 80
81 81 # -----
82 82
83 83 def _find_assign_op(token_line):
84 # Find the first assignment in the line ('=' not inside brackets)
84 # Get the index of the first assignment in the line ('=' not inside brackets)
85 85 # We don't try to support multiple special assignment (a = b = %foo)
86 86 paren_level = 0
87 87 for i, ti in enumerate(token_line):
88 88 s = ti.string
89 89 if s == '=' and paren_level == 0:
90 90 return i
91 91 if s in '([{':
92 92 paren_level += 1
93 93 elif s in ')]}':
94 if paren_level > 0:
94 95 paren_level -= 1
95 96
def find_end_of_continued_line(lines, start_line: int):
    """Locate where a backslash-continued line stops.

    Walks forward from *start_line* (0-indexed) for as long as each line ends
    in a backslash followed by a newline.

    Returns the 0-indexed number of the first line that does not end that
    way. If every line through the end of *lines* is continued, the value
    returned is ``len(lines)``.
    """
    total = len(lines)
    current = start_line
    while True:
        if not lines[current].endswith('\\\n'):
            return current
        current += 1
        if current >= total:
            # Ran off the end while still inside a continuation.
            return current
107 108
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Join the pieces of a backslash-continued line into one string.

    Line numbers are 0-indexed. *start* is ``(lineno, colno)``: the text is
    taken from that column of the first line, followed by every whole line
    up to and including *end_line*.

    The last two characters (backslash and newline) are dropped from every
    piece except the last, which only loses its final character. The pieces
    are then joined with single spaces.
    """
    first_row = start[0]
    pieces = [lines[first_row][start[1]:]]
    pieces.extend(lines[first_row + 1:end_line + 1])

    *continued, final = pieces
    trimmed = [piece[:-2] for piece in continued]  # Strip backslash+newline
    trimmed.append(final[:-1])                     # Strip newline from last line
    return ' '.join(trimmed)
116 117
class TokenTransformBase:
    """Base class for transformations located by inspecting tokens.

    An instance records where in the cell its match starts; subclasses
    supply ``transform`` to rewrite the lines.
    """
    # Tie-breaker for matches at the same location: the lower number wins.
    priority = 10

    def __init__(self, start):
        # *start* is (line, column) with a 1-indexed line; store it 0-indexed.
        line_no = start[0]
        self.start_line = line_no - 1
        self.start_col = start[1]

    def sortby(self):
        """Sort key: position in the cell first, then priority."""
        return (self.start_line, self.start_col, self.priority)

    def transform(self, lines: List[str]):
        """Rewrite *lines*; must be overridden by subclasses."""
        raise NotImplementedError
130 131
class MagicAssign(TokenTransformBase):
    """Transformer for assigning the result of a line magic (``a = %foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` if found, or None. The
        token position it is built from uses a 1-indexed line number.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # line[assign_ix+2] is read below, so the line must hold at least
            # assign_ix + 3 tokens; '>=' here would allow an IndexError.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by find.

        Replaces the (possibly backslash-continued) magic with a
        ``get_ipython().run_line_magic(name, args)`` call, keeping whatever
        precedes the ``%`` on the line. Returns a new list of lines.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
162 163
163 164
class SystemAssign(TokenTransformBase):
    """Transformer for assigning the output of a system command (``a = !foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance positioned at the ``!`` if found, or None. The
        token position it is built from uses a 1-indexed line number.
        """
        for toks in tokens_by_line:
            eq_ix = _find_assign_op(toks)
            if eq_ix is None or len(toks) < eq_ix + 2:
                continue

            # Walk the run of ERRORTOKENs right after '='. Whitespace-only
            # tokens are skipped; anything else other than '!' ends the search
            # on this line.
            for tok in toks[eq_ix + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by find.

        Replaces the (possibly backslash-continued) command with a
        ``get_ipython().getoutput(cmd)`` call, keeping whatever precedes the
        ``!`` on the line. Returns a new list of lines.
        """
        row, col = self.start_line, self.start_col

        prefix = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('!'), rhs

        call = "get_ipython().getoutput({!r})".format(rhs[1:])
        replacement = [prefix + call + '\n']
        return lines[:row] + replacement + lines[last_row + 1:]
202 203
# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Every one-character escape.
ESCAPE_SINGLES = {
    ESC_SHELL, ESC_HELP, ESC_MAGIC, ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
221 222
222 223 def _make_help_call(target, esc, next_input=None):
223 224 """Prepares a pinfo(2)/psearch call from a target name and the escape
224 225 (i.e. ? or ??)"""
225 226 method = 'pinfo2' if esc == '??' \
226 227 else 'psearch' if '*' in target \
227 228 else 'pinfo'
228 229 arg = " ".join([method, target])
229 230 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
230 231 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
231 232 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
232 233 if next_input is None:
233 234 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
234 235 else:
235 236 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
236 237 (next_input, t_magic_name, t_magic_arg_s)
237 238
238 239 def _tr_help(content):
239 240 "Translate lines escaped with: ?"
240 241 # A naked help line should just fire the intro help screen
241 242 if not content:
242 243 return 'get_ipython().show_usage()'
243 244
244 245 return _make_help_call(content, '?')
245 246
246 247 def _tr_help2(content):
247 248 "Translate lines escaped with: ??"
248 249 # A naked help line should just fire the intro help screen
249 250 if not content:
250 251 return 'get_ipython().show_usage()'
251 252
252 253 return _make_help_call(content, '??')
253 254
254 255 def _tr_magic(content):
255 256 "Translate lines escaped with: %"
256 257 name, _, args = content.partition(' ')
257 258 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
258 259
259 260 def _tr_quote(content):
260 261 "Translate lines escaped with: ,"
261 262 name, _, args = content.partition(' ')
262 263 return '%s("%s")' % (name, '", "'.join(args.split()) )
263 264
264 265 def _tr_quote2(content):
265 266 "Translate lines escaped with: ;"
266 267 name, _, args = content.partition(' ')
267 268 return '%s("%s")' % (name, args)
268 269
269 270 def _tr_paren(content):
270 271 "Translate lines escaped with: /"
271 272 name, _, args = content.partition(' ')
272 273 return '%s(%s)' % (name, ", ".join(args.split()))
273 274
# Maps each escape sequence to a callable that turns the text following the
# escape into Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
282 283
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns an instance positioned at the escape if found, or None. The
        token position it is built from uses a 1-indexed line number.
        """
        for line in tokens_by_line:
            ix = 0
            n_tokens = len(line)
            # Skip leading INDENT/DEDENT tokens without running off the end.
            while ix < n_tokens \
                    and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= n_tokens:
                # Empty group, or one holding only INDENT/DEDENT tokens:
                # there is nothing here that could be an escape.
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)
        return None

    def transform(self, lines):
        """Replace the escaped command with the code produced by ``tr``.

        The (possibly backslash-continued) command is split into its escape
        and content, with two-character escapes checked first. The matching
        ``tr`` entry produces the replacement, which keeps the original
        indentation. Returns a new list of lines.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
315 316
316 317 _help_end_re = re.compile(r"""(%{0,2}
317 318 [a-zA-Z_*][\w*]* # Variable name
318 319 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
319 320 )
320 321 (\?\??)$ # ? or ??
321 322 """,
322 323 re.VERBOSE)
323 324
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax at the end of a line: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        """Record the start of the line and the location of the ``?``.

        Both *start* and *q_locn* are (line, column) with 1-indexed lines.
        """
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first line whose last-but-one token is ``?``.

        Returns an instance, or None if no such line exists.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)
        return None

    def transform(self, lines):
        """Replace the help request with a pinfo/pinfo2/psearch call.

        Raises
        ------
        SyntaxError
            If the text before the ``?`` is not something that can be
            queried, i.e. it does not match ``_help_end_re``.
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if m is None:
            # Malformed user input, not an internal invariant: report it as a
            # syntax error (an assert would vanish under ``python -O``).
            raise SyntaxError(content)
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
366 367
def make_tokens_by_line(lines):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are
    grouped as one line.

    Returns a list of token lists. A new group is started after every
    NEWLINE token, and after an NL token seen outside brackets. If
    tokenizing stops with a TokenError, the groups collected so far are
    returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track bracket depth, similar to the internals of tokenize.
    openers = frozenset('([{')
    closers = frozenset(')]}')
    groups = [[]]
    depth = 0
    try:
        for tok in tokenize.generate_tokens(iter(lines).__next__):
            groups[-1].append(tok)
            ends_group = tok.type == tokenize.NEWLINE \
                or (tok.type == tokenize.NL and depth <= 0)
            if ends_group:
                groups.append([])
            elif tok.string in openers:
                depth += 1
            elif tok.string in closers and depth > 0:
                # Never let the depth go below 0 on unbalanced closers.
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    return groups
395 397
def show_linewise_tokens(s: str):
    """Print the tokens of *s* grouped by line, for investigation.

    A trailing newline is appended to *s* if it lacks one.
    """
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tok in group:
            print(" ", tok)
405 407
class TransformerManager:
    """Applies the input transformations to cells of code.

    Three groups of transformations are held as instance attributes and run
    in this order: ``cleanup_transforms`` and ``line_transforms`` (callables
    taking and returning a list of lines), then ``token_transformers``
    (classes with a ``find`` classmethod and a ``transform`` method).
    """
    def __init__(self):
        self.cleanup_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        self.line_transforms = [
            cell_magic,
        ]
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines

        # Earliest match wins; priority breaks ties at the same location.
        transformer = min(candidates, key=TokenTransformBase.sortby)
        return True, transformer.transform(lines)

    def do_token_transforms(self, lines):
        """Apply token-based transforms until none of them matches."""
        while True:
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

    def transform_cell(self, cell: str):
        """Transform a cell of input into plain Python source.

        Parameters
        ----------
        cell : str
            The source of the cell, possibly multiline.

        Returns
        -------
        str
            The transformed source.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        if lines[-1][:-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)
        # The last group can be empty when tokenizing stopped right after a
        # completed line (e.g. inside a multiline string on the next line);
        # treat that like any other missing ENDMARKER instead of indexing it.
        if not tokens_by_line[-1] \
                or tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        # Find the last token on the previous line that's not NEWLINE or COMMENT
        toks_last_line = tokens_by_line[-2]
        ix = len(toks_last_line) - 1
        while ix >= 0 and toks_last_line[ix].type in {tokenize.NEWLINE,
                                                      tokenize.COMMENT}:
            ix -= 1

        if toks_last_line[ix].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            indent = toks_last_line[ix].start[1]
            return 'incomplete', indent + 4

        # If there's a blank line at the end, assume we're ready to execute.
        if not lines[-1].strip():
            return 'complete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser.

        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)
        return 'complete', None
550 552
551 553
def find_last_indent(lines):
    """Return the indentation width of the last line, counting a tab as 4.

    Returns 0 when the last line does not start with spaces or tabs.
    """
    match = _indent_re.match(lines[-1])
    if match is None:
        return 0
    return sum(4 if ch == '\t' else 1 for ch in match.group(0))
General Comments 0
You need to be logged in to leave comments. Login now