##// END OF EJS Templates
Fix indentation of the transformer regex
Blazej Michalik -
Show More
@@ -1,775 +1,775 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Optional, Any
17 17 import warnings
18 18
19 19 _indent_re = re.compile(r'^[ \t]+')
20 20
def leading_empty_lines(lines):
    """Drop blank lines from the start of a cell.

    Lines that are empty or consist only of whitespace are discarded until
    the first line with real content; everything from that line onwards is
    kept.  If no line has content, *lines* is returned unchanged.
    """
    first_real = next(
        (ix for ix, text in enumerate(lines) if text and not text.isspace()),
        None,
    )
    if first_real is None:
        # Empty input, or nothing but blank lines: leave it alone.
        return lines
    return lines[first_real:]
33 33
def leading_indent(lines):
    """Strip the first line's indentation from every line of the cell.

    If the first line starts with spaces or tabs, that exact run of
    whitespace is removed from each line that begins with it.  Lines that do
    not start with the same whitespace are left untouched.
    """
    if not lines:
        return lines
    first = lines[0]
    # Leading run of spaces/tabs on the first line (may be empty).
    prefix = first[:len(first) - len(first.lstrip(' \t'))]
    if not prefix:
        return lines
    width = len(prefix)
    dedented = []
    for text in lines:
        dedented.append(text[width:] if text.startswith(prefix) else text)
    return dedented
49 49
class PromptStripper:
    """Strip input prompts from the lines of a pasted block.

    Parameters
    ----------
    prompt_re : regular expression
        A compiled pattern matching any input prompt, including the
        continuation prompt (e.g. ``...``).
    initial_re : regular expression, optional
        A compiled pattern matching only the initial prompt, not the
        continuation one.  When omitted, *prompt_re* is used for the first
        line as well.  Mainly useful for plain Python prompts (``>>>``),
        where the continuation prompt ``...`` is itself a valid Python 3
        expression and so must not be stripped on its own.

    Notes
    -----
    The first line is tested against *initial_re* only, the second line
    against *prompt_re*.  If either matches, one leading prompt is removed
    from every line of the block; otherwise the block is returned unchanged.
    """

    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re if initial_re else prompt_re

    def _strip(self, lines):
        # Remove at most one prompt per line.
        strip_one = self.prompt_re.sub
        return [strip_one('', text, count=1) for text in lines]

    def __call__(self, lines):
        if not lines:
            return lines
        if not self.initial_re.match(lines[0]):
            # No prompt on the first line; fall back to the second one.
            if len(lines) < 2 or not self.prompt_re.match(lines[1]):
                return lines
        return self._strip(lines)
86 86
# Plain Python prompts: '>>>' opens a statement, '...' continues it.  Only
# '>>>' counts as a prompt on the first line.
classic_prompt = PromptStripper(
    initial_re=re.compile(r'^>>>( |$)'),
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
)
91 91
# IPython prompts: 'In [N]: ' (optionally preceded by a vi-mode marker) on
# the first line, '...: ' on continuation lines.
ipython_prompt = PromptStripper(
    re.compile(
        r"""
        ^(                         # Match from the beginning of a line, either:

                                   # 1. First-line prompt:
        ((\[nav\]|\[ins\])?\ )?    # Vi editing mode prompt, if it's there
        In\                        # The 'In' of the prompt, with a space
        \[\d+\]:                   # Command index, as displayed in the prompt
        \                          # With a mandatory trailing space

        |                          # ... or ...

                                   # 2. The three dots of the multiline prompt
        \s*                        # All leading whitespace characters
        \.{3,}:                    # The three (or more) dots
        \ ?                        # With an optional trailing space

        )
        """,
        re.VERBOSE,
    )
)
115 115
116 116
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a single ``run_cell_magic`` call.

    If the first line starts with ``%%``, the whole cell is replaced by one
    line calling ``get_ipython().run_cell_magic(name, first_line, body)``.
    Cells that do not start with ``%%``, and help requests such as
    ``%%name?``, are returned unchanged.
    """
    if not lines:
        return lines
    header, *rest = lines
    if not header.startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', header):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = header[2:].rstrip().partition(' ')
    body = ''.join(rest)
    call = ('get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body))
    return [call]
127 127
128 128
129 129 def _find_assign_op(token_line) -> Optional[int]:
130 130 """Get the index of the first assignment in the line ('=' not inside brackets)
131 131
132 132 Note: We don't try to support multiple special assignment (a = b = %foo)
133 133 """
134 134 paren_level = 0
135 135 for i, ti in enumerate(token_line):
136 136 s = ti.string
137 137 if s == '=' and paren_level == 0:
138 138 return i
139 139 if s in {'(','[','{'}:
140 140 paren_level += 1
141 141 elif s in {')', ']', '}'}:
142 142 if paren_level > 0:
143 143 paren_level -= 1
144 144 return None
145 145
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.

    Starting at *start_line*, follows every line that ends in a backslash
    followed by a newline and returns the index of the line that finishes
    the statement.  The result is always a valid index into *lines*: if the
    final line of *lines* itself ends with a continuation backslash, the
    index of that final line is returned.

    Raises IndexError if *start_line* is not a valid index into *lines*.
    """
    end_line = start_line
    last_ix = len(lines) - 1
    # Index first so that an out-of-range start still raises IndexError;
    # stop at the last line rather than stepping past the end of the list.
    while lines[end_line].endswith('\\\n') and end_line < last_ix:
        end_line += 1
    return end_line
157 157
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Join the pieces of a backslash-continued statement into one line.

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno); the first
    piece begins at that column.  *end_line* is the index of the last piece.

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_line = start[0]
    first_col = start[1]
    pieces = [lines[first_line][first_col:]]
    pieces.extend(lines[first_line + 1:end_line + 1])
    *continued, final = pieces
    # Every piece but the last loses trailing whitespace plus its backslash.
    cleaned = [piece.rstrip()[:-1] for piece in continued]
    # The last piece only loses its trailing whitespace/newline.
    cleaned.append(final.rstrip())
    return ' '.join(cleaned)
184 184
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax must not be transformed when it appears inside strings or
    comments, which is hard to guarantee with regexes.  Instead the code is
    tokenised as Python and the special syntax is recognised in the tokens.

    IPython's special syntax is not valid Python, so tokenising may go wrong
    once the special syntax starts.  These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax; tokens are regenerated after each transformation to find the
    next instance.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute selects which transformation is applied when
    several transformers match at the same place.  Lower numbers have higher
    priority.  This allows "%magic?" to be turned into a help call rather
    than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a tokenize-style (row, col) position with 1-based rows;
        # rows are stored 0-based so they index directly into a list of lines.
        row, col = start[0], start[1]
        self.start_line = row - 1
        self.start_col = col

    def sortby(self):
        """Sort key: earliest position first, then by priority."""
        return (self.start_line, self.start_col, self.priority)

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience, so it is easy
        to e.g. look at the first token of each line.  *tokens_by_line* is a
        list of lists of tokenize.TokenInfo objects.

        Implementations return an instance of their class pointing at the
        start position found, or None if there is no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines and returns a
        similar list of transformed lines.
        """
        raise NotImplementedError
235 235
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` token, or None when no
        logical line has a top-level ``=`` immediately followed by ``%`` and
        a name.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Both line[assign_ix+1] and line[assign_ix+2] are read below, so
            # the line needs more than assign_ix + 2 tokens.  A token line may
            # stop right after the '%' when tokenizing ended early.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text before the ``%`` is kept as-is; the magic (including any
        backslash-continued lines) is replaced by a
        ``get_ipython().run_line_magic(name, args)`` call on a single line.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
266 266
267 267
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance positioned at the ``!`` token, or None.  Only
        whitespace error tokens may sit between the ``=`` and the ``!``.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            if line[assign_ix].line.strip().startswith('='):
                # The physical line itself starts with '=': not an assignment.
                continue
            # Walk the error tokens that directly follow the '='.
            for tok in line[assign_ix + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The text before the ``!`` is kept; the command (including any
        backslash-continued lines) becomes a
        ``get_ipython().getoutput(cmd)`` call on a single line.
        """
        row, col = self.start_line, self.start_col

        prefix = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        command_text = assemble_continued_line(lines, (row, col), last_row)
        assert command_text.startswith('!'), command_text
        cmd = command_text[1:]

        replacement = prefix + "get_ipython().getoutput({!r})".format(cmd) + '\n'
        return lines[:row] + [replacement] + lines[last_row + 1:]
306 306
# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Every one-character escape.
ESCAPE_SINGLES = {
    ESC_SHELL, ESC_HELP, ESC_MAGIC, ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
325 325
326 326 def _make_help_call(target, esc, next_input=None):
327 327 """Prepares a pinfo(2)/psearch call from a target name and the escape
328 328 (i.e. ? or ??)"""
329 329 method = 'pinfo2' if esc == '??' \
330 330 else 'psearch' if '*' in target \
331 331 else 'pinfo'
332 332 arg = " ".join([method, target])
333 333 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
334 334 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
335 335 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
336 336 if next_input is None:
337 337 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
338 338 else:
339 339 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
340 340 (next_input, t_magic_name, t_magic_arg_s)
341 341
342 342 def _tr_help(content):
343 343 """Translate lines escaped with: ?
344 344
345 345 A naked help line should fire the intro help screen (shell.show_usage())
346 346 """
347 347 if not content:
348 348 return 'get_ipython().show_usage()'
349 349
350 350 return _make_help_call(content, '?')
351 351
352 352 def _tr_help2(content):
353 353 """Translate lines escaped with: ??
354 354
355 355 A naked help line should fire the intro help screen (shell.show_usage())
356 356 """
357 357 if not content:
358 358 return 'get_ipython().show_usage()'
359 359
360 360 return _make_help_call(content, '??')
361 361
362 362 def _tr_magic(content):
363 363 "Translate lines escaped with a percent sign: %"
364 364 name, _, args = content.partition(' ')
365 365 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
366 366
367 367 def _tr_quote(content):
368 368 "Translate lines escaped with a comma: ,"
369 369 name, _, args = content.partition(' ')
370 370 return '%s("%s")' % (name, '", "'.join(args.split()) )
371 371
372 372 def _tr_quote2(content):
373 373 "Translate lines escaped with a semicolon: ;"
374 374 name, _, args = content.partition(' ')
375 375 return '%s("%s")' % (name, args)
376 376
377 377 def _tr_paren(content):
378 378 "Translate lines escaped with a slash: /"
379 379 name, _, args = content.partition(' ')
380 380 return '%s(%s)' % (name, ", ".join(args.split()))
381 381
# Maps each escape prefix to the callable that turns the text following the
# prefix into Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
390 390
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        A line qualifies when its first token that is not INDENT/DEDENT is
        one of the single-character escapes.  Returns None if no line does.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            first = next(
                (tok for tok in line if tok.type not in layout_types), None)
            if first is not None and first.string in ESCAPE_SINGLES:
                return cls(first.start)
        return None

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escaped command (with any backslash-continued lines) is replaced
        by one line of Python, keeping the original indentation.  An escape
        with no registered translator yields an empty statement.
        """
        row, col = self.start_line, self.start_col

        indent = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        text = assemble_continued_line(lines, (row, col), last_row)

        # Prefer a two-character escape ('!!', '??') over a single one.
        width = 2 if (len(text) > 1 and text[:2] in ESCAPE_DOUBLES) else 1
        escape, content = text[:width], text[width:]

        translate = tr.get(escape)
        call = translate(content) if translate is not None else ''

        return lines[:row] + [indent + call + '\n'] + lines[last_row + 1:]
433 433
434 434 _help_end_re = re.compile(r"""(%{0,2}
435 435 (?!\d)[\w*]+ # Variable name
436 436 (\.(?!\d)[\w*]+)* # .etc.etc
437 437 )
438 438 (\?\??)$ # ? or ??
439 439 """,
440 440 re.VERBOSE)
441 441
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is the (row, col) of the trailing '?'; rows are 1-based,
        # so shift to a 0-based line index.
        q_row, q_col = q_locn[0], q_locn[1]
        self.q_line = q_row - 1
        self.q_col = q_col

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        Returns an instance spanning from the first non-INDENT/DEDENT token
        of the line to its trailing ``?``, or None if there is no such line.
        """
        layout_types = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) <= 2 or line[-2].string != '?':
                continue
            question = line[-2]
            # The '?' itself is not INDENT/DEDENT, so a first token exists.
            first = next(tok for tok in line if tok.type not in layout_types)
            return cls(first.start, question.start)
        return None

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError when the text before the ``?`` does not end in
        something that looks like a name to inspect.
        """
        first, last = self.start_line, self.q_line
        piece = ''.join(lines[first:last + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]
        lines_before = lines[:first]
        lines_after = lines[last + 1:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        target, esc = m.group(1), m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        is_whole_cell = not lines_before and not lines_after
        if is_whole_cell and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')
        else:
            next_input = None

        call = _make_help_call(target, esc, next_input=next_input)
        return lines_before + [indent + call + '\n'] + lines_after
491 491
def make_tokens_by_line(lines: List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens of a multiline Python string or expression are grouped as one
    line.  All lines except the last should keep their line ending for this
    to work properly; use ``.splitlines(keepends=True)`` when splitting a
    block of text to pass to this function.

    If tokenizing stops early with a ``tokenize.TokenError`` (input ending in
    a multiline string or expression), the tokens produced so far are
    returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}

    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")

    grouped: List[List[Any]] = []
    current: List[Any] = []
    depth = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            current.append(token)
            if token.type == NEWLINE or (token.type == NL and depth <= 0):
                # End of a logical line: start collecting the next one.
                grouped.append(current)
                current = []
            elif token.string in openers:
                depth += 1
            elif token.string in closers and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    if current:
        grouped.append(current)
    return grouped
533 533
534 534
def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
    """Check if the depth of brackets in the list of tokens drops below 0"""
    steps = {'(': 1, '[': 1, '{': 1, ')': -1, ']': -1, '}': -1}
    depth = 0
    for tok in tokens:
        depth += steps.get(tok.string, 0)
        if depth < 0:
            # A closing bracket appeared with nothing open to match it.
            return True
    return False
546 546
547 547
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints the tokens of *s*, grouped by logical line.
    """
    # Make sure the text ends with a newline before splitting it.
    text = s if s.endswith('\n') else s + '\n'
    for token_line in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in token_line:
            print(" ", tokinfo)
557 557
# Arbitrary limit to prevent getting stuck in infinite loops: the maximum
# number of single token transformations applied to one block of lines before
# giving up with a RuntimeError.
TRANSFORM_LOOP_LIMIT = 500
560 560
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Applied first, to tidy up pasted input.
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Line-based transformations, applied after the cleanup ones.
        self.line_transforms = [
            cell_magic,
        ]
        # Token-based transformer classes, applied one match at a time.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        matches = (cls.find(tokens_by_line) for cls in self.token_transformers)
        candidates = [match for match in matches if match]

        # Try candidates from the earliest position on; one that rejects the
        # code with SyntaxError is skipped in favour of the next.
        for transformer in sorted(candidates, key=TokenTransformBase.sortby):
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                continue
        # Nothing to transform (or every candidate was rejected).
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transformations until none of them changes the lines.

        Raises RuntimeError if the lines are still changing after
        TRANSFORM_LOOP_LIMIT rounds.
        """
        remaining = TRANSFORM_LOOP_LIMIT
        while remaining > 0:
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines
            remaining -= 1

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        return ''.join(self.do_token_transforms(lines))

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if the input is not
            a prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code.
            None when status is not 'incomplete' (and for one backslash
            continuation case below).
        """
        # Remember if the cell ends in a new line, ignoring any other
        # whitespace that follows it.
        trailing_ws = cell[len(cell.rstrip()):]
        ends_with_newline = '\n' in trailing_ws

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                if getattr(transform, 'has_side_effects', False):
                    continue
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            return 'complete', None

        try:
            for transform in self.line_transforms:
                if getattr(transform, 'has_side_effects', False):
                    continue
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        # Bail if we got one line and there are more closing parentheses than
        # the opening ones
        if (
            len(lines) == 1
            and tokens_by_line
            and has_sunken_brackets(tokens_by_line[0])
        ):
            return "invalid", None

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}  # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        closing_types = {tok.type for tok in tokens_by_line[-1]}
        only_closing = closing_types in (
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER},
        )
        if only_closing and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        # Strip trailing NEWLINE/COMMENT/ENDMARKER tokens off the final line.
        final = tokens_by_line[-1]
        while final and final[-1].type in newline_types:
            final.pop()

        if not final:
            return 'incomplete', find_last_indent(lines)

        if final[-1].string == ':':
            # The last line starts a block (e.g. 'if foo:'); indent one level
            # deeper than its first real token.
            layout_types = {tokenize.INDENT, tokenize.DEDENT}
            first = next(tok for tok in final if tok.type not in layout_types)
            return 'incomplete', first.start[1] + 4

        if final[0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None

        if res is None:
            return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            # The input closes an indented block: only treat it as finished
            # once the user has ended it with a newline.
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        return 'complete', None
769 769
770 770
def find_last_indent(lines):
    """Return the indentation width of the last line, counting a tab as 4.

    Only the leading run of spaces and tabs is measured; a line that starts
    with anything else has an indent of 0.
    """
    last = lines[-1]
    prefix = last[:len(last) - len(last.lstrip(' \t'))]
    return len(prefix.replace('\t', ' '*4))
General Comments 0
You need to be logged in to leave comments. Login now