A refactor of check_complete to pass the test cases.
Tony Fast -
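For context, check_complete() classifies a block of input as ready to run ('complete'), needing continuation ('incomplete', plus a suggested indent), or unparseable ('invalid'). A minimal sketch of the behaviour the test cases exercise (assuming IPython 7.x, where this module is importable as IPython.core.inputtransformer2; the results shown are the intended ones, not output captured from this exact revision):

    from IPython.core.inputtransformer2 import TransformerManager

    tm = TransformerManager()
    tm.check_complete("a = 1")                # expected: ('complete', None)
    tm.check_complete("for a in range(5):")   # expected: ('incomplete', 4)
    tm.check_complete("raise = 2")            # expected: ('invalid', None)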
@@ -1,670 +1,695 @@
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple
17 17 import warnings
18 18
19 19 _indent_re = re.compile(r'^[ \t]+')
20 20
21 21 def leading_indent(lines):
22 22 """Remove leading indentation.
23 23
24 24 If the first line starts with spaces or tabs, the same whitespace will be
25 25 removed from each following line in the cell.
26 26 """
27 27 if not lines:
28 28 return lines
29 29 m = _indent_re.match(lines[0])
30 30 if not m:
31 31 return lines
32 32 space = m.group(0)
33 33 n = len(space)
34 34 return [l[n:] if l.startswith(space) else l
35 35 for l in lines]
36 36
37 37 class PromptStripper:
38 38 """Remove matching input prompts from a block of input.
39 39
40 40 Parameters
41 41 ----------
42 42 prompt_re : regular expression
43 43 A regular expression matching any input prompt (including continuation,
44 44 e.g. ``...``)
45 45 initial_re : regular expression, optional
46 46 A regular expression matching only the initial prompt, but not continuation.
47 47 If no initial expression is given, prompt_re will be used everywhere.
48 48 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
49 49 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
50 50
51 51 If initial_re and prompt_re differ,
52 52 only initial_re will be tested against the first line.
53 53 If any prompt is found on the first two lines,
54 54 prompts will be stripped from the rest of the block.
55 55 """
56 56 def __init__(self, prompt_re, initial_re=None):
57 57 self.prompt_re = prompt_re
58 58 self.initial_re = initial_re or prompt_re
59 59
60 60 def _strip(self, lines):
61 61 return [self.prompt_re.sub('', l, count=1) for l in lines]
62 62
63 63 def __call__(self, lines):
64 64 if not lines:
65 65 return lines
66 66 if self.initial_re.match(lines[0]) or \
67 67 (len(lines) > 1 and self.prompt_re.match(lines[1])):
68 68 return self._strip(lines)
69 69 return lines
70 70
71 71 classic_prompt = PromptStripper(
72 72 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
73 73 initial_re=re.compile(r'^>>>( |$)')
74 74 )
75 75
76 76 ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
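The two strippers defined above can be exercised directly. A small illustrative sketch (inputs are made up; expected results are shown as comments):

    classic_prompt(['>>> x = 1\n', '... x += 1\n'])
    # expected: ['x = 1\n', 'x += 1\n']
    ipython_prompt(['In [3]: y = 2\n', '   ...: y += 1\n'])
    # expected: ['y = 2\n', 'y += 1\n']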
77 77
78 78 def cell_magic(lines):
79 79 if not lines or not lines[0].startswith('%%'):
80 80 return lines
81 81 if re.match(r'%%\w+\?', lines[0]):
82 82 # This case will be handled by help_end
83 83 return lines
84 84 magic_name, _, first_line = lines[0][2:-1].partition(' ')
85 85 body = ''.join(lines[1:])
86 86 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
87 87 % (magic_name, first_line, body)]
88 88
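cell_magic() folds a %%-cell into a single run_cell_magic call. A hedged sketch of the expected rewrite (result shown as a comment):

    cell_magic(['%%timeit -n1\n', 'pass\n'])
    # expected: ["get_ipython().run_cell_magic('timeit', '-n1', 'pass\\n')\n"]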
89 89
90 90 def _find_assign_op(token_line):
91 91 """Get the index of the first assignment in the line ('=' not inside brackets)
92 92
93 93 Note: We don't try to support multiple special assignments (a = b = %foo)
94 94 """
95 95 paren_level = 0
96 96 for i, ti in enumerate(token_line):
97 97 s = ti.string
98 98 if s == '=' and paren_level == 0:
99 99 return i
100 100 if s in '([{':
101 101 paren_level += 1
102 102 elif s in ')]}':
103 103 if paren_level > 0:
104 104 paren_level -= 1
105 105
106 106 def find_end_of_continued_line(lines, start_line: int):
107 107 """Find the last line of a line explicitly extended using backslashes.
108 108
109 109 Uses 0-indexed line numbers.
110 110 """
111 111 end_line = start_line
112 112 while lines[end_line].endswith('\\\n'):
113 113 end_line += 1
114 114 if end_line >= len(lines):
115 115 break
116 116 return end_line
117 117
118 118 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
119 119 """Assemble a single line from multiple continued line pieces
120 120
121 121 Continued lines are lines ending in ``\``, and the line following the last
122 122 ``\`` in the block.
123 123
124 124 For example, this code continues over multiple lines::
125 125
126 126 if (assign_ix is not None) \
127 127 and (len(line) >= assign_ix + 2) \
128 128 and (line[assign_ix+1].string == '%') \
129 129 and (line[assign_ix+2].type == tokenize.NAME):
130 130
131 131 This statement contains four continued line pieces.
132 132 Assembling these pieces into a single line would give::
133 133
134 134 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
135 135
136 136 This uses 0-indexed line numbers. *start* is (lineno, colno).
137 137
138 138 Used to allow ``%magic`` and ``!system`` commands to be continued over
139 139 multiple lines.
140 140 """
141 141 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
142 142 return ' '.join([p[:-2] for p in parts[:-1]] # Strip backslash+newline
143 143 + [parts[-1][:-1]]) # Strip newline from last line
144 144
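Together, the two helpers above locate and flatten a backslash-continued statement. An illustrative sketch (expected results shown as comments):

    lines = ['%ls -l\\\n', 'data\n']
    find_end_of_continued_line(lines, 0)       # expected: 1
    assemble_continued_line(lines, (0, 0), 1)  # expected: '%ls -l data'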
145 145 class TokenTransformBase:
146 146 """Base class for transformations which examine tokens.
147 147
148 148 Special syntax should not be transformed when it occurs inside strings or
149 149 comments. This is hard to reliably avoid with regexes. The solution is to
150 150 tokenise the code as Python, and recognise the special syntax in the tokens.
151 151
152 152 IPython's special syntax is not valid Python syntax, so tokenising may go
153 153 wrong after the special syntax starts. These classes therefore find and
154 154 transform *one* instance of special syntax at a time into regular Python
155 155 syntax. After each transformation, tokens are regenerated to find the next
156 156 piece of special syntax.
157 157
158 158 Subclasses need to implement one class method (find)
159 159 and one regular method (transform).
160 160
161 161 The priority attribute can select which transformation to apply if multiple
162 162 transformers match in the same place. Lower numbers have higher priority.
163 163 This allows "%magic?" to be turned into a help call rather than a magic call.
164 164 """
165 165 # Lower numbers -> higher priority (for matches in the same location)
166 166 priority = 10
167 167
168 168 def sortby(self):
169 169 return self.start_line, self.start_col, self.priority
170 170
171 171 def __init__(self, start):
172 172 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
173 173 self.start_col = start[1]
174 174
175 175 @classmethod
176 176 def find(cls, tokens_by_line):
177 177 """Find one instance of special syntax in the provided tokens.
178 178
179 179 Tokens are grouped into logical lines for convenience,
180 180 so it is easy to e.g. look at the first token of each line.
181 181 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
182 182
183 183 This should return an instance of its class, pointing to the start
184 184 position it has found, or None if it found no match.
185 185 """
186 186 raise NotImplementedError
187 187
188 188 def transform(self, lines: List[str]):
189 189 """Transform one instance of special syntax found by ``find()``
190 190
191 191 Takes a list of strings representing physical lines,
192 192 returns a similar list of transformed lines.
193 193 """
194 194 raise NotImplementedError
195 195
196 196 class MagicAssign(TokenTransformBase):
197 197 """Transformer for assignments from magics (a = %foo)"""
198 198 @classmethod
199 199 def find(cls, tokens_by_line):
200 200 """Find the first magic assignment (a = %foo) in the cell.
201 201 """
202 202 for line in tokens_by_line:
203 203 assign_ix = _find_assign_op(line)
204 204 if (assign_ix is not None) \
205 205 and (len(line) >= assign_ix + 2) \
206 206 and (line[assign_ix+1].string == '%') \
207 207 and (line[assign_ix+2].type == tokenize.NAME):
208 208 return cls(line[assign_ix+1].start)
209 209
210 210 def transform(self, lines: List[str]):
211 211 """Transform a magic assignment found by the ``find()`` classmethod.
212 212 """
213 213 start_line, start_col = self.start_line, self.start_col
214 214 lhs = lines[start_line][:start_col]
215 215 end_line = find_end_of_continued_line(lines, start_line)
216 216 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
217 217 assert rhs.startswith('%'), rhs
218 218 magic_name, _, args = rhs[1:].partition(' ')
219 219
220 220 lines_before = lines[:start_line]
221 221 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
222 222 new_line = lhs + call + '\n'
223 223 lines_after = lines[end_line+1:]
224 224
225 225 return lines_before + [new_line] + lines_after
226 226
227 227
228 228 class SystemAssign(TokenTransformBase):
229 229 """Transformer for assignments from system commands (a = !foo)"""
230 230 @classmethod
231 231 def find(cls, tokens_by_line):
232 232 """Find the first system assignment (a = !foo) in the cell.
233 233 """
234 234 for line in tokens_by_line:
235 235 assign_ix = _find_assign_op(line)
236 236 if (assign_ix is not None) \
237 237 and (len(line) >= assign_ix + 2) \
238 238 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
239 239 ix = assign_ix + 1
240 240
241 241 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
242 242 if line[ix].string == '!':
243 243 return cls(line[ix].start)
244 244 elif not line[ix].string.isspace():
245 245 break
246 246 ix += 1
247 247
248 248 def transform(self, lines: List[str]):
249 249 """Transform a system assignment found by the ``find()`` classmethod.
250 250 """
251 251 start_line, start_col = self.start_line, self.start_col
252 252
253 253 lhs = lines[start_line][:start_col]
254 254 end_line = find_end_of_continued_line(lines, start_line)
255 255 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
256 256 # assert rhs.startswith('!'), rhs
257 257 cmd = rhs[1:]
258 258
259 259 lines_before = lines[:start_line]
260 260 call = "get_ipython().getoutput({!r})".format(cmd)
261 261 new_line = lhs + call + '\n'
262 262 lines_after = lines[end_line + 1:]
263 263
264 264 return lines_before + [new_line] + lines_after
265 265
266 266 # The escape sequences that define the syntax transformations IPython will
267 267 # apply to user input. These can NOT simply be changed here: many regular
268 268 # expressions and other parts of the code may use their hardcoded values, and
269 269 # for all intents and purposes they constitute the 'IPython syntax', so they
270 270 # should be considered fixed.
271 271
272 272 ESC_SHELL = '!' # Send line to underlying system shell
273 273 ESC_SH_CAP = '!!' # Send line to system shell and capture output
274 274 ESC_HELP = '?' # Find information about object
275 275 ESC_HELP2 = '??' # Find extra-detailed information about object
276 276 ESC_MAGIC = '%' # Call magic function
277 277 ESC_MAGIC2 = '%%' # Call cell-magic function
278 278 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
279 279 ESC_QUOTE2 = ';' # Quote all args as a single string, call
280 280 ESC_PAREN = '/' # Call first argument with rest of line as arguments
281 281
282 282 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
283 283 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
284 284
285 285 def _make_help_call(target, esc, next_input=None):
286 286 """Prepares a pinfo(2)/psearch call from a target name and the escape
287 287 (i.e. ? or ??)"""
288 288 method = 'pinfo2' if esc == '??' \
289 289 else 'psearch' if '*' in target \
290 290 else 'pinfo'
291 291 arg = " ".join([method, target])
292 292 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
293 293 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
294 294 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
295 295 if next_input is None:
296 296 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
297 297 else:
298 298 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
299 299 (next_input, t_magic_name, t_magic_arg_s)
300 300
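_make_help_call() picks between pinfo, pinfo2 and psearch depending on the escape and on whether the target contains a wildcard. A short sketch of the expected translations:

    _make_help_call('os.path', '?')
    # expected: "get_ipython().run_line_magic('pinfo', 'os.path')"
    _make_help_call('os.path', '??')
    # expected: "get_ipython().run_line_magic('pinfo2', 'os.path')"
    _make_help_call('*.path*', '?')
    # expected: "get_ipython().run_line_magic('psearch', '*.path*')"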
301 301 def _tr_help(content):
302 302 """Translate lines escaped with: ?
303 303
304 304 A naked help line should fire the intro help screen (shell.show_usage())
305 305 """
306 306 if not content:
307 307 return 'get_ipython().show_usage()'
308 308
309 309 return _make_help_call(content, '?')
310 310
311 311 def _tr_help2(content):
312 312 """Translate lines escaped with: ??
313 313
314 314 A naked help line should fire the intro help screen (shell.show_usage())
315 315 """
316 316 if not content:
317 317 return 'get_ipython().show_usage()'
318 318
319 319 return _make_help_call(content, '??')
320 320
321 321 def _tr_magic(content):
322 322 "Translate lines escaped with a percent sign: %"
323 323 name, _, args = content.partition(' ')
324 324 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
325 325
326 326 def _tr_quote(content):
327 327 "Translate lines escaped with a comma: ,"
328 328 name, _, args = content.partition(' ')
329 329 return '%s("%s")' % (name, '", "'.join(args.split()) )
330 330
331 331 def _tr_quote2(content):
332 332 "Translate lines escaped with a semicolon: ;"
333 333 name, _, args = content.partition(' ')
334 334 return '%s("%s")' % (name, args)
335 335
336 336 def _tr_paren(content):
337 337 "Translate lines escaped with a slash: /"
338 338 name, _, args = content.partition(' ')
339 339 return '%s(%s)' % (name, ", ".join(args.split()))
340 340
341 341 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
342 342 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
343 343 ESC_HELP : _tr_help,
344 344 ESC_HELP2 : _tr_help2,
345 345 ESC_MAGIC : _tr_magic,
346 346 ESC_QUOTE : _tr_quote,
347 347 ESC_QUOTE2 : _tr_quote2,
348 348 ESC_PAREN : _tr_paren }
349 349
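The tr mapping above routes each escape character to its translator. A few hedged examples of the expected output strings:

    tr['!']('ls -l')       # expected: "get_ipython().system('ls -l')"
    tr[',']('f a b')       # expected: 'f("a", "b")'
    tr['/']('f a b')       # expected: 'f(a, b)'
    tr['%']('time x = 1')  # expected: "get_ipython().run_line_magic('time', 'x = 1')"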
350 350 class EscapedCommand(TokenTransformBase):
351 351 """Transformer for escaped commands like %foo, !foo, or /foo"""
352 352 @classmethod
353 353 def find(cls, tokens_by_line):
354 354 """Find the first escaped command (%foo, !foo, etc.) in the cell.
355 355 """
356 356 for line in tokens_by_line:
357 357 ix = 0
358 358 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
359 359 ix += 1
360 360 if line[ix].string in ESCAPE_SINGLES:
361 361 return cls(line[ix].start)
362 362
363 363 def transform(self, lines):
364 364 """Transform an escaped line found by the ``find()`` classmethod.
365 365 """
366 366 start_line, start_col = self.start_line, self.start_col
367 367
368 368 indent = lines[start_line][:start_col]
369 369 end_line = find_end_of_continued_line(lines, start_line)
370 370 line = assemble_continued_line(lines, (start_line, start_col), end_line)
371 371
372 372 if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
373 373 escape, content = line[:2], line[2:]
374 374 else:
375 375 escape, content = line[:1], line[1:]
376 376
377 377 if escape in tr:
378 378 call = tr[escape](content)
379 379 else:
380 380 call = ''
381 381
382 382 lines_before = lines[:start_line]
383 383 new_line = indent + call + '\n'
384 384 lines_after = lines[end_line + 1:]
385 385
386 386 return lines_before + [new_line] + lines_after
387 387
388 388 _help_end_re = re.compile(r"""(%{0,2}
389 389 [a-zA-Z_*][\w*]* # Variable name
390 390 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
391 391 )
392 392 (\?\??)$ # ? or ??
393 393 """,
394 394 re.VERBOSE)
395 395
396 396 class HelpEnd(TokenTransformBase):
397 397 """Transformer for help syntax: obj? and obj??"""
398 398 # This needs to be higher priority (lower number) than EscapedCommand so
399 399 # that inspecting magics (%foo?) works.
400 400 priority = 5
401 401
402 402 def __init__(self, start, q_locn):
403 403 super().__init__(start)
404 404 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
405 405 self.q_col = q_locn[1]
406 406
407 407 @classmethod
408 408 def find(cls, tokens_by_line):
409 409 """Find the first help command (foo?) in the cell.
410 410 """
411 411 for line in tokens_by_line:
412 412 # Last token is NEWLINE; look at last but one
413 413 if len(line) > 2 and line[-2].string == '?':
414 414 # Find the first token that's not INDENT/DEDENT
415 415 ix = 0
416 416 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
417 417 ix += 1
418 418 return cls(line[ix].start, line[-2].start)
419 419
420 420 def transform(self, lines):
421 421 """Transform a help command found by the ``find()`` classmethod.
422 422 """
423 423 piece = ''.join(lines[self.start_line:self.q_line+1])
424 424 indent, content = piece[:self.start_col], piece[self.start_col:]
425 425 lines_before = lines[:self.start_line]
426 426 lines_after = lines[self.q_line + 1:]
427 427
428 428 m = _help_end_re.search(content)
429 429 if not m:
430 430 raise SyntaxError(content)
431 431 assert m is not None, content
432 432 target = m.group(1)
433 433 esc = m.group(3)
434 434
435 435 # If we're mid-command, put it back on the next prompt for the user.
436 436 next_input = None
437 437 if (not lines_before) and (not lines_after) \
438 438 and content.strip() != m.group(0):
439 439 next_input = content.rstrip('?\n')
440 440
441 441 call = _make_help_call(target, esc, next_input=next_input)
442 442 new_line = indent + call + '\n'
443 443
444 444 return lines_before + [new_line] + lines_after
445 445
446 446 def make_tokens_by_line(lines):
447 447 """Tokenize a series of lines and group tokens by line.
448 448
449 449 The tokens for a multiline Python string or expression are
450 450 grouped as one line.
451 451 """
452 452 # NL tokens are used inside multiline expressions, but also after blank
453 453 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
454 454 # We want to group the former case together but split the latter, so we
455 455 # track parentheses level, similar to the internals of tokenize.
456 456 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
457 457 tokens_by_line = [[]]
458 458 parenlev = 0
459 459 try:
460 460 for token in tokenize.generate_tokens(iter(lines).__next__):
461 461 tokens_by_line[-1].append(token)
462 462 if (token.type == NEWLINE) \
463 463 or ((token.type == NL) and (parenlev <= 0)):
464 464 tokens_by_line.append([])
465 465 elif token.string in {'(', '[', '{'}:
466 466 parenlev += 1
467 467 elif token.string in {')', ']', '}'}:
468 468 if parenlev > 0:
469 469 parenlev -= 1
470 470 except tokenize.TokenError:
471 471 # Input ended in a multiline string or expression. That's OK for us.
472 472 pass
473
474
473 475 if not tokens_by_line[-1]:
474 476 tokens_by_line.pop()
475 477
478
476 479 return tokens_by_line
477 480
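Because NL tokens inside brackets do not split lines, a bracketed expression spanning several physical lines is grouped into one logical line. A quick sketch (expected values as comments):

    toks = make_tokens_by_line(['a = [1,\n', '     2]\n'])
    len(toks)          # expected: 2 (the whole assignment, plus a final line holding ENDMARKER)
    toks[0][0].string  # expected: 'a'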
478 481 def show_linewise_tokens(s: str):
479 482 """For investigation and debugging"""
480 483 if not s.endswith('\n'):
481 484 s += '\n'
482 485 lines = s.splitlines(keepends=True)
483 486 for line in make_tokens_by_line(lines):
484 487 print("Line -------")
485 488 for tokinfo in line:
486 489 print(" ", tokinfo)
487 490
488 491 # Arbitrary limit to prevent getting stuck in infinite loops
489 492 TRANSFORM_LOOP_LIMIT = 500
490 493
491 494 class TransformerManager:
492 495 """Applies various transformations to a cell or code block.
493 496
494 497 The key methods for external use are ``transform_cell()``
495 498 and ``check_complete()``.
496 499 """
497 500 def __init__(self):
498 501 self.cleanup_transforms = [
499 502 leading_indent,
500 503 classic_prompt,
501 504 ipython_prompt,
502 505 ]
503 506 self.line_transforms = [
504 507 cell_magic,
505 508 ]
506 509 self.token_transformers = [
507 510 MagicAssign,
508 511 SystemAssign,
509 512 EscapedCommand,
510 513 HelpEnd,
511 514 ]
512 515
513 516 def do_one_token_transform(self, lines):
514 517 """Find and run the transform earliest in the code.
515 518
516 519 Returns (changed, lines).
517 520
518 521 This method is called repeatedly until changed is False, indicating
519 522 that all available transformations are complete.
520 523
521 524 The tokens following IPython special syntax might not be valid, so
522 525 the transformed code is retokenised every time to identify the next
523 526 piece of special syntax. Hopefully long code cells are mostly valid
524 527 Python, not using lots of IPython special syntax, so this shouldn't be
525 528 a performance issue.
526 529 """
527 530 tokens_by_line = make_tokens_by_line(lines)
528 531 candidates = []
529 532 for transformer_cls in self.token_transformers:
530 533 transformer = transformer_cls.find(tokens_by_line)
531 534 if transformer:
532 535 candidates.append(transformer)
533 536
534 537 if not candidates:
535 538 # Nothing to transform
536 539 return False, lines
537 540 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
538 541 for transformer in ordered_transformers:
539 542 try:
540 543 return True, transformer.transform(lines)
541 544 except SyntaxError:
542 545 pass
543 546 return False, lines
544 547
545 548 def do_token_transforms(self, lines):
546 549 for _ in range(TRANSFORM_LOOP_LIMIT):
547 550 changed, lines = self.do_one_token_transform(lines)
548 551 if not changed:
549 552 return lines
550 553
551 554 raise RuntimeError("Input transformation still changing after "
552 555 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
553 556
554 557 def transform_cell(self, cell: str) -> str:
555 558 """Transforms a cell of input code"""
556 559 if not cell.endswith('\n'):
557 560 cell += '\n' # Ensure the cell has a trailing newline
558 561 lines = cell.splitlines(keepends=True)
559 562 for transform in self.cleanup_transforms + self.line_transforms:
560 563 lines = transform(lines)
561 564
562 565 lines = self.do_token_transforms(lines)
563 566 return ''.join(lines)
564 567
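transform_cell() is the public entry point for rewriting a whole cell. A hedged sketch of typical rewrites (expected outputs as comments; these reflect the documented IPython behaviour rather than output captured from this revision):

    tm = TransformerManager()
    tm.transform_cell('%time x = 1\n')
    # expected: "get_ipython().run_line_magic('time', 'x = 1')\n"
    tm.transform_cell('files = !ls\n')
    # expected: "files = get_ipython().getoutput('ls')\n"
    tm.transform_cell('a = %who\n')
    # expected: "a = get_ipython().run_line_magic('who', '')\n"
    tm.transform_cell('len?\n')
    # expected: "get_ipython().run_line_magic('pinfo', 'len')\n"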
565 568 def check_complete(self, cell: str):
566 569 """Return whether a block of code is ready to execute, or should be continued
567 570
568 571 Parameters
569 572 ----------
570 573 cell : str
571 574 Python input code, which can be multiline.
572 575
573 576 Returns
574 577 -------
575 578 status : str
576 579 One of 'complete', 'incomplete', or 'invalid' if source is not a
577 580 prefix of valid code.
578 581 indent_spaces : int or None
579 582 The number of spaces by which to indent the next line of code. If
580 583 status is not 'incomplete', this is None.
581 584 """
585 # Remember if the cell ends in a newline.
586 ends_with_newline = False
587 for character in reversed(cell):
588 if character == '\n':
589 ends_with_newline = True
590 break
591 elif character.strip():
592 break
593 else:
594 continue
595
596 if ends_with_newline:
597 # Append a newline for consistent tokenization
598 # See https://bugs.python.org/issue33899
599 cell += '\n'
600
582 601 lines = cell.splitlines(keepends=True)
602
583 603 if not lines:
584 604 return 'complete', None
585 605
586 606 if lines[-1].endswith('\\'):
587 607 # Explicit backslash continuation
588 608 return 'incomplete', find_last_indent(lines)
589 609
590 610 try:
591 611 for transform in self.cleanup_transforms:
592 612 lines = transform(lines)
593 613 except SyntaxError:
594 614 return 'invalid', None
595 615
596 616 if lines[0].startswith('%%'):
597 617 # Special case for cell magics - completion marked by blank line
598 618 if lines[-1].strip():
599 619 return 'incomplete', find_last_indent(lines)
600 620 else:
601 621 return 'complete', None
602 622
603 623 try:
604 624 for transform in self.line_transforms:
605 625 lines = transform(lines)
606 626 lines = self.do_token_transforms(lines)
607 627 except SyntaxError:
608 628 return 'invalid', None
609 629
610 630 tokens_by_line = make_tokens_by_line(lines)
631
611 632 if not tokens_by_line:
612 633 return 'incomplete', find_last_indent(lines)
613 634
614 635 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
615 636 # We're in a multiline string or expression
616 637 return 'incomplete', find_last_indent(lines)
617 638
618 if len(tokens_by_line[-1]) == 1:
619 return 'incomplete', find_last_indent(lines)
620 # Find the last token on the previous line that's not NEWLINE or COMMENT
621 toks_last_line = tokens_by_line[-1]
622 ix = len(tokens_by_line) - 1
639 newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}
640
641 # Strip trailing lines that contain only newline-type tokens
642 while len(tokens_by_line) > 1 and len(tokens_by_line[-1]) == 1 \
643 and tokens_by_line[-1][-1].type in newline_types:
644 tokens_by_line.pop()
623 645
646 last_line_token = tokens_by_line[-1]
624 647
625 while ix >= 0 and toks_last_line[-1].type in {tokenize.NEWLINE,
626 tokenize.COMMENT}:
627 ix -= 1
628 if tokens_by_line[ix][-2].string == ':':
648 while tokens_by_line[-1][-1].type in newline_types:
649 last_line_token = tokens_by_line[-1].pop()
650
651 if len(last_line_token) == 1 and not last_line_token[-1]:
652 return 'incomplete', 0
653
654 if last_line_token[-1].string == ':':
629 655 # The last line starts a block (e.g. 'if foo:')
630 656 ix = 0
631 while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
657 while last_line_token[ix].type \
658 in {tokenize.INDENT, tokenize.DEDENT}:
632 659 ix += 1
633 indent = toks_last_line[ix].start[1]
660
661 indent = last_line_token[ix].start[1]
634 662 return 'incomplete', indent + 4
635 if tokens_by_line[ix][-2].string == '\\':
636 if not tokens_by_line[ix][-2].line.endswith('\\'):
637 return 'invalid', None
638 663
639 # If there's a blank line at the end, assume we're ready to execute
640 if not lines[-1].strip():
641 return 'complete', None
664 if last_line_token[-1].line.endswith('\\'):
665 return 'incomplete', None
642 666
643 667 # At this point, our checks think the code is complete (or invalid).
644 668 # We'll use codeop.compile_command to check this with the real parser
645 669 try:
646 670 with warnings.catch_warnings():
647 671 warnings.simplefilter('error', SyntaxWarning)
648 672 res = compile_command(''.join(lines), symbol='exec')
649 673 except (SyntaxError, OverflowError, ValueError, TypeError,
650 674 MemoryError, SyntaxWarning):
651 675 return 'invalid', None
652 676 else:
653 677 if res is None:
654 678 return 'incomplete', find_last_indent(lines)
655 679
656 if toks_last_line[-2].type in {tokenize.NEWLINE, tokenize.NL}:
657 return 'complete', None
680 if last_line_token[-1].type == tokenize.DEDENT:
681 if ends_with_newline:
682 return 'complete', None
683 return 'incomplete', find_last_indent(lines)
658 684
659 if toks_last_line[-2].type == tokenize.DEDENT:
660 if not lines[-1].endswith('\n'):
661 return 'incomplete', find_last_indent(lines)
685 if len(last_line_token) <= 1:
686 return 'incomplete', find_last_indent(lines)
662 687
663 688 return 'complete', None
664 689
665 690
666 691 def find_last_indent(lines):
667 692 m = _indent_re.match(lines[-1])
668 693 if not m:
669 694 return 0
670 695 return len(m.group(0).replace('\t', ' '*4))
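find_last_indent() reports the indentation of the last physical line, expanding tabs to four spaces. For example (expected values as comments):

    find_last_indent(['if x:\n', '    pass\n'])  # expected: 4
    find_last_indent(['x = 1\n'])                # expected: 0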