##// END OF EJS Templates
Remove leading empty lines...
Robin Gustafsson -
Show More
@@ -1,707 +1,721 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Union
17 17 import warnings
18 18
# Matches the run of spaces and/or tabs at the very start of a line.
# Used by leading_indent() and find_last_indent().
_indent_re = re.compile(r'^[ \t]+')
20 20
def leading_empty_lines(lines):
    """Drop blank lines from the start of a cell.

    Lines at the start of *lines* that are empty or whitespace-only are
    removed. If every line is blank (or the list is empty), the input is
    returned unchanged.
    """
    for index, text in enumerate(lines):
        # strip() leaves a non-empty string only if the line has real content
        if text.strip():
            return lines[index:]
    return lines
33
def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, that exact run of
    whitespace is removed from every line in the cell that begins with it.
    Lines that do not start with it are left untouched.
    """
    if not lines:
        return lines
    match = _indent_re.match(lines[0])
    if match is None:
        return lines
    prefix = match.group(0)
    width = len(prefix)
    dedented = []
    for text in lines:
        dedented.append(text[width:] if text.startswith(prefix) else text)
    return dedented
36 49
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including
        continuation, e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not
        continuation. If no initial expression is given, prompt_re will be
        used everywhere. Used mainly for plain Python prompts (``>>>``),
        where the continuation prompt ``...`` is a valid Python expression
        in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ, only initial_re is tested against
    the first line. If a prompt is found on either of the first two lines,
    prompts are stripped from every line of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = prompt_re if not initial_re else initial_re

    def _strip(self, lines):
        # Remove at most one prompt occurrence per line
        stripped = []
        for text in lines:
            stripped.append(self.prompt_re.sub('', text, count=1))
        return stripped

    def __call__(self, lines):
        if not lines:
            return lines
        if self.initial_re.match(lines[0]):
            return self._strip(lines)
        # The first line may have been pasted without its prompt; a prompt
        # on the second line is taken as enough evidence to strip.
        if len(lines) > 1 and self.prompt_re.match(lines[1]):
            return self._strip(lines)
        return lines
70 83
# Strips plain Python REPL prompts. ``>>>`` and ``...`` are both stripped, but
# only ``>>>`` is accepted as evidence of a prompt on the first line.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# Strips IPython prompts: ``In [N]: `` and the ``...: `` continuation prompt
# (which may be preceded by whitespace).
ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
77 90
def cell_magic(lines):
    """Turn a ``%%magic`` cell into a single ``run_cell_magic`` call.

    *lines* is the list of physical lines of the cell. If the first line does
    not start with ``%%``, or it is a help request such as ``%%foo?``, the
    lines are returned unchanged. Otherwise a one-element list is returned
    containing the call; everything after the first line becomes the body.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # Strip only the line ending: slicing off exactly one character leaves a
    # stray '\r' for '\r\n' endings and eats a real character when the line
    # has no trailing newline.
    magic_name, _, first_line = lines[0][2:].rstrip('\r\n').partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
88 101
89 102
90 103 def _find_assign_op(token_line) -> Union[int, None]:
91 104 """Get the index of the first assignment in the line ('=' not inside brackets)
92 105
93 106 Note: We don't try to support multiple special assignment (a = b = %foo)
94 107 """
95 108 paren_level = 0
96 109 for i, ti in enumerate(token_line):
97 110 s = ti.string
98 111 if s == '=' and paren_level == 0:
99 112 return i
100 113 if s in {'(','[','{'}:
101 114 paren_level += 1
102 115 elif s in {')', ']', '}'}:
103 116 if paren_level > 0:
104 117 paren_level -= 1
105 118
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers. Starting at *start_line*, follows lines
    ending in a backslash-newline and returns the index of the line that
    ends the continuation. If the input stops while still continued, the
    index of the final line is returned, so the result is always a valid
    index into *lines*.
    """
    end_line = start_line
    last_line = len(lines) - 1
    # Stop on the final line even if it ends with a backslash: there is no
    # following line to continue onto.
    while lines[end_line].endswith('\\\n') and end_line < last_line:
        end_line += 1
    return end_line
117 130
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno); the first
    piece begins at that column. *end_line* is the index of the last piece.

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_lineno, first_col = start[0], start[1]
    pieces = [lines[first_lineno][first_col:]] + lines[first_lineno + 1:end_line + 1]
    *continued, final = pieces
    # Every piece but the last loses its backslash+newline
    joined = [piece[:-2] for piece in continued]
    # The last piece only loses its newline
    joined.append(final[:-1])
    return ' '.join(joined)
144 157
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments. This is hard to reliably avoid with regexes. The solution is to
    tokenise the code as Python, and recognise the special syntax in the tokens.

    IPython's special syntax is not valid Python syntax, so tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax. After each transformation, tokens are regenerated to find the next
    piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute can select which transformation to apply if multiple
    transformers match in the same place. Lower numbers have higher priority.
    This allows "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a (line, column) pair with a 1-indexed line number;
        # it is stored 0-indexed.
        lineno = start[0]
        self.start_line = lineno - 1
        self.start_col = start[1]

    def sortby(self):
        """Sort key: position in the code first, then priority."""
        return (self.start_line, self.start_col, self.priority)

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError
195 208
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance pointing at the ``%`` token, or None if no line
        contains a top-level ``=`` followed by ``%`` and a name.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens are read after the '=' (the '%' and the magic
            # name), so the line must be strictly longer than assign_ix + 2.
            # Token lines cut short by incomplete input can end right after
            # the '%'.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text from the ``%`` to the end of the (possibly backslash
        continued) line is replaced with a ``run_line_magic`` call; the
        left-hand side and all other lines are kept.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
226 239
227 240
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance pointing at the ``!`` token, or None.
        """
        for token_line in tokens_by_line:
            assign_ix = _find_assign_op(token_line)
            if assign_ix is None:
                continue
            # Skip lines whose source text itself begins with '='
            if token_line[assign_ix].line.strip().startswith('='):
                continue
            if len(token_line) < assign_ix + 2:
                continue
            if token_line[assign_ix + 1].type != tokenize.ERRORTOKEN:
                continue

            # Walk the run of error tokens after '=': whitespace may come
            # before the '!', anything else means this is not a match.
            for token in token_line[assign_ix + 1:]:
                if token.type != tokenize.ERRORTOKEN:
                    break
                if token.string == '!':
                    return cls(token.start)
                if not token.string.isspace():
                    break

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The text from the ``!`` to the end of the (possibly backslash
        continued) line is replaced with a ``getoutput`` call.
        """
        first, col = self.start_line, self.start_col

        last = find_end_of_continued_line(lines, first)
        rhs = assemble_continued_line(lines, (first, col), last)
        assert rhs.startswith('!'), rhs

        replacement = "{}get_ipython().getoutput({!r})\n".format(
            lines[first][:col], rhs[1:])

        return lines[:first] + [replacement] + lines[last + 1:]
266 279
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

# Token strings that EscapedCommand.find() accepts as the start of an
# escaped command.
ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
# Two-character escapes that EscapedCommand.transform() checks for first.
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately
285 298
286 299 def _make_help_call(target, esc, next_input=None):
287 300 """Prepares a pinfo(2)/psearch call from a target name and the escape
288 301 (i.e. ? or ??)"""
289 302 method = 'pinfo2' if esc == '??' \
290 303 else 'psearch' if '*' in target \
291 304 else 'pinfo'
292 305 arg = " ".join([method, target])
293 306 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
294 307 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
295 308 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
296 309 if next_input is None:
297 310 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
298 311 else:
299 312 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
300 313 (next_input, t_magic_name, t_magic_arg_s)
301 314
def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line (no content) produces a ``show_usage()`` call;
    otherwise a help call for *content* is returned.
    """
    if content:
        return _make_help_call(content, '?')
    return 'get_ipython().show_usage()'
311 324
def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line (no content) produces a ``show_usage()`` call;
    otherwise a detailed help call for *content* is returned.
    """
    if content:
        return _make_help_call(content, '??')
    return 'get_ipython().show_usage()'
321 334
322 335 def _tr_magic(content):
323 336 "Translate lines escaped with a percent sign: %"
324 337 name, _, args = content.partition(' ')
325 338 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
326 339
327 340 def _tr_quote(content):
328 341 "Translate lines escaped with a comma: ,"
329 342 name, _, args = content.partition(' ')
330 343 return '%s("%s")' % (name, '", "'.join(args.split()) )
331 344
332 345 def _tr_quote2(content):
333 346 "Translate lines escaped with a semicolon: ;"
334 347 name, _, args = content.partition(' ')
335 348 return '%s("%s")' % (name, args)
336 349
337 350 def _tr_paren(content):
338 351 "Translate lines escaped with a slash: /"
339 352 name, _, args = content.partition(' ')
340 353 return '%s(%s)' % (name, ", ".join(args.split()))
341 354
# Maps each escape sequence to a callable that takes the text following the
# escape and returns the replacement Python source.
tr = { ESC_SHELL  : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP   : _tr_help,
       ESC_HELP2  : _tr_help2,
       ESC_MAGIC  : _tr_magic,
       ESC_QUOTE  : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN  : _tr_paren }
350 363
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        A line matches when its first token, ignoring INDENT/DEDENT tokens,
        is one of the single-character escapes. Returns an instance pointing
        at that token, or None.
        """
        skipped = {tokenize.INDENT, tokenize.DEDENT}
        for token_line in tokens_by_line:
            first = next((tok for tok in token_line if tok.type not in skipped),
                         None)
            if first is not None and first.string in ESCAPE_SINGLES:
                return cls(first.start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escaped command, including any backslash continuation lines, is
        replaced by the call produced by the matching entry of ``tr``; the
        text before the escape is kept as the indent.
        """
        first, col = self.start_line, self.start_col

        last = find_end_of_continued_line(lines, first)
        command = assemble_continued_line(lines, (first, col), last)

        # Two-character escapes take precedence over one-character ones
        escape_len = 2 if command[:2] in ESCAPE_DOUBLES else 1
        escape, content = command[:escape_len], command[escape_len:]

        handler = tr.get(escape)
        call = handler(content) if handler is not None else ''

        replacement = lines[first][:col] + call + '\n'
        return lines[:first] + [replacement] + lines[last + 1:]
393 406
# Matches an optionally %/%%-prefixed dotted name (``*`` wildcards allowed)
# followed by ``?`` or ``??`` at the end of the text.
# Group 1 is the target name; group 3 is the ``?``/``??`` escape.
_help_end_re = re.compile(r"""(%{0,2}
                              [a-zA-Z_*][\w*]*        # Variable name
                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)
401 414
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is the (line, column) of the '?' token, 1-indexed line
        q_lineno = q_locn[0]
        self.q_line = q_lineno - 1
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        A line matches when its last-but-one token is ``?``. The instance
        returned records the start of the line's first non-INDENT/DEDENT
        token and the position of that ``?``.
        """
        skipped = {tokenize.INDENT, tokenize.DEDENT}
        for token_line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(token_line) <= 2:
                continue
            question = token_line[-2]
            if question.string != '?':
                continue
            first = next(tok for tok in token_line if tok.type not in skipped)
            return cls(first.start, question.start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError if the text does not end in a recognisable
        ``name?`` / ``name??`` form.
        """
        before = lines[:self.start_line]
        after = lines[self.q_line + 1:]
        joined = ''.join(lines[self.start_line:self.q_line+1])
        indent = joined[:self.start_col]
        content = joined[self.start_col:]

        match = _help_end_re.search(content)
        if match is None:
            raise SyntaxError(content)
        target, esc = match.group(1), match.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        is_only_statement = (not before) and (not after)
        if is_only_statement and content.strip() != match.group(0):
            next_input = content.rstrip('?\n')
        else:
            next_input = None

        help_line = indent + _make_help_call(target, esc, next_input=next_input) + '\n'
        return before + [help_line] + after
451 464
def make_tokens_by_line(lines:List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. Every line except the last should keep its line ending ('\\n',
    '\\r\\n') for this to work properly; ``str.splitlines(keepends=True)``
    produces a suitable list from a block of text.

    Tokenizing stops quietly if the input ends inside a multiline string or
    expression; the tokens produced up to that point are still returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    opening = {'(', '[', '{'}
    closing = {')', ']', '}'}
    line_endings = ('\n', '\r', '\r\n', '\x0b', '\x0c')
    if len(lines) > 1 and not lines[0].endswith(line_endings):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")

    grouped = []
    current = []
    depth = 0
    try:
        for tok in tokenize.generate_tokens(iter(lines).__next__):
            current.append(tok)
            ends_logical_line = tok.type == tokenize.NEWLINE or (
                tok.type == tokenize.NL and depth <= 0)
            if ends_logical_line:
                grouped.append(current)
                current = []
            elif tok.string in opening:
                depth += 1
            elif tok.string in closing and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # Keep a trailing, unterminated group only if it holds any tokens
    if current:
        grouped.append(current)

    return grouped
491 504
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints the tokens of *s*, grouped the way make_tokens_by_line groups them.
    """
    text = s if s.endswith('\n') else s + '\n'
    for token_line in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in token_line:
            print(" ", tokinfo)
501 514
# Arbitrary limit to prevent getting stuck in infinite loops.
# TransformerManager.do_token_transforms() raises RuntimeError once this many
# token transformations have been applied and the input is still changing.
TRANSFORM_LOOP_LIMIT = 500
504 517
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Each list holds callables applied in order.
        # cleanup_transforms and line_transforms take and return a list of
        # physical lines; token_transformers are TokenTransformBase
        # subclasses driven by do_one_token_transform().
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        self.line_transforms = [
            cell_magic,
        ]
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        # Earliest position first; priority breaks ties at the same position
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate could not be applied; try the next one
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms repeatedly until nothing changes.

        Raises RuntimeError if the input is still changing after
        TRANSFORM_LOOP_LIMIT iterations.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        # Remember if the lines ends in a new line.
        # Scan backwards over trailing whitespace: a newline found before any
        # non-whitespace character counts as ending with a newline.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break
            else:
                continue

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        # Drop trailing NEWLINE/COMMENT/ENDMARKER tokens so the last remaining
        # token is the last piece of actual code on that line.
        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if len(tokens_by_line) == 1 and not tokens_by_line[-1]:
            # Nothing but newline-type tokens were present
            return 'incomplete', 0

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            # Indent one level (4 spaces) deeper than the line opening the block
            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                # Turn SyntaxWarning into an exception so it is reported as
                # 'invalid' below.
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            # The code ends inside an indented block: only treat it as
            # complete if the input ended with a newline.
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # If there's a blank line at the end, assume we're ready to execute
        if not lines[-1].strip():
            return 'complete', None

        return 'complete', None
701 715
702 716
def find_last_indent(lines):
    """Return the indentation width of the last line, counting a tab as 4.

    Returns 0 when the last line does not start with spaces or tabs.
    """
    match = _indent_re.match(lines[-1])
    if match is None:
        return 0
    indent = match.group(0)
    # Each tab counts as four columns: itself plus three extra
    return len(indent) + 3 * indent.count('\t')
@@ -1,88 +1,116 b''
1 1 """Tests for the line-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2 for tests for token-based transformers.
5 5 """
6 6 import nose.tools as nt
7 7
8 8 from IPython.core import inputtransformer2 as ipt2
9 9
10 10 CELL_MAGIC = ("""\
11 11 %%foo arg
12 12 body 1
13 13 body 2
14 14 """, """\
15 15 get_ipython().run_cell_magic('foo', 'arg', 'body 1\\nbody 2\\n')
16 16 """)
17 17
def test_cell_magic():
    """cell_magic() output matches the expected lines for each sample."""
    cases = [CELL_MAGIC]
    for source, wanted in cases:
        actual = ipt2.cell_magic(source.splitlines(keepends=True))
        nt.assert_equal(actual, wanted.splitlines(keepends=True))
22 22
23 23 CLASSIC_PROMPT = ("""\
24 24 >>> for a in range(5):
25 25 ... print(a)
26 26 """, """\
27 27 for a in range(5):
28 28 print(a)
29 29 """)
30 30
31 31 CLASSIC_PROMPT_L2 = ("""\
32 32 for a in range(5):
33 33 ... print(a)
34 34 ... print(a ** 2)
35 35 """, """\
36 36 for a in range(5):
37 37 print(a)
38 38 print(a ** 2)
39 39 """)
40 40
def test_classic_prompt():
    """classic_prompt() output matches the expected lines for each sample."""
    cases = [CLASSIC_PROMPT, CLASSIC_PROMPT_L2]
    for source, wanted in cases:
        actual = ipt2.classic_prompt(source.splitlines(keepends=True))
        nt.assert_equal(actual, wanted.splitlines(keepends=True))
45 45
46 46 IPYTHON_PROMPT = ("""\
47 47 In [1]: for a in range(5):
48 48 ...: print(a)
49 49 """, """\
50 50 for a in range(5):
51 51 print(a)
52 52 """)
53 53
54 54 IPYTHON_PROMPT_L2 = ("""\
55 55 for a in range(5):
56 56 ...: print(a)
57 57 ...: print(a ** 2)
58 58 """, """\
59 59 for a in range(5):
60 60 print(a)
61 61 print(a ** 2)
62 62 """)
63 63
def test_ipython_prompt():
    """ipython_prompt() output matches the expected lines for each sample."""
    cases = [IPYTHON_PROMPT, IPYTHON_PROMPT_L2]
    for source, wanted in cases:
        actual = ipt2.ipython_prompt(source.splitlines(keepends=True))
        nt.assert_equal(actual, wanted.splitlines(keepends=True))
68 68
69 69 INDENT_SPACES = ("""\
70 70 if True:
71 71 a = 3
72 72 """, """\
73 73 if True:
74 74 a = 3
75 75 """)
76 76
77 77 INDENT_TABS = ("""\
78 78 \tif True:
79 79 \t\tb = 4
80 80 """, """\
81 81 if True:
82 82 \tb = 4
83 83 """)
84 84
def test_leading_indent():
    """leading_indent() output matches the expected lines for each sample."""
    cases = [INDENT_SPACES, INDENT_TABS]
    for source, wanted in cases:
        actual = ipt2.leading_indent(source.splitlines(keepends=True))
        nt.assert_equal(actual, wanted.splitlines(keepends=True))
89
90 LEADING_EMPTY_LINES = ("""\
91 \t
92
93 if True:
94 a = 3
95
96 b = 4
97 """, """\
98 if True:
99 a = 3
100
101 b = 4
102 """)
103
104 ONLY_EMPTY_LINES = ("""\
105 \t
106
107 """, """\
108 \t
109
110 """)
111
def test_leading_empty_lines():
    """leading_empty_lines() output matches the expected lines for each sample.

    The function name carries the ``test_`` prefix so that the test runner
    collects it; without the prefix the checks below never ran.
    """
    for sample, expected in [LEADING_EMPTY_LINES, ONLY_EMPTY_LINES]:
        nt.assert_equal(
            ipt2.leading_empty_lines(sample.splitlines(keepends=True)),
            expected.splitlines(keepends=True))
General Comments 0
You need to be logged in to leave comments. Login now