##// END OF EJS Templates
Support CRLF line endings in magic transforms
Kyle Cutler -
Show More
@@ -1,726 +1,726 b''
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple, Optional, Any
17 17 import warnings
18 18
19 19 _indent_re = re.compile(r'^[ \t]+')
20 20
def leading_empty_lines(lines):
    """Strip empty or whitespace-only lines from the start of *lines*.

    Returns the input unchanged when it is empty or when every line is
    blank.
    """
    if not lines:
        return lines
    for index, current in enumerate(lines):
        if current and not current.isspace():
            return lines[index:]
    return lines
33 33
def leading_indent(lines):
    """Remove the first line's leading indentation from every line.

    Whatever run of spaces/tabs starts the first line is stripped from
    each line in the cell that begins with the same run.
    """
    if not lines:
        return lines
    match = re.match(r'^[ \t]+', lines[0])
    if match is None:
        return lines
    prefix = match.group(0)
    width = len(prefix)
    stripped = []
    for line in lines:
        stripped.append(line[width:] if line.startswith(prefix) else line)
    return stripped
49 49
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        Matches any input prompt, including continuation prompts
        (e.g. ``...``).
    initial_re : regular expression, optional
        Matches only the first-line prompt, not continuations.
        Defaults to *prompt_re*.  Useful for plain Python prompts
        (``>>>``), where the ``...`` continuation is itself valid
        Python and must not be stripped on its own.

    If a prompt is detected on either of the first two lines,
    prompts are stripped from the entire block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re if initial_re is not None else prompt_re

    def _strip(self, lines):
        # Remove at most one prompt occurrence per line.
        stripped = []
        for line in lines:
            stripped.append(self.prompt_re.sub('', line, count=1))
        return stripped

    def __call__(self, lines):
        if not lines:
            return lines
        first_has_prompt = bool(self.initial_re.match(lines[0]))
        second_has_prompt = (len(lines) > 1
                             and bool(self.prompt_re.match(lines[1])))
        if first_has_prompt or second_has_prompt:
            return self._strip(lines)
        return lines
83 83
# Strips classic Python shell prompts ('>>> ' and '... ').  The continuation
# prompt is only recognised after a primary prompt, because a bare '...' is
# itself valid Python and must not be stripped.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# Strips IPython prompts ('In [3]: ' and the aligned '...: ' continuation).
ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
90 90
def cell_magic(lines):
    """Translate a ``%%foo`` cell magic into a ``run_cell_magic()`` call.

    The first line supplies the magic name and its argument string; all
    remaining lines form the cell body.  ``%%foo?`` help requests are left
    untouched (they are handled by the help-end transformer).
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # Use rstrip() rather than slicing off the last character so that both
    # '\n' and '\r\n' line endings (and trailing spaces) are removed.
    magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
101 101
102 102
103 103 def _find_assign_op(token_line) -> Optional[int]:
104 104 """Get the index of the first assignment in the line ('=' not inside brackets)
105 105
106 106 Note: We don't try to support multiple special assignment (a = b = %foo)
107 107 """
108 108 paren_level = 0
109 109 for i, ti in enumerate(token_line):
110 110 s = ti.string
111 111 if s == '=' and paren_level == 0:
112 112 return i
113 113 if s in {'(','[','{'}:
114 114 paren_level += 1
115 115 elif s in {')', ']', '}'}:
116 116 if paren_level > 0:
117 117 paren_level -= 1
118 118 return None
119 119
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.
    """
    last = start_line
    # Walk forward while the current physical line ends in backslash+newline.
    while last < len(lines) and lines[last].endswith('\\\n'):
        last += 1
    return last
131 131
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
    # rstrip() each piece so that both '\n' and '\r\n' line endings are
    # handled, then drop the trailing backslash from every piece but the last.
    return ' '.join([p.rstrip()[:-1] for p in parts[:-1]]
                    + [parts[-1].rstrip()])
158 158
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments, which is hard to avoid reliably with regexes.  Instead the code
    is tokenised as Python and the special syntax recognised in the tokens.

    IPython's special syntax is not valid Python, so tokenising may go wrong
    after the special syntax starts.  Subclasses therefore find and transform
    *one* instance of special syntax at a time; tokens are regenerated after
    each transformation to find the next piece.

    Subclasses implement one class method (``find``) and one regular method
    (``transform``).

    The ``priority`` attribute selects which transformation applies when
    several match at the same place — lower numbers win.  This lets
    ``%magic?`` become a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        # Sort key: earliest position first, then priority as tie-break.
        return self.start_line, self.start_col, self.priority

    def __init__(self, start):
        # *start* is a 1-indexed (line, col) pair as produced by tokenize;
        # the line is stored 0-indexed.
        self.start_line = start[0] - 1
        self.start_col = start[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects,
        grouped into logical lines for convenience.

        Returns an instance of this class pointing at the start position
        found, or None when there is no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``.

        Takes a list of strings representing physical lines and returns a
        similar list of transformed lines.
        """
        raise NotImplementedError
209 209
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell."""
        for line in tokens_by_line:
            ix = _find_assign_op(line)
            if ix is None:
                continue
            # '%' directly after '=', followed by a name, marks a magic RHS.
            if (len(line) >= ix + 2
                    and line[ix + 1].string == '%'
                    and line[ix + 2].type == tokenize.NAME):
                return cls(line[ix + 1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod."""
        row, col = self.start_line, self.start_col
        lhs = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        return lines[:row] + [lhs + call + '\n'] + lines[last_row + 1:]
240 240
241 241
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell."""
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            if (line[assign_ix].line.strip().startswith('=')
                    or len(line) < assign_ix + 2
                    or line[assign_ix + 1].type != tokenize.ERRORTOKEN):
                continue
            # Scan the run of ERRORTOKENs after '=' for the '!' marker,
            # skipping over whitespace error tokens.
            ix = assign_ix + 1
            while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                if line[ix].string == '!':
                    return cls(line[ix].start)
                if not line[ix].string.isspace():
                    break
                ix += 1

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod."""
        row, col = self.start_line, self.start_col
        lhs = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('!'), rhs

        call = "get_ipython().getoutput({!r})".format(rhs[1:])
        return lines[:row] + [lhs + call + '\n'] + lines[last_row + 1:]
280 280
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

# Sets used for fast membership tests when scanning tokens.
ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately
299 299
300 300 def _make_help_call(target, esc, next_input=None):
301 301 """Prepares a pinfo(2)/psearch call from a target name and the escape
302 302 (i.e. ? or ??)"""
303 303 method = 'pinfo2' if esc == '??' \
304 304 else 'psearch' if '*' in target \
305 305 else 'pinfo'
306 306 arg = " ".join([method, target])
307 307 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
308 308 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
309 309 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
310 310 if next_input is None:
311 311 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
312 312 else:
313 313 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
314 314 (next_input, t_magic_name, t_magic_arg_s)
315 315
def _tr_help(content):
    """Translate lines escaped with one '?'.

    A naked help line fires the intro help screen (shell.show_usage()).
    """
    if not content:
        return 'get_ipython().show_usage()'
    return _make_help_call(content, '?')
325 325
def _tr_help2(content):
    """Translate lines escaped with '??'.

    A naked help line fires the intro help screen (shell.show_usage()).
    """
    if not content:
        return 'get_ipython().show_usage()'
    return _make_help_call(content, '??')
335 335
336 336 def _tr_magic(content):
337 337 "Translate lines escaped with a percent sign: %"
338 338 name, _, args = content.partition(' ')
339 339 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
340 340
341 341 def _tr_quote(content):
342 342 "Translate lines escaped with a comma: ,"
343 343 name, _, args = content.partition(' ')
344 344 return '%s("%s")' % (name, '", "'.join(args.split()) )
345 345
346 346 def _tr_quote2(content):
347 347 "Translate lines escaped with a semicolon: ;"
348 348 name, _, args = content.partition(' ')
349 349 return '%s("%s")' % (name, args)
350 350
351 351 def _tr_paren(content):
352 352 "Translate lines escaped with a slash: /"
353 353 name, _, args = content.partition(' ')
354 354 return '%s(%s)' % (name, ", ".join(args.split()))
355 355
# Dispatch table mapping each escape character to the callable that builds
# the replacement Python source for its line content.
tr = { ESC_SHELL  : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP   : _tr_help,
       ESC_HELP2  : _tr_help2,
       ESC_MAGIC  : _tr_magic,
       ESC_QUOTE  : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN  : _tr_paren }
364 364
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell."""
        for line in tokens_by_line:
            if not line:
                continue
            # Skip leading INDENT/DEDENT tokens to reach the first real token.
            ix = 0
            ll = len(line)
            while ix < ll and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= ll:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod."""
        row, col = self.start_line, self.start_col

        indent = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        line = assemble_continued_line(lines, (row, col), last_row)

        # Two-character escapes ('!!', '??') take precedence over singles.
        if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]

        call = tr[escape](content) if escape in tr else ''

        return lines[:row] + [indent + call + '\n'] + lines[last_row + 1:]
407 407
# Matches a (possibly dotted, possibly %-prefixed) name followed by ? or ??
# at the end of a line, e.g. 'np.sum?' or '%timeit??'.
_help_end_re = re.compile(r"""(%{0,2}
                              (?!\d)[\w*]+            # Variable name
                              (\.(?!\d)[\w*]+)*       # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)
415 415
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell."""
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod."""
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if not m:
            # Content that doesn't look like help syntax cannot be
            # transformed; surface it as a syntax error.  (The previously
            # following `assert m is not None` was unreachable and has been
            # removed.)
            raise SyntaxError(content)
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
465 465
def make_tokens_by_line(lines: List[str]):
    """Tokenize a series of lines and group tokens by logical line.

    The tokens for a multiline Python string or expression are grouped as
    one line. All lines except the last should keep their line ending
    ('\\n', '\\r\\n') for this to work properly; use
    `.splitlines(keeplineending=True)` when splitting a block of text.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore
    grouped: List[List[Any]] = [[]]
    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
    depth = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            grouped[-1].append(token)
            logical_line_end = (token.type == NEWLINE
                                or (token.type == NL and depth <= 0))
            if logical_line_end:
                grouped.append([])
            elif token.string in {'(', '[', '{'}:
                depth += 1
            elif token.string in {')', ']', '}'}:
                if depth > 0:
                    depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # Drop a trailing empty group left behind by the final NEWLINE/NL.
    if not grouped[-1]:
        grouped.pop()

    return grouped
507 507
def show_linewise_tokens(s: str):
    """For investigation and debugging"""
    if not s.endswith('\n'):
        s += '\n'
    for token_group in make_tokens_by_line(s.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in token_group:
            print(" ", tokinfo)
517 517
# Arbitrary limit to prevent getting stuck in infinite loops: token
# transformation is repeated until a fixed point, and a buggy transformer
# could otherwise keep "changing" forever.
TRANSFORM_LOOP_LIMIT = 500
520 520
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Transforms that clean up pasted input (prompts, indentation,
        # leading blank lines); run before anything else.
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Whole-cell, line-based transforms (currently just cell magics).
        self.line_transforms = [
            cell_magic,
        ]
        # Token-based transform classes, applied repeatedly by
        # do_one_token_transform() until nothing more matches.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        # Earliest match in the cell wins; priority breaks ties at the
        # same position (e.g. HelpEnd beats EscapedCommand for '%foo?').
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This transformer didn't apply cleanly; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms until a fixed point (or the safety limit)."""
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        source : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        # Remember if the lines ends in a new line.
        # (Trailing spaces after the newline are ignored.)
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break
            else:
                continue

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                # Transforms flagged with has_side_effects must not run
                # during a pure completeness check.
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER} # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        # Strip trailing newline/comment tokens off the last logical line so
        # the checks below see the last *significant* token.
        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            indent = tokens_by_line[-1][ix].start[1]
            # Suggest indenting one level (4 spaces) past the block opener.
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            # A dedent at the end: complete only if the user pressed Enter
            # (i.e. the cell ended with a newline).
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # If there's a blank line at the end, assume we're ready to execute
        if not lines[-1].strip():
            return 'complete', None

        return 'complete', None
720 720
721 721
def find_last_indent(lines):
    """Return the indent width of the last line, counting each tab as 4."""
    match = re.match(r'^[ \t]+', lines[-1])
    if match is None:
        return 0
    # Note: replace() (not expandtabs) — every tab counts as exactly 4,
    # regardless of column position.
    return len(match.group(0).replace('\t', '    '))
@@ -1,326 +1,337 b''
1 1 """Tests for the token-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2_line for tests for line-based
5 5 transformations.
6 6 """
7 7 import nose.tools as nt
8 8 import string
9 9
10 10 from IPython.core import inputtransformer2 as ipt2
11 11 from IPython.core.inputtransformer2 import make_tokens_by_line, _find_assign_op
12 12
13 13 from textwrap import dedent
14 14
# Each case is a tuple: (input lines, expected 1-indexed (line, col) where the
# transformer should match, expected output lines).

# A %magic spread over a backslash continuation line.
MULTILINE_MAGIC = ("""\
a = f()
%foo \\
bar
g()
""".splitlines(keepends=True), (2, 0), """\
a = f()
get_ipython().run_line_magic('foo', ' bar')
g()
""".splitlines(keepends=True))

# A %magic nested inside an indented block; match column is the indent depth.
INDENTED_MAGIC = ("""\
for a in range(5):
    %ls
""".splitlines(keepends=True), (2, 4), """\
for a in range(5):
    get_ipython().run_line_magic('ls', '')
""".splitlines(keepends=True))

# A %magic whose line ends with CRLF rather than bare LF (Windows input).
CRLF_MAGIC = ([
    "a = f()\n",
    "%ls\r\n",
    "g()\n"
], (2, 0), [
    "a = f()\n",
    "get_ipython().run_line_magic('ls', '')\n",
    "g()\n"
])
43
# Magic assignment (b = %foo) spanning a continuation line.
MULTILINE_MAGIC_ASSIGN = ("""\
a = f()
b = %foo \\
bar
g()
""".splitlines(keepends=True), (2, 4), """\
a = f()
b = get_ipython().run_line_magic('foo', ' bar')
g()
""".splitlines(keepends=True))

# System-command assignment (b = !foo) spanning a continuation line.
MULTILINE_SYSTEM_ASSIGN = ("""\
a = f()
b = !foo \\
bar
g()
""".splitlines(keepends=True), (2, 4), """\
a = f()
b = get_ipython().getoutput('foo bar')
g()
""".splitlines(keepends=True))

#####

# System assignment immediately after a dedent. NOTE(review): the missing
# space after '=' in the expected output is deliberate — the transform
# preserves the original (odd) spacing around the assignment.
MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT = ("""\
def test():
  for i in range(1):
    print(i)
  res =! ls
""".splitlines(keepends=True), (4, 7), '''\
def test():
  for i in range(1):
    print(i)
  res =get_ipython().getoutput(\' ls\')
'''.splitlines(keepends=True))
69 79
70 80 ######
71 81
# Autocall escapes: ',' quotes each argument, ';' quotes the whole rest of the
# line as one string, '/' calls with the arguments as written.
AUTOCALL_QUOTE = (
    [",f 1 2 3\n"], (1, 0),
    ['f("1", "2", "3")\n']
)

AUTOCALL_QUOTE2 = (
    [";f 1 2 3\n"], (1, 0),
    ['f("1 2 3")\n']
)

AUTOCALL_PAREN = (
    ["/f 1 2 3\n"], (1, 0),
    ['f(1, 2, 3)\n']
)

# Help queries: trailing '?' maps to the 'pinfo' magic, '??' to 'pinfo2'.
SIMPLE_HELP = (
    ["foo?\n"], (1, 0),
    ["get_ipython().run_line_magic('pinfo', 'foo')\n"]
)

DETAILED_HELP = (
    ["foo??\n"], (1, 0),
    ["get_ipython().run_line_magic('pinfo2', 'foo')\n"]
)

# Asking for help on a magic keeps the '%' prefix in the query.
MAGIC_HELP = (
    ["%foo?\n"], (1, 0),
    ["get_ipython().run_line_magic('pinfo', '%foo')\n"]
)

# '?' at the end of a larger expression: the expression is re-queued via
# set_next_input and help is shown for the queried name only.
HELP_IN_EXPR = (
    ["a = b + c?\n"], (1, 0),
    ["get_ipython().set_next_input('a = b + c');"
     "get_ipython().run_line_magic('pinfo', 'c')\n"]
)

HELP_CONTINUED_LINE = ("""\
a = \\
zip?
""".splitlines(keepends=True), (1, 0),
[r"get_ipython().set_next_input('a = \\\nzip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
)

HELP_MULTILINE = ("""\
(a,
b) = zip?
""".splitlines(keepends=True), (1, 0),
[r"get_ipython().set_next_input('(a,\nb) = zip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
)

# Help on a non-ASCII identifier.
HELP_UNICODE = (
    ["π.foo?\n"], (1, 0),
    ["get_ipython().run_line_magic('pinfo', 'π.foo')\n"]
)
126 136
127 137
def null_cleanup_transformer(lines):
    """A cleanup transform that discards its input and yields an empty list."""
    return list()
133 143
def check_make_token_by_line_never_ends_empty():
    """
    Check that no sequence of one or two printable characters leads to an
    empty final list of tokens from make_tokens_by_line.
    """
    from string import printable
    for c in printable:
        nt.assert_not_equal(make_tokens_by_line(c)[-1], [])
        for k in printable:
            # Every two-character combination must also tokenize non-emptily.
            nt.assert_not_equal(make_tokens_by_line(c + k)[-1], [])
143 153
def check_find(transformer, case, match=True):
    """Tokenize the case's sample and verify where (or whether) *transformer* matches.

    Returns the transformer instance found (or None when match=False).
    """
    sample, expected_start, _ = case
    result = transformer.find(make_tokens_by_line(sample))
    if not match:
        nt.assert_is(result, None)
        return result
    # start_line is stored 0-indexed; expected values are 1-indexed.
    nt.assert_equal((result.start_line + 1, result.start_col), expected_start)
    return result
154 164
def check_transform(transformer_cls, case):
    """Instantiate *transformer_cls* at the case's start position and compare output."""
    sample_lines, start, expected_lines = case
    nt.assert_equal(transformer_cls(start).transform(sample_lines), expected_lines)
159 169
def test_continued_line():
    """Backslash continuations are located and reassembled into one string."""
    sample = MULTILINE_MAGIC_ASSIGN[0]
    nt.assert_equal(ipt2.find_end_of_continued_line(sample, 1), 2)
    nt.assert_equal(ipt2.assemble_continued_line(sample, (1, 5), 2), "foo bar")
165 175
def test_find_assign_magic():
    """MagicAssign matches magic assignments but never system assignments."""
    check_find(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
    for non_match in (MULTILINE_SYSTEM_ASSIGN, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT):
        check_find(ipt2.MagicAssign, non_match, match=False)
170 180
def test_transform_assign_magic():
    """A magic assignment rewrites to a run_line_magic call."""
    check_transform(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
173 183
def test_find_assign_system():
    """SystemAssign matches '!' assignments (spaced or not) but not magics."""
    matching_cases = (
        MULTILINE_SYSTEM_ASSIGN,
        MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT,
        (["a = !ls\n"], (1, 5), None),
        (["a=!ls\n"], (1, 2), None),
    )
    for case in matching_cases:
        check_find(ipt2.SystemAssign, case)
    check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False)
180 190
def test_transform_assign_system():
    """A system assignment rewrites to a getoutput call."""
    for case in (MULTILINE_SYSTEM_ASSIGN, MULTILINE_SYSTEM_ASSIGN_AFTER_DEDENT):
        check_transform(ipt2.SystemAssign, case)
184 194
def test_find_magic_escape():
    """EscapedCommand matches %magic lines but not magic assignments."""
    for case in (MULTILINE_MAGIC, INDENTED_MAGIC):
        check_find(ipt2.EscapedCommand, case)
    check_find(ipt2.EscapedCommand, MULTILINE_MAGIC_ASSIGN, match=False)
189 199
def test_transform_magic_escape():
    """%magic lines rewrite correctly, including CRLF-terminated input."""
    for case in (MULTILINE_MAGIC, INDENTED_MAGIC, CRLF_MAGIC):
        check_transform(ipt2.EscapedCommand, case)
193 204
def test_find_autocalls():
    """EscapedCommand locates every autocall escape variant."""
    for sample in (AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN):
        print("Testing %r" % sample[0])
        check_find(ipt2.EscapedCommand, sample)
198 209
def test_transform_autocall():
    """Each autocall escape variant rewrites to the expected call form."""
    for sample in (AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN):
        print("Testing %r" % sample[0])
        check_transform(ipt2.EscapedCommand, sample)
203 214
def test_find_help():
    """HelpEnd matches the various '?' forms and records the query position."""
    for case in [SIMPLE_HELP, DETAILED_HELP, MAGIC_HELP, HELP_IN_EXPR]:
        check_find(ipt2.HelpEnd, case)

    # q_line/q_col are 0-indexed positions of the queried name.
    tf = check_find(ipt2.HelpEnd, HELP_CONTINUED_LINE)
    nt.assert_equal(tf.q_line, 1)
    nt.assert_equal(tf.q_col, 3)

    tf = check_find(ipt2.HelpEnd, HELP_MULTILINE)
    nt.assert_equal(tf.q_line, 1)
    nt.assert_equal(tf.q_col, 8)

    # ? in a comment does not trigger help
    check_find(ipt2.HelpEnd, (["foo # bar?\n"], None, None), match=False)
    # Nor in a string
    check_find(ipt2.HelpEnd, (["foo = '''bar?\n"], None, None), match=False)
220 231
def test_transform_help():
    """HelpEnd.transform produces the pinfo call (plus set_next_input when needed).

    HelpEnd is constructed directly with (start, q_locn) positions rather than
    via find(), so each case pins the exact coordinates.
    """
    tf = ipt2.HelpEnd((1, 0), (1, 9))
    nt.assert_equal(tf.transform(HELP_IN_EXPR[0]), HELP_IN_EXPR[2])

    tf = ipt2.HelpEnd((1, 0), (2, 3))
    nt.assert_equal(tf.transform(HELP_CONTINUED_LINE[0]), HELP_CONTINUED_LINE[2])

    tf = ipt2.HelpEnd((1, 0), (2, 8))
    nt.assert_equal(tf.transform(HELP_MULTILINE[0]), HELP_MULTILINE[2])

    tf = ipt2.HelpEnd((1, 0), (1, 0))
    nt.assert_equal(tf.transform(HELP_UNICODE[0]), HELP_UNICODE[2])
233 244
def test_find_assign_op_dedent():
    """
    Be careful that empty tokens (like DEDENT) are not counted as parens.
    """
    class Tk:
        # Minimal stand-in exposing only the .string attribute that
        # _find_assign_op reads; '' mimics an empty DEDENT token.
        def __init__(self, s):
            self.string = s

    def tokens(*strings):
        return [Tk(s) for s in strings]

    nt.assert_equal(_find_assign_op(tokens('', 'a', '=', 'b')), 2)
    nt.assert_equal(
        _find_assign_op(tokens('', '(', 'a', '=', 'b', ')', '=', '5')), 6)
244 255
def test_check_complete():
    """check_complete returns (status, indent_hint) for partial input.

    status is 'complete', 'incomplete' or 'invalid'; the hint is the suggested
    indent for the next line (None when there is nothing to suggest).
    """
    cc = ipt2.TransformerManager().check_complete
    nt.assert_equal(cc("a = 1"), ('complete', None))
    nt.assert_equal(cc("for a in range(5):"), ('incomplete', 4))
    nt.assert_equal(cc("for a in range(5):\n    if a > 0:"), ('incomplete', 8))
    nt.assert_equal(cc("raise = 2"), ('invalid', None))
    nt.assert_equal(cc("a = [1,\n2,"), ('incomplete', 0))
    nt.assert_equal(cc(")"), ('incomplete', 0))
    nt.assert_equal(cc("\\\r\n"), ('incomplete', 0))
    nt.assert_equal(cc("a = '''\n   hi"), ('incomplete', 3))
    nt.assert_equal(cc("def a():\n x=1\n global x"), ('invalid', None))
    nt.assert_equal(cc("a \\ "), ('invalid', None))  # Nothing allowed after backslash
    nt.assert_equal(cc("1\\\n+2"), ('complete', None))
    nt.assert_equal(cc("exit"), ('complete', None))

    example = dedent("""
    if True:
        a=1""" )

    nt.assert_equal(cc(example), ('incomplete', 4))
    nt.assert_equal(cc(example+'\n'), ('complete', None))
    nt.assert_equal(cc(example+'\n    '), ('complete', None))

    # no need to loop on all the letters/numbers.
    short = '12abAB'+string.printable[62:]
    for c in short:
        # test does not raise:
        cc(c)
        for k in short:
            cc(c+k)

    nt.assert_equal(cc("def f():\n  x=0\n  \\\n  "), ('incomplete', 2))
277 288
def test_check_complete_II():
    """
    Test that multi-line strings are properly handled.

    Separate test function for convenience.
    """
    cc = ipt2.TransformerManager().check_complete
    # An unterminated triple-quoted string keeps the input incomplete.
    nt.assert_equal(cc('''def foo():\n    """'''), ('incomplete', 4))
287 298
288 299
def test_null_cleanup_transformer():
    """A cleanup transform returning [] must yield an empty transformed cell."""
    manager = ipt2.TransformerManager()
    manager.cleanup_transforms.insert(0, null_cleanup_transformer)
    assert manager.transform_cell("") == ""
293 304
294 305
295 306
296 307
def test_side_effects_I():
    """check_complete must skip cleanup transforms flagged has_side_effects."""
    calls = 0

    def spy(lines):
        nonlocal calls
        calls += 1
        return lines

    # Mark the transform as side-effecting so check_complete must not run it.
    spy.has_side_effects = True

    manager = ipt2.TransformerManager()
    manager.cleanup_transforms.insert(0, spy)
    assert manager.check_complete("a=1\n") == ('complete', None)
    assert calls == 0
310 321
311 322
312 323
313 324
def test_side_effects_II():
    """check_complete must also skip line transforms flagged has_side_effects."""
    calls = 0

    def spy(lines):
        nonlocal calls
        calls += 1
        return lines

    # Mark the transform as side-effecting so check_complete must not run it.
    spy.has_side_effects = True

    manager = ipt2.TransformerManager()
    manager.line_transforms.insert(0, spy)
    assert manager.check_complete("b=1\n") == ('complete', None)
    assert calls == 0
@@ -1,116 +1,126 b''
1 1 """Tests for the line-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2 for tests for token-based transformers.
5 5 """
6 6 import nose.tools as nt
7 7
8 8 from IPython.core import inputtransformer2 as ipt2
9 9
# Cell magic case: (input cell text, expected transformed text). The body
# lines are folded into a single run_cell_magic argument.
CELL_MAGIC = ("""\
%%foo arg
body 1
body 2
""", """\
get_ipython().run_cell_magic('foo', 'arg', 'body 1\\nbody 2\\n')
""")
17 17
def test_cell_magic():
    """A %%magic cell becomes a single run_cell_magic call."""
    for sample, expected in [CELL_MAGIC]:
        result = ipt2.cell_magic(sample.splitlines(keepends=True))
        nt.assert_equal(result, expected.splitlines(keepends=True))
22 22
# Classic interpreter prompts ('>>> ' / '... ') are stripped.
CLASSIC_PROMPT = ("""\
>>> for a in range(5):
...     print(a)
""", """\
for a in range(5):
    print(a)
""")

# Continuation prompts may appear from line 2 onwards with no initial '>>> '.
CLASSIC_PROMPT_L2 = ("""\
for a in range(5):
...     print(a)
...     print(a ** 2)
""", """\
for a in range(5):
    print(a)
    print(a ** 2)
""")
40 40
def test_classic_prompt():
    """'>>> '/'... ' prompts are stripped, even when only continuations appear."""
    for sample, expected in (CLASSIC_PROMPT, CLASSIC_PROMPT_L2):
        result = ipt2.classic_prompt(sample.splitlines(keepends=True))
        nt.assert_equal(result, expected.splitlines(keepends=True))
45 45
# IPython prompts ('In [n]: ' / '   ...: ') are stripped.
IPYTHON_PROMPT = ("""\
In [1]: for a in range(5):
   ...:     print(a)
""", """\
for a in range(5):
    print(a)
""")

# Continuation prompts may appear from line 2 onwards with no initial 'In [n]:'.
IPYTHON_PROMPT_L2 = ("""\
for a in range(5):
   ...:     print(a)
   ...:     print(a ** 2)
""", """\
for a in range(5):
    print(a)
    print(a ** 2)
""")
63 63
def test_ipython_prompt():
    """'In [n]: '/'...: ' prompts are stripped, including continuation-only input."""
    for sample, expected in (IPYTHON_PROMPT, IPYTHON_PROMPT_L2):
        result = ipt2.ipython_prompt(sample.splitlines(keepends=True))
        nt.assert_equal(result, expected.splitlines(keepends=True))
68 68
# leading_indent removes the first line's indentation from every line.
INDENT_SPACES = ("""\
    if True:
        a = 3
""", """\
if True:
    a = 3
""")

# The same, with tab indentation.
INDENT_TABS = ("""\
\tif True:
\t\tb = 4
""", """\
if True:
\tb = 4
""")
84 84
def test_leading_indent():
    """First-line indentation (spaces or tabs) is removed from every line."""
    for sample, expected in (INDENT_SPACES, INDENT_TABS):
        result = ipt2.leading_indent(sample.splitlines(keepends=True))
        nt.assert_equal(result, expected.splitlines(keepends=True))
89 89
# Leading blank/whitespace-only lines are dropped; interior blanks are kept.
LEADING_EMPTY_LINES = ("""\
\t

if True:
    a = 3

b = 4
""", """\
if True:
    a = 3

b = 4
""")

# Input that is entirely blank lines is returned unchanged.
ONLY_EMPTY_LINES = ("""\
\t

""", """\
\t

""")
111 111
def test_leading_empty_lines():
    """Leading blank lines are dropped; an all-blank cell passes through unchanged."""
    for sample, expected in (LEADING_EMPTY_LINES, ONLY_EMPTY_LINES):
        result = ipt2.leading_empty_lines(sample.splitlines(keepends=True))
        nt.assert_equal(result, expected.splitlines(keepends=True))
117
# A cell magic terminated by CRLF; the '\r' must not leak into the magic name.
CRLF_MAGIC = ([
    "%%ls\r\n"
], [
    "get_ipython().run_cell_magic('ls', '', '')\n"
])
123
def test_crlf_magic():
    """Cell magics terminated by CRLF are still recognised and transformed."""
    for sample, expected in [CRLF_MAGIC]:
        nt.assert_equal(ipt2.cell_magic(sample), expected)
General Comments 0
You need to be logged in to leave comments. Login now