Merge pull request #11307 from Carreau/safe-input-transformer...
Min RK
r24575:fcfc868d merge
@@ -1,637 +1,648 @@
1 1 """Input transformer machinery to support IPython special syntax.
2 2
3 3 This includes the machinery to recognise and transform ``%magic`` commands,
4 4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 5
6 6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 7 deprecated in 7.0.
8 8 """
9 9
10 10 # Copyright (c) IPython Development Team.
11 11 # Distributed under the terms of the Modified BSD License.
12 12
13 13 from codeop import compile_command
14 14 import re
15 15 import tokenize
16 16 from typing import List, Tuple
17 17 import warnings
18 18
19 19 _indent_re = re.compile(r'^[ \t]+')
20 20
21 21 def leading_indent(lines):
22 22 """Remove leading indentation.
23 23
24 24 If the first line starts with spaces or tabs, the same whitespace will be
25 25 removed from each following line in the cell.
26 26 """
27 27 m = _indent_re.match(lines[0])
28 28 if not m:
29 29 return lines
30 30 space = m.group(0)
31 31 n = len(space)
32 32 return [l[n:] if l.startswith(space) else l
33 33 for l in lines]
34 34
35 35 class PromptStripper:
36 36 """Remove matching input prompts from a block of input.
37 37
38 38 Parameters
39 39 ----------
40 40 prompt_re : regular expression
41 41 A regular expression matching any input prompt (including continuation,
42 42 e.g. ``...``)
43 43 initial_re : regular expression, optional
44 44 A regular expression matching only the initial prompt, but not continuation.
45 45 If no initial expression is given, prompt_re will be used everywhere.
46 46 Used mainly for plain Python prompts (``>>>``), where the continuation prompt
47 47 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
48 48
49 49 If initial_re and prompt_re differ,
50 50 only initial_re will be tested against the first line.
51 51 If any prompt is found on the first two lines,
52 52 prompts will be stripped from the rest of the block.
53 53 """
54 54 def __init__(self, prompt_re, initial_re=None):
55 55 self.prompt_re = prompt_re
56 56 self.initial_re = initial_re or prompt_re
57 57
58 58 def _strip(self, lines):
59 59 return [self.prompt_re.sub('', l, count=1) for l in lines]
60 60
61 61 def __call__(self, lines):
62 62 if self.initial_re.match(lines[0]) or \
63 63 (len(lines) > 1 and self.prompt_re.match(lines[1])):
64 64 return self._strip(lines)
65 65 return lines
66 66
67 67 classic_prompt = PromptStripper(
68 68 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
69 69 initial_re=re.compile(r'^>>>( |$)')
70 70 )
71 71
72 72 ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
73 73
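# For example, classic_prompt turns [">>> a = 1\n", "... b = 2\n"] into
# ["a = 1\n", "b = 2\n"], and ipython_prompt similarly strips "In [1]: " and
# "   ...: " continuation prompts.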
74 74 def cell_magic(lines):
75 75 if not lines[0].startswith('%%'):
76 76 return lines
77 77 if re.match(r'%%\w+\?', lines[0]):
78 78 # This case will be handled by help_end
79 79 return lines
80 80 magic_name, _, first_line = lines[0][2:-1].partition(' ')
81 81 body = ''.join(lines[1:])
82 82 return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
83 83 % (magic_name, first_line, body)]
84 84
85 85
86 86 def _find_assign_op(token_line):
87 87 """Get the index of the first assignment in the line ('=' not inside brackets)
88 88
89 89 Note: We don't try to support multiple special assignments (a = b = %foo)
90 90 """
91 91 paren_level = 0
92 92 for i, ti in enumerate(token_line):
93 93 s = ti.string
94 94 if s == '=' and paren_level == 0:
95 95 return i
96 96 if s in '([{':
97 97 paren_level += 1
98 98 elif s in ')]}':
99 99 if paren_level > 0:
100 100 paren_level -= 1
101 101
102 102 def find_end_of_continued_line(lines, start_line: int):
103 103 """Find the last line of a line explicitly extended using backslashes.
104 104
105 105 Uses 0-indexed line numbers.
106 106 """
107 107 end_line = start_line
108 108 while lines[end_line].endswith('\\\n'):
109 109 end_line += 1
110 110 if end_line >= len(lines):
111 111 break
112 112 return end_line
113 113
114 114 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
115 115 """Assemble a single line from multiple continued line pieces
116 116
117 117 Continued lines are lines ending in ``\``, and the line following the last
118 118 ``\`` in the block.
119 119
120 120 For example, this code continues over multiple lines::
121 121
122 122 if (assign_ix is not None) \
123 123 and (len(line) >= assign_ix + 2) \
124 124 and (line[assign_ix+1].string == '%') \
125 125 and (line[assign_ix+2].type == tokenize.NAME):
126 126
127 127 This statement contains four continued line pieces.
128 128 Assembling these pieces into a single line would give::
129 129
130 130 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
131 131
132 132 This uses 0-indexed line numbers. *start* is (lineno, colno).
133 133
134 134 Used to allow ``%magic`` and ``!system`` commands to be continued over
135 135 multiple lines.
136 136 """
137 137 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
138 138 return ' '.join([p[:-2] for p in parts[:-1]] # Strip backslash+newline
139 139 + [parts[-1][:-1]]) # Strip newline from last line
140 140
141 141 class TokenTransformBase:
142 142 """Base class for transformations which examine tokens.
143 143
144 144 Special syntax should not be transformed when it occurs inside strings or
145 145 comments. This is hard to reliably avoid with regexes. The solution is to
146 146 tokenise the code as Python, and recognise the special syntax in the tokens.
147 147
148 148 IPython's special syntax is not valid Python syntax, so tokenising may go
149 149 wrong after the special syntax starts. These classes therefore find and
150 150 transform *one* instance of special syntax at a time into regular Python
151 151 syntax. After each transformation, tokens are regenerated to find the next
152 152 piece of special syntax.
153 153
154 154 Subclasses need to implement one class method (find)
155 155 and one regular method (transform).
156 156
157 157 The priority attribute can select which transformation to apply if multiple
158 158 transformers match in the same place. Lower numbers have higher priority.
159 159 This allows "%magic?" to be turned into a help call rather than a magic call.
160 160 """
161 161 # Lower numbers -> higher priority (for matches in the same location)
162 162 priority = 10
163 163
164 164 def sortby(self):
165 165 return self.start_line, self.start_col, self.priority
166 166
167 167 def __init__(self, start):
168 168 self.start_line = start[0] - 1 # Shift from 1-index to 0-index
169 169 self.start_col = start[1]
170 170
171 171 @classmethod
172 172 def find(cls, tokens_by_line):
173 173 """Find one instance of special syntax in the provided tokens.
174 174
175 175 Tokens are grouped into logical lines for convenience,
176 176 so it is easy to e.g. look at the first token of each line.
177 177 *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.
178 178
179 179 This should return an instance of its class, pointing to the start
180 180 position it has found, or None if it found no match.
181 181 """
182 182 raise NotImplementedError
183 183
184 184 def transform(self, lines: List[str]):
185 185 """Transform one instance of special syntax found by ``find()``
186 186
187 187 Takes a list of strings representing physical lines,
188 188 returns a similar list of transformed lines.
189 189 """
190 190 raise NotImplementedError
191 191
192 192 class MagicAssign(TokenTransformBase):
193 193 """Transformer for assignments from magics (a = %foo)"""
194 194 @classmethod
195 195 def find(cls, tokens_by_line):
196 196 """Find the first magic assignment (a = %foo) in the cell.
197 197 """
198 198 for line in tokens_by_line:
199 199 assign_ix = _find_assign_op(line)
200 200 if (assign_ix is not None) \
201 201 and (len(line) >= assign_ix + 2) \
202 202 and (line[assign_ix+1].string == '%') \
203 203 and (line[assign_ix+2].type == tokenize.NAME):
204 204 return cls(line[assign_ix+1].start)
205 205
206 206 def transform(self, lines: List[str]):
207 207 """Transform a magic assignment found by the ``find()`` classmethod.
208 208 """
209 209 start_line, start_col = self.start_line, self.start_col
210 210 lhs = lines[start_line][:start_col]
211 211 end_line = find_end_of_continued_line(lines, start_line)
212 212 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
213 213 assert rhs.startswith('%'), rhs
214 214 magic_name, _, args = rhs[1:].partition(' ')
215 215
216 216 lines_before = lines[:start_line]
217 217 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
218 218 new_line = lhs + call + '\n'
219 219 lines_after = lines[end_line+1:]
220 220
221 221 return lines_before + [new_line] + lines_after
222 222
223 223
224 224 class SystemAssign(TokenTransformBase):
225 225 """Transformer for assignments from system commands (a = !foo)"""
226 226 @classmethod
227 227 def find(cls, tokens_by_line):
228 228 """Find the first system assignment (a = !foo) in the cell.
229 229 """
230 230 for line in tokens_by_line:
231 231 assign_ix = _find_assign_op(line)
232 232 if (assign_ix is not None) \
233 233 and (len(line) >= assign_ix + 2) \
234 234 and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
235 235 ix = assign_ix + 1
236 236
237 237 while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
238 238 if line[ix].string == '!':
239 239 return cls(line[ix].start)
240 240 elif not line[ix].string.isspace():
241 241 break
242 242 ix += 1
243 243
244 244 def transform(self, lines: List[str]):
245 245 """Transform a system assignment found by the ``find()`` classmethod.
246 246 """
247 247 start_line, start_col = self.start_line, self.start_col
248 248
249 249 lhs = lines[start_line][:start_col]
250 250 end_line = find_end_of_continued_line(lines, start_line)
251 251 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
252 252 assert rhs.startswith('!'), rhs
253 253 cmd = rhs[1:]
254 254
255 255 lines_before = lines[:start_line]
256 256 call = "get_ipython().getoutput({!r})".format(cmd)
257 257 new_line = lhs + call + '\n'
258 258 lines_after = lines[end_line + 1:]
259 259
260 260 return lines_before + [new_line] + lines_after
261 261
262 262 # The escape sequences that define the syntax transformations IPython will
263 263 # apply to user input. These can NOT be just changed here: many regular
264 264 # expressions and other parts of the code may use their hardcoded values, and
265 265 # for all intents and purposes they constitute the 'IPython syntax', so they
266 266 # should be considered fixed.
267 267
268 268 ESC_SHELL = '!' # Send line to underlying system shell
269 269 ESC_SH_CAP = '!!' # Send line to system shell and capture output
270 270 ESC_HELP = '?' # Find information about object
271 271 ESC_HELP2 = '??' # Find extra-detailed information about object
272 272 ESC_MAGIC = '%' # Call magic function
273 273 ESC_MAGIC2 = '%%' # Call cell-magic function
274 274 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
275 275 ESC_QUOTE2 = ';' # Quote all args as a single string, call
276 276 ESC_PAREN = '/' # Call first argument with rest of line as arguments
277 277
278 278 ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
279 279 ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
280 280
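# For example (based on the _tr_* helpers below and the autocall tests in this PR):
#   ",f 1 2 3"  ->  f("1", "2", "3")
#   ";f 1 2 3"  ->  f("1 2 3")
#   "/f 1 2 3"  ->  f(1, 2, 3)
#   "!ls"       ->  get_ipython().system('ls')
#   "%ls"       ->  get_ipython().run_line_magic('ls', '')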
281 281 def _make_help_call(target, esc, next_input=None):
282 282 """Prepares a pinfo(2)/psearch call from a target name and the escape
283 283 (i.e. ? or ??)"""
284 284 method = 'pinfo2' if esc == '??' \
285 285 else 'psearch' if '*' in target \
286 286 else 'pinfo'
287 287 arg = " ".join([method, target])
288 288 # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
289 289 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
290 290 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
291 291 if next_input is None:
292 292 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
293 293 else:
294 294 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
295 295 (next_input, t_magic_name, t_magic_arg_s)
296 296
297 297 def _tr_help(content):
298 298 """Translate lines escaped with: ?
299 299
300 300 A naked help line should fire the intro help screen (shell.show_usage())
301 301 """
302 302 if not content:
303 303 return 'get_ipython().show_usage()'
304 304
305 305 return _make_help_call(content, '?')
306 306
307 307 def _tr_help2(content):
308 308 """Translate lines escaped with: ??
309 309
310 310 A naked help line should fire the intro help screen (shell.show_usage())
311 311 """
312 312 if not content:
313 313 return 'get_ipython().show_usage()'
314 314
315 315 return _make_help_call(content, '??')
316 316
317 317 def _tr_magic(content):
318 318 "Translate lines escaped with a percent sign: %"
319 319 name, _, args = content.partition(' ')
320 320 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
321 321
322 322 def _tr_quote(content):
323 323 "Translate lines escaped with a comma: ,"
324 324 name, _, args = content.partition(' ')
325 325 return '%s("%s")' % (name, '", "'.join(args.split()) )
326 326
327 327 def _tr_quote2(content):
328 328 "Translate lines escaped with a semicolon: ;"
329 329 name, _, args = content.partition(' ')
330 330 return '%s("%s")' % (name, args)
331 331
332 332 def _tr_paren(content):
333 333 "Translate lines escaped with a slash: /"
334 334 name, _, args = content.partition(' ')
335 335 return '%s(%s)' % (name, ", ".join(args.split()))
336 336
337 337 tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
338 338 ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
339 339 ESC_HELP : _tr_help,
340 340 ESC_HELP2 : _tr_help2,
341 341 ESC_MAGIC : _tr_magic,
342 342 ESC_QUOTE : _tr_quote,
343 343 ESC_QUOTE2 : _tr_quote2,
344 344 ESC_PAREN : _tr_paren }
345 345
346 346 class EscapedCommand(TokenTransformBase):
347 347 """Transformer for escaped commands like %foo, !foo, or /foo"""
348 348 @classmethod
349 349 def find(cls, tokens_by_line):
350 350 """Find the first escaped command (%foo, !foo, etc.) in the cell.
351 351 """
352 352 for line in tokens_by_line:
353 353 ix = 0
354 354 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
355 355 ix += 1
356 356 if line[ix].string in ESCAPE_SINGLES:
357 357 return cls(line[ix].start)
358 358
359 359 def transform(self, lines):
360 360 """Transform an escaped line found by the ``find()`` classmethod.
361 361 """
362 362 start_line, start_col = self.start_line, self.start_col
363 363
364 364 indent = lines[start_line][:start_col]
365 365 end_line = find_end_of_continued_line(lines, start_line)
366 366 line = assemble_continued_line(lines, (start_line, start_col), end_line)
367 367
368 368 if line[:2] in ESCAPE_DOUBLES:
369 369 escape, content = line[:2], line[2:]
370 370 else:
371 371 escape, content = line[:1], line[1:]
372 372 call = tr[escape](content)
373 373
374 374 lines_before = lines[:start_line]
375 375 new_line = indent + call + '\n'
376 376 lines_after = lines[end_line + 1:]
377 377
378 378 return lines_before + [new_line] + lines_after
379 379
380 380 _help_end_re = re.compile(r"""(%{0,2}
381 381 [a-zA-Z_*][\w*]* # Variable name
382 382 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
383 383 )
384 384 (\?\??)$ # ? or ??
385 385 """,
386 386 re.VERBOSE)
387 387
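# For example, searching "foo.bar??" gives group(1) == "foo.bar" (the target) and
# group(3) == "??" (the escape); for "%foo?" the leading % is kept in group(1).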
388 388 class HelpEnd(TokenTransformBase):
389 389 """Transformer for help syntax: obj? and obj??"""
390 390 # This needs to be higher priority (lower number) than EscapedCommand so
391 391 # that inspecting magics (%foo?) works.
392 392 priority = 5
393 393
394 394 def __init__(self, start, q_locn):
395 395 super().__init__(start)
396 396 self.q_line = q_locn[0] - 1 # Shift from 1-indexed to 0-indexed
397 397 self.q_col = q_locn[1]
398 398
399 399 @classmethod
400 400 def find(cls, tokens_by_line):
401 401 """Find the first help command (foo?) in the cell.
402 402 """
403 403 for line in tokens_by_line:
404 404 # Last token is NEWLINE; look at last but one
405 405 if len(line) > 2 and line[-2].string == '?':
406 406 # Find the first token that's not INDENT/DEDENT
407 407 ix = 0
408 408 while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
409 409 ix += 1
410 410 return cls(line[ix].start, line[-2].start)
411 411
412 412 def transform(self, lines):
413 413 """Transform a help command found by the ``find()`` classmethod.
414 414 """
415 415 piece = ''.join(lines[self.start_line:self.q_line+1])
416 416 indent, content = piece[:self.start_col], piece[self.start_col:]
417 417 lines_before = lines[:self.start_line]
418 418 lines_after = lines[self.q_line + 1:]
419 419
420 420 m = _help_end_re.search(content)
421 if not m:
422 raise SyntaxError(content)
421 423 assert m is not None, content
422 424 target = m.group(1)
423 425 esc = m.group(3)
424 426
425 427 # If we're mid-command, put it back on the next prompt for the user.
426 428 next_input = None
427 429 if (not lines_before) and (not lines_after) \
428 430 and content.strip() != m.group(0):
429 431 next_input = content.rstrip('?\n')
430 432
431 433 call = _make_help_call(target, esc, next_input=next_input)
432 434 new_line = indent + call + '\n'
433 435
434 436 return lines_before + [new_line] + lines_after
435 437
436 438 def make_tokens_by_line(lines):
437 439 """Tokenize a series of lines and group tokens by line.
438 440
439 441 The tokens for a multiline Python string or expression are
440 442 grouped as one line.
441 443 """
442 444 # NL tokens are used inside multiline expressions, but also after blank
443 445 # lines or comments. This is intentional - see https://bugs.python.org/issue17061
444 446 # We want to group the former case together but split the latter, so we
445 447 # track parentheses level, similar to the internals of tokenize.
446 448 NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
447 449 tokens_by_line = [[]]
448 450 parenlev = 0
449 451 try:
450 452 for token in tokenize.generate_tokens(iter(lines).__next__):
451 453 tokens_by_line[-1].append(token)
452 454 if (token.type == NEWLINE) \
453 455 or ((token.type == NL) and (parenlev <= 0)):
454 456 tokens_by_line.append([])
455 457 elif token.string in {'(', '[', '{'}:
456 458 parenlev += 1
457 459 elif token.string in {')', ']', '}'}:
458 460 if parenlev > 0:
459 461 parenlev -= 1
460 462 except tokenize.TokenError:
461 463 # Input ended in a multiline string or expression. That's OK for us.
462 464 pass
465 if not tokens_by_line[-1]:
466 tokens_by_line.pop()
463 467
464 468 return tokens_by_line
465 469
466 470 def show_linewise_tokens(s: str):
467 471 """For investigation and debugging"""
468 472 if not s.endswith('\n'):
469 473 s += '\n'
470 474 lines = s.splitlines(keepends=True)
471 475 for line in make_tokens_by_line(lines):
472 476 print("Line -------")
473 477 for tokinfo in line:
474 478 print(" ", tokinfo)
475 479
476 480 # Arbitrary limit to prevent getting stuck in infinite loops
477 481 TRANSFORM_LOOP_LIMIT = 500
478 482
479 483 class TransformerManager:
480 484 """Applies various transformations to a cell or code block.
481 485
482 486 The key methods for external use are ``transform_cell()``
483 487 and ``check_complete()``.
484 488 """
485 489 def __init__(self):
486 490 self.cleanup_transforms = [
487 491 leading_indent,
488 492 classic_prompt,
489 493 ipython_prompt,
490 494 ]
491 495 self.line_transforms = [
492 496 cell_magic,
493 497 ]
494 498 self.token_transformers = [
495 499 MagicAssign,
496 500 SystemAssign,
497 501 EscapedCommand,
498 502 HelpEnd,
499 503 ]
500 504
501 505 def do_one_token_transform(self, lines):
502 506 """Find and run the transform earliest in the code.
503 507
504 508 Returns (changed, lines).
505 509
506 510 This method is called repeatedly until changed is False, indicating
507 511 that all available transformations are complete.
508 512
509 513 The tokens following IPython special syntax might not be valid, so
510 514 the transformed code is retokenised every time to identify the next
511 515 piece of special syntax. Hopefully long code cells are mostly valid
512 516 Python, not using lots of IPython special syntax, so this shouldn't be
513 517 a performance issue.
514 518 """
515 519 tokens_by_line = make_tokens_by_line(lines)
516 520 candidates = []
517 521 for transformer_cls in self.token_transformers:
518 522 transformer = transformer_cls.find(tokens_by_line)
519 523 if transformer:
520 524 candidates.append(transformer)
521 525
522 526 if not candidates:
523 527 # Nothing to transform
524 528 return False, lines
525
526 transformer = min(candidates, key=TokenTransformBase.sortby)
527 return True, transformer.transform(lines)
529 ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
530 for transformer in ordered_transformers:
531 try:
532 return True, transformer.transform(lines)
533 except SyntaxError:
534 pass
535 return False, lines
528 536
529 537 def do_token_transforms(self, lines):
530 538 for _ in range(TRANSFORM_LOOP_LIMIT):
531 539 changed, lines = self.do_one_token_transform(lines)
532 540 if not changed:
533 541 return lines
534 542
535 543 raise RuntimeError("Input transformation still changing after "
536 544 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
537 545
538 546 def transform_cell(self, cell: str) -> str:
539 547 """Transforms a cell of input code"""
540 548 if not cell.endswith('\n'):
541 549 cell += '\n' # Ensure the cell has a trailing newline
542 550 lines = cell.splitlines(keepends=True)
543 551 for transform in self.cleanup_transforms + self.line_transforms:
544 552 lines = transform(lines)
545 553
546 554 lines = self.do_token_transforms(lines)
547 555 return ''.join(lines)
548 556
549 557 def check_complete(self, cell: str):
550 558 """Return whether a block of code is ready to execute, or should be continued
551 559
552 560 Parameters
553 561 ----------
554 562 cell : string
555 563 Python input code, which can be multiline.
556 564
557 565 Returns
558 566 -------
559 567 status : str
560 568 One of 'complete', 'incomplete', or 'invalid' if source is not a
561 569 prefix of valid code.
562 570 indent_spaces : int or None
563 571 The number of spaces by which to indent the next line of code. If
564 572 status is not 'incomplete', this is None.
565 573 """
566 574 if not cell.endswith('\n'):
567 575 cell += '\n' # Ensure the cell has a trailing newline
568 576 lines = cell.splitlines(keepends=True)
569 577 if lines[-1][:-1].endswith('\\'):
570 578 # Explicit backslash continuation
571 579 return 'incomplete', find_last_indent(lines)
572 580
573 581 try:
574 582 for transform in self.cleanup_transforms:
575 583 lines = transform(lines)
576 584 except SyntaxError:
577 585 return 'invalid', None
578 586
579 587 if lines[0].startswith('%%'):
580 588 # Special case for cell magics - completion marked by blank line
581 589 if lines[-1].strip():
582 590 return 'incomplete', find_last_indent(lines)
583 591 else:
584 592 return 'complete', None
585 593
586 594 try:
587 595 for transform in self.line_transforms:
588 596 lines = transform(lines)
589 597 lines = self.do_token_transforms(lines)
590 598 except SyntaxError:
591 599 return 'invalid', None
592 600
593 601 tokens_by_line = make_tokens_by_line(lines)
602 if not tokens_by_line:
603 return 'incomplete', find_last_indent(lines)
594 604 if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
595 605 # We're in a multiline string or expression
596 606 return 'incomplete', find_last_indent(lines)
597
607 if len(tokens_by_line) == 1:
608 return 'incomplete', find_last_indent(lines)
598 609 # Find the last token on the previous line that's not NEWLINE or COMMENT
599 610 toks_last_line = tokens_by_line[-2]
600 611 ix = len(toks_last_line) - 1
601 612 while ix >= 0 and toks_last_line[ix].type in {tokenize.NEWLINE,
602 613 tokenize.COMMENT}:
603 614 ix -= 1
604 615
605 616 if toks_last_line[ix].string == ':':
606 617 # The last line starts a block (e.g. 'if foo:')
607 618 ix = 0
608 619 while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
609 620 ix += 1
610 621 indent = toks_last_line[ix].start[1]
611 622 return 'incomplete', indent + 4
612 623
613 624 # If there's a blank line at the end, assume we're ready to execute.
614 625 if not lines[-1].strip():
615 626 return 'complete', None
616 627
617 628 # At this point, our checks think the code is complete (or invalid).
618 629 # We'll use codeop.compile_command to check this with the real parser.
619 630
620 631 try:
621 632 with warnings.catch_warnings():
622 633 warnings.simplefilter('error', SyntaxWarning)
623 634 res = compile_command(''.join(lines), symbol='exec')
624 635 except (SyntaxError, OverflowError, ValueError, TypeError,
625 636 MemoryError, SyntaxWarning):
626 637 return 'invalid', None
627 638 else:
628 639 if res is None:
629 640 return 'incomplete', find_last_indent(lines)
630 641 return 'complete', None
631 642
632 643
633 644 def find_last_indent(lines):
634 645 m = _indent_re.match(lines[-1])
635 646 if not m:
636 647 return 0
637 648 return len(m.group(0).replace('\t', ' '*4))
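
For context, here is a minimal usage sketch of the module above (not part of the diff; the expected values follow from the transforms shown and from the tests below, assuming IPython 7.0 with this change applied):

    from IPython.core.inputtransformer2 import TransformerManager

    tm = TransformerManager()

    # Escaped commands are rewritten into plain Python calls.
    tm.transform_cell("%ls -la")
    # -> "get_ipython().run_line_magic('ls', '-la')\n"

    # check_complete() reports whether more input is expected and a suggested indent.
    tm.check_complete("for a in range(5):")   # ('incomplete', 4)
    tm.check_complete("a = 1")                # ('complete', None)
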
@@ -1,195 +1,217 @@
1 1 """Tests for the token-based transformers in IPython.core.inputtransformer2
2 2
3 3 Line-based transformers are the simpler ones; token-based transformers are
4 4 more complex. See test_inputtransformer2_line for tests for line-based
5 5 transformations.
6 6 """
7 7 import nose.tools as nt
8 import string
8 9
9 10 from IPython.core import inputtransformer2 as ipt2
10 11 from IPython.core.inputtransformer2 import make_tokens_by_line
11 12
12 13 MULTILINE_MAGIC = ("""\
13 14 a = f()
14 15 %foo \\
15 16 bar
16 17 g()
17 18 """.splitlines(keepends=True), (2, 0), """\
18 19 a = f()
19 20 get_ipython().run_line_magic('foo', ' bar')
20 21 g()
21 22 """.splitlines(keepends=True))
22 23
23 24 INDENTED_MAGIC = ("""\
24 25 for a in range(5):
25 26 %ls
26 27 """.splitlines(keepends=True), (2, 4), """\
27 28 for a in range(5):
28 29 get_ipython().run_line_magic('ls', '')
29 30 """.splitlines(keepends=True))
30 31
31 32 MULTILINE_MAGIC_ASSIGN = ("""\
32 33 a = f()
33 34 b = %foo \\
34 35 bar
35 36 g()
36 37 """.splitlines(keepends=True), (2, 4), """\
37 38 a = f()
38 39 b = get_ipython().run_line_magic('foo', ' bar')
39 40 g()
40 41 """.splitlines(keepends=True))
41 42
42 43 MULTILINE_SYSTEM_ASSIGN = ("""\
43 44 a = f()
44 45 b = !foo \\
45 46 bar
46 47 g()
47 48 """.splitlines(keepends=True), (2, 4), """\
48 49 a = f()
49 50 b = get_ipython().getoutput('foo bar')
50 51 g()
51 52 """.splitlines(keepends=True))
52 53
53 54 AUTOCALL_QUOTE = (
54 55 [",f 1 2 3\n"], (1, 0),
55 56 ['f("1", "2", "3")\n']
56 57 )
57 58
58 59 AUTOCALL_QUOTE2 = (
59 60 [";f 1 2 3\n"], (1, 0),
60 61 ['f("1 2 3")\n']
61 62 )
62 63
63 64 AUTOCALL_PAREN = (
64 65 ["/f 1 2 3\n"], (1, 0),
65 66 ['f(1, 2, 3)\n']
66 67 )
67 68
68 69 SIMPLE_HELP = (
69 70 ["foo?\n"], (1, 0),
70 71 ["get_ipython().run_line_magic('pinfo', 'foo')\n"]
71 72 )
72 73
73 74 DETAILED_HELP = (
74 75 ["foo??\n"], (1, 0),
75 76 ["get_ipython().run_line_magic('pinfo2', 'foo')\n"]
76 77 )
77 78
78 79 MAGIC_HELP = (
79 80 ["%foo?\n"], (1, 0),
80 81 ["get_ipython().run_line_magic('pinfo', '%foo')\n"]
81 82 )
82 83
83 84 HELP_IN_EXPR = (
84 85 ["a = b + c?\n"], (1, 0),
85 86 ["get_ipython().set_next_input('a = b + c');"
86 87 "get_ipython().run_line_magic('pinfo', 'c')\n"]
87 88 )
88 89
89 90 HELP_CONTINUED_LINE = ("""\
90 91 a = \\
91 92 zip?
92 93 """.splitlines(keepends=True), (1, 0),
93 94 [r"get_ipython().set_next_input('a = \\\nzip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
94 95 )
95 96
96 97 HELP_MULTILINE = ("""\
97 98 (a,
98 99 b) = zip?
99 100 """.splitlines(keepends=True), (1, 0),
100 101 [r"get_ipython().set_next_input('(a,\nb) = zip');get_ipython().run_line_magic('pinfo', 'zip')" + "\n"]
101 102 )
102 103
104 def check_make_token_by_line_never_ends_empty():
105 """
106 Check that no sequence of one or two printable characters ends up producing an empty list of tokens
107 """
108 from string import printable
109 for c in printable:
110 nt.assert_not_equal(make_tokens_by_line(c)[-1], [])
111 for k in printable:
112 nt.assert_not_equal(make_tokens_by_line(c+k)[-1], [])
113
103 114 def check_find(transformer, case, match=True):
104 115 sample, expected_start, _ = case
105 116 tbl = make_tokens_by_line(sample)
106 117 res = transformer.find(tbl)
107 118 if match:
108 119 # start_line is stored 0-indexed, expected values are 1-indexed
109 120 nt.assert_equal((res.start_line+1, res.start_col), expected_start)
110 121 return res
111 122 else:
112 123 nt.assert_is(res, None)
113 124
114 125 def check_transform(transformer_cls, case):
115 126 lines, start, expected = case
116 127 transformer = transformer_cls(start)
117 128 nt.assert_equal(transformer.transform(lines), expected)
118 129
119 130 def test_continued_line():
120 131 lines = MULTILINE_MAGIC_ASSIGN[0]
121 132 nt.assert_equal(ipt2.find_end_of_continued_line(lines, 1), 2)
122 133
123 134 nt.assert_equal(ipt2.assemble_continued_line(lines, (1, 5), 2), "foo bar")
124 135
125 136 def test_find_assign_magic():
126 137 check_find(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
127 138 check_find(ipt2.MagicAssign, MULTILINE_SYSTEM_ASSIGN, match=False)
128 139
129 140 def test_transform_assign_magic():
130 141 check_transform(ipt2.MagicAssign, MULTILINE_MAGIC_ASSIGN)
131 142
132 143 def test_find_assign_system():
133 144 check_find(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
134 145 check_find(ipt2.SystemAssign, (["a = !ls\n"], (1, 5), None))
135 146 check_find(ipt2.SystemAssign, (["a=!ls\n"], (1, 2), None))
136 147 check_find(ipt2.SystemAssign, MULTILINE_MAGIC_ASSIGN, match=False)
137 148
138 149 def test_transform_assign_system():
139 150 check_transform(ipt2.SystemAssign, MULTILINE_SYSTEM_ASSIGN)
140 151
141 152 def test_find_magic_escape():
142 153 check_find(ipt2.EscapedCommand, MULTILINE_MAGIC)
143 154 check_find(ipt2.EscapedCommand, INDENTED_MAGIC)
144 155 check_find(ipt2.EscapedCommand, MULTILINE_MAGIC_ASSIGN, match=False)
145 156
146 157 def test_transform_magic_escape():
147 158 check_transform(ipt2.EscapedCommand, MULTILINE_MAGIC)
148 159 check_transform(ipt2.EscapedCommand, INDENTED_MAGIC)
149 160
150 161 def test_find_autocalls():
151 162 for case in [AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN]:
152 163 print("Testing %r" % case[0])
153 164 check_find(ipt2.EscapedCommand, case)
154 165
155 166 def test_transform_autocall():
156 167 for case in [AUTOCALL_QUOTE, AUTOCALL_QUOTE2, AUTOCALL_PAREN]:
157 168 print("Testing %r" % case[0])
158 169 check_transform(ipt2.EscapedCommand, case)
159 170
160 171 def test_find_help():
161 172 for case in [SIMPLE_HELP, DETAILED_HELP, MAGIC_HELP, HELP_IN_EXPR]:
162 173 check_find(ipt2.HelpEnd, case)
163 174
164 175 tf = check_find(ipt2.HelpEnd, HELP_CONTINUED_LINE)
165 176 nt.assert_equal(tf.q_line, 1)
166 177 nt.assert_equal(tf.q_col, 3)
167 178
168 179 tf = check_find(ipt2.HelpEnd, HELP_MULTILINE)
169 180 nt.assert_equal(tf.q_line, 1)
170 181 nt.assert_equal(tf.q_col, 8)
171 182
172 183 # ? in a comment does not trigger help
173 184 check_find(ipt2.HelpEnd, (["foo # bar?\n"], None, None), match=False)
174 185 # Nor in a string
175 186 check_find(ipt2.HelpEnd, (["foo = '''bar?\n"], None, None), match=False)
176 187
177 188 def test_transform_help():
178 189 tf = ipt2.HelpEnd((1, 0), (1, 9))
179 190 nt.assert_equal(tf.transform(HELP_IN_EXPR[0]), HELP_IN_EXPR[2])
180 191
181 192 tf = ipt2.HelpEnd((1, 0), (2, 3))
182 193 nt.assert_equal(tf.transform(HELP_CONTINUED_LINE[0]), HELP_CONTINUED_LINE[2])
183 194
184 195 tf = ipt2.HelpEnd((1, 0), (2, 8))
185 196 nt.assert_equal(tf.transform(HELP_MULTILINE[0]), HELP_MULTILINE[2])
186 197
187 198 def test_check_complete():
188 199 cc = ipt2.TransformerManager().check_complete
189 200 nt.assert_equal(cc("a = 1"), ('complete', None))
190 201 nt.assert_equal(cc("for a in range(5):"), ('incomplete', 4))
191 202 nt.assert_equal(cc("raise = 2"), ('invalid', None))
192 203 nt.assert_equal(cc("a = [1,\n2,"), ('incomplete', 0))
204 nt.assert_equal(cc(")"), ('incomplete', 0))
205 nt.assert_equal(cc("\\\r\n"), ('incomplete', 0))
193 206 nt.assert_equal(cc("a = '''\n hi"), ('incomplete', 3))
194 207 nt.assert_equal(cc("def a():\n x=1\n global x"), ('invalid', None))
195 208 nt.assert_equal(cc("a \\ "), ('invalid', None)) # Nothing allowed after backslash
209
210 # no need to loop on all the letters/numbers.
211 short = '12abAB'+string.printable[62:]
212 for c in short:
213 # test does not raise:
214 cc(c)
215 for k in short:
216 cc(c+k)
217
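
As a companion to the new tests, a quick sketch of the inputs that exercise the new empty-token-group handling (return values are taken from the assertions above):

    from IPython.core.inputtransformer2 import TransformerManager

    cc = TransformerManager().check_complete

    # Tokenization of these inputs ends in an error; the empty trailing token group
    # is now dropped and the input is reported as incomplete rather than erroring out.
    cc(")")        # ('incomplete', 0)
    cc("\\\r\n")   # ('incomplete', 0)
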