r24177 | """Input transformer machinery to support IPython special syntax. | ||
This includes the machinery to recognise and transform ``%magic`` commands, | ||||
``!system`` commands, ``help?`` querying, prompt stripping, and so forth. | ||||
Thomas Kluyver
|
r24407 | |||
Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were | ||||
deprecated in 7.0. | ||||
Thomas Kluyver
|
r24177 | """ | ||
# Copyright (c) IPython Development Team. | ||||
# Distributed under the terms of the Modified BSD License. | ||||
Thomas Kluyver
|
from codeop import compile_command
import re
import tokenize
from typing import List, Tuple, Optional, Any
import warnings

_indent_re = re.compile(r'^[ \t]+')

def leading_empty_lines(lines):
    """Remove leading empty lines

    If the leading lines are empty or contain only whitespace, they will be
    removed.
    """
    if not lines:
        return lines
    for i, line in enumerate(lines):
        if line and not line.isspace():
            return lines[i:]
    return lines

def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line in the cell.
    """
    if not lines:
        return lines
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]

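# Example (illustrative): both cleanup helpers take and return a list of
# physical lines, e.g.
#
#     leading_empty_lines(['\n', '  \n', 'x = 1\n'])   # -> ['x = 1\n']
#     leading_indent(['    if x:\n', '        y\n'])   # -> ['if x:\n', '    y\n']
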
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation,
        e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts (``>>>``), where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    Notes
    -----
    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        if not lines:
            return lines

        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines


classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))

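# Example (illustrative): stripping prompts copied from interactive sessions.
#
#     classic_prompt(['>>> x = 1\n', '... y = 2\n'])
#     # -> ['x = 1\n', 'y = 2\n']
#
#     ipython_prompt(['In [3]: x = 1\n', '   ...: y = 2\n'])
#     # -> ['x = 1\n', 'y = 2\n']
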
def cell_magic(lines):
    """Replace a cell magic (``%%foo``) cell with a call to ``run_cell_magic``."""
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]

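# Example (illustrative): a cell magic cell is collapsed into a single call,
# with the rest of the first line and the body passed as strings.
#
#     cell_magic(['%%bash\n', 'echo hi\n'])
#     # -> ["get_ipython().run_cell_magic('bash', '', 'echo hi\\n')\n"]
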
def _find_assign_op(token_line) -> Optional[int]:
    """Get the index of the first assignment in the line ('=' not inside brackets)

    Note: We don't try to support multiple special assignment (a = b = %foo)
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        if s in {'(', '[', '{'}:
            paren_level += 1
        elif s in {')', ']', '}'}:
            if paren_level > 0:
                paren_level -= 1
    return None

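# Example (illustrative, using make_tokens_by_line defined further down):
#
#     line = make_tokens_by_line(['a = (b == c)\n'])[0]
#     _find_assign_op(line)
#     # -> 1  (the top-level '='; the '==' inside the brackets is ignored)
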
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.
    """
    end_line = start_line
    while lines[end_line].endswith('\\\n'):
        end_line += 1
        if end_line >= len(lines):
            break
    return end_line


def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
    return ' '.join([p.rstrip()[:-1] for p in parts[:-1]]  # Strip backslash+newline
                    + [parts[-1].rstrip()])                # Strip newline from last line

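# Example (illustrative): a backslash-continued command is reassembled into a
# single logical line before being transformed.
#
#     lines = ['!ls -l\\\n', '/tmp\n']
#     end = find_end_of_continued_line(lines, 0)    # -> 1
#     assemble_continued_line(lines, (0, 0), end)   # -> '!ls -l /tmp'
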
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments. This is hard to reliably avoid with regexes. The solution is to
    tokenise the code as Python, and recognise the special syntax in the tokens.

    IPython's special syntax is not valid Python syntax, so tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax. After each transformation, tokens are regenerated to find the next
    piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute can select which transformation to apply if multiple
    transformers match in the same place. Lower numbers have higher priority.
    This allows "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        return self.start_line, self.start_col, self.priority

    def __init__(self, start):
        self.start_line = start[0] - 1  # Shift from 1-index to 0-index
        self.start_col = start[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError

class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after

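# Example (illustrative): rewriting an assignment from a line magic.
#
#     cell = ['a = %env PATH\n']
#     t = MagicAssign.find(make_tokens_by_line(cell))
#     t.transform(cell)
#     # -> ["a = get_ipython().run_line_magic('env', 'PATH')\n"]
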
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and not line[assign_ix].line.strip().startswith('=') \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
                ix = assign_ix + 1

                while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                    if line[ix].string == '!':
                        return cls(line[ix].start)
                    elif not line[ix].string.isspace():
                        break
                    ix += 1

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col

        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        lines_before = lines[:start_line]
        call = "get_ipython().getoutput({!r})".format(cmd)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after

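# Example (illustrative sketch, assuming the stdlib tokenizer reports '!' as an
# ERRORTOKEN, as it does on the CPython versions this module targets):
#
#     cell = ['files = !ls -a\n']
#     t = SystemAssign.find(make_tokens_by_line(cell))
#     t.transform(cell)
#     # -> ["files = get_ipython().getoutput('ls -a')\n"]
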
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL = '!'    # Send line to underlying system shell
ESC_SH_CAP = '!!'  # Send line to system shell and capture output
ESC_HELP = '?'     # Find information about object
ESC_HELP2 = '??'   # Find extra-detailed information about object
ESC_MAGIC = '%'    # Call magic function
ESC_MAGIC2 = '%%'  # Call cell-magic function
ESC_QUOTE = ','    # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'   # Quote all args as a single string, call
ESC_PAREN = '/'    # Call first argument with rest of line as arguments

ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately


def _make_help_call(target, esc, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)"""
    method = 'pinfo2' if esc == '??' \
        else 'psearch' if '*' in target \
        else 'pinfo'
    arg = " ".join([method, target])
    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    t_magic_name, _, t_magic_arg_s = arg.partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
    if next_input is None:
        return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
    else:
        return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
            (next_input, t_magic_name, t_magic_arg_s)


def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if not content:
        return 'get_ipython().show_usage()'
    return _make_help_call(content, '?')


def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if not content:
        return 'get_ipython().show_usage()'
    return _make_help_call(content, '??')


def _tr_magic(content):
    "Translate lines escaped with a percent sign: %"
    name, _, args = content.partition(' ')
    return 'get_ipython().run_line_magic(%r, %r)' % (name, args)


def _tr_quote(content):
    "Translate lines escaped with a comma: ,"
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, '", "'.join(args.split()))


def _tr_quote2(content):
    "Translate lines escaped with a semicolon: ;"
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, args)


def _tr_paren(content):
    "Translate lines escaped with a slash: /"
    name, _, args = content.partition(' ')
    return '%s(%s)' % (name, ", ".join(args.split()))


tr = {ESC_SHELL: 'get_ipython().system({!r})'.format,
      ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
      ESC_HELP: _tr_help,
      ESC_HELP2: _tr_help2,
      ESC_MAGIC: _tr_magic,
      ESC_QUOTE: _tr_quote,
      ESC_QUOTE2: _tr_quote2,
      ESC_PAREN: _tr_paren}

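# Example (illustrative): each entry in ``tr`` maps one escape to a plain
# Python call, e.g.
#
#     tr['!']('ls')           # -> "get_ipython().system('ls')"
#     tr['%']('timeit f(x)')  # -> "get_ipython().run_line_magic('timeit', 'f(x)')"
#     tr['?']('os.path')      # -> "get_ipython().run_line_magic('pinfo', 'os.path')"
#     tr[',']('f a b')        # -> 'f("a", "b")'
#     tr['/']('f a b')        # -> 'f(a, b)'
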
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.
        """
        for line in tokens_by_line:
            if not line:
                continue
            ix = 0
            ll = len(line)
            while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= ll:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]

        if escape in tr:
            call = tr[escape](content)
        else:
            call = ''

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after

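# Example (illustrative): a whole escaped line is replaced with the
# corresponding call.
#
#     cell = ['%matplotlib inline\n']
#     t = EscapedCommand.find(make_tokens_by_line(cell))
#     t.transform(cell)
#     # -> ["get_ipython().run_line_magic('matplotlib', 'inline')\n"]
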
_help_end_re = re.compile(r"""(%{0,2}
                              (?!\d)[\w*]+       # Variable name
                              (\.(?!\d)[\w*]+)*  # .etc.etc
                              )
                              (\?\??)$           # ? or ??
                              """,
                          re.VERBOSE)


class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        assert m is not None, content
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after

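# Example (illustrative): a trailing '?' becomes a pinfo call ('??' a pinfo2 call).
#
#     cell = ['os.path.join?\n']
#     t = HelpEnd.find(make_tokens_by_line(cell))
#     t.transform(cell)
#     # -> ["get_ipython().run_line_magic('pinfo', 'os.path.join')\n"]
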
def make_tokens_by_line(lines: List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. All lines except the last should keep their line ending ('\\n',
    '\\r\\n') for this to work properly. Use `.splitlines(keepends=True)`, for
    example, when passing a block of text to this function.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore
    tokens_by_line: List[List[Any]] = [[]]
    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
    parenlev = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            tokens_by_line[-1].append(token)
            if (token.type == NEWLINE) \
                    or ((token.type == NL) and (parenlev <= 0)):
                tokens_by_line.append([])
            elif token.string in {'(', '[', '{'}:
                parenlev += 1
            elif token.string in {')', ']', '}'}:
                if parenlev > 0:
                    parenlev -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    if not tokens_by_line[-1]:
        tokens_by_line.pop()

    return tokens_by_line

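# Example (illustrative): tokens of a bracketed expression spanning several
# physical lines end up in one logical-line group.
#
#     tbl = make_tokens_by_line(['a = [1,\n', '     2]\n', 'b = 3\n'])
#     [t.string for t in tbl[0] if t.string.strip()]
#     # -> ['a', '=', '[', '1', ',', '2', ']']
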
def show_linewise_tokens(s: str):
    """For investigation and debugging"""
    if not s.endswith('\n'):
        s += '\n'
    lines = s.splitlines(keepends=True)
    for line in make_tokens_by_line(lines):
        print("Line -------")
        for tokinfo in line:
            print(" ", tokinfo)


# Arbitrary limit to prevent getting stuck in infinite loops
TRANSFORM_LOOP_LIMIT = 500

class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        self.line_transforms = [
            cell_magic,
        ]
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                pass
        return False, lines

    def do_token_transforms(self, lines):
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if the source is not
            a prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        # Remember if the cell ends in a newline.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break
            else:
                continue

        if not ends_with_newline:
            # Append a newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}  # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # If there's a blank line at the end, assume we're ready to execute
        if not lines[-1].strip():
            return 'complete', None

        return 'complete', None


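# Example (illustrative) of the two public entry points:
#
#     mgr = TransformerManager()
#     mgr.transform_cell('%ls -l\n')
#     # -> "get_ipython().run_line_magic('ls', '-l')\n"
#     mgr.check_complete('for i in range(3):')
#     # -> ('incomplete', 4)
#     mgr.check_complete('x = 1\n')
#     # -> ('complete', None)
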
def find_last_indent(lines):
    m = _indent_re.match(lines[-1])
    if not m:
        return 0
    return len(m.group(0).replace('\t', ' '*4))