inputtransformer2.py
432 lines
| 15.4 KiB
| text/x-python
|
PythonLexer
Thomas Kluyver
|
r24154 | import re | ||
from typing import List, Tuple | ||||
from IPython.utils import tokenize2 | ||||
from IPython.utils.tokenutil import generate_tokens | ||||
def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line that begins with it.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        The dedented lines, or *lines* itself when it is empty or its first
        line has no leading whitespace.
    """
    if not lines:
        # Nothing to inspect; avoid indexing into an empty list.
        return lines
    m = re.match(r'^[ \t]+', lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    # Lines that do not share the first line's prefix are left untouched.
    return [l[n:] if l.startswith(space) else l
            for l in lines]
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Remove at most one prompt match from each line."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Return *lines* with prompts stripped if a prompt is detected.

        An empty list is returned unchanged.
        """
        if not lines:
            # No first line to test against; nothing to strip.
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
# Plain Python prompts: '>>>' starts a block, '...' continues it. Only '>>>'
# is accepted as the initial prompt.
classic_prompt = PromptStripper(
    initial_re=re.compile(r'^>>>( |$)'),
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
)

# IPython prompts: 'In [N]: ' and the '...: ' continuation share one pattern.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a single ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        *lines* unchanged when the cell is empty, does not start with ``%%``,
        or is a help request such as ``%%foo?``; otherwise a one-element list
        holding the generated call.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    header = lines[0][2:]
    # Drop the line terminator only if it is really there, so a first line
    # without a trailing newline does not lose its last character.
    if header.endswith('\n'):
        header = header[:-1]
    magic_name, _, first_line = header.partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
Thomas Kluyver
|
r24154 | |||
# ----- | ||||
def _find_assign_op(token_line): | ||||
# Find the first assignment in the line ('=' not inside brackets) | ||||
# We don't try to support multiple special assignment (a = b = %foo) | ||||
paren_level = 0 | ||||
for i, ti in enumerate(token_line): | ||||
s = ti.string | ||||
if s == '=' and paren_level == 0: | ||||
return i | ||||
if s in '([{': | ||||
paren_level += 1 | ||||
elif s in ')]}': | ||||
paren_level -= 1 | ||||
Thomas Kluyver
|
def find_end_of_continued_line(lines, start_line: int):
    """Locate where a backslash-continued line stops.

    Uses 0-indexed line numbers. Starting at *start_line*, advance while the
    current line ends with a backslash followed by a newline. The result is
    the index of the first line that does not, or ``len(lines)`` when the
    continuation runs off the end of *lines*.
    """
    total = len(lines)
    idx = start_line
    while True:
        if not lines[idx].endswith('\\\n'):
            return idx
        idx += 1
        if idx >= total:
            return idx
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Join the physical lines of one continued line into a single string.

    Uses 0-indexed line numbers. *start* is (lineno, colno): the first piece
    is taken from that column onwards, and the following lines up to and
    including *end_line* are taken whole. The last two characters
    (backslash + newline) are dropped from every piece but the final one,
    which loses only its last character (the newline). Pieces are joined
    with single spaces.
    """
    first_lineno = start[0]
    first_col = start[1]
    pieces = [lines[first_lineno][first_col:]]
    pieces.extend(lines[first_lineno + 1:end_line + 1])
    *continued, final = pieces
    trimmed = [piece[:-2] for piece in continued]  # Strip backslash+newline
    trimmed.append(final[:-1])  # Strip newline from last line
    return ' '.join(trimmed)
Thomas Kluyver
|
class TokenTransformBase:
    """Base class for transformations located at a token position.

    Instances remember where the special syntax starts; subclasses implement
    :meth:`transform` to rewrite the lines.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a (line, column) pair with a 1-indexed line number.
        line_1_indexed = start[0]
        self.start_line = line_1_indexed - 1  # Shift from 1-index to 0-index
        self.start_col = start[1]

    def sortby(self):
        """Sort key: earliest position first, then lowest priority number."""
        return (self.start_line, self.start_col, self.priority)

    def transform(self, lines: List[str]):
        """Rewrite *lines*; must be provided by subclasses."""
        raise NotImplementedError
class MagicAssign(TokenTransformBase):
    """Transformer for magic assignments such as ``a = %foo``."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance of this class located at the ``%`` if found, or
        None. The ``%`` token's (line, column) start, with a 1-indexed line,
        is handed to the constructor.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after the '=' are inspected (the '%' and the magic
            # name), so index assign_ix + 2 has to exist.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize2.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by find.

        Replaces the (possibly backslash-continued) line holding the magic
        with ``<lhs>get_ipython().run_line_magic(name, args)`` and returns the
        new list of lines.
        """
        start_line, start_col = self.start_line, self.start_col
        # Everything before the '%' (e.g. "a = ") is kept verbatim.
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
Thomas Kluyver
|
r24156 | |||
Thomas Kluyver
|
class SystemAssign(TokenTransformBase):
    """Transformer for system assignments such as ``a = !foo``."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance of this class located at the ``!`` if found, or
        None. The ``!`` token's (line, column) start, with a 1-indexed line,
        is handed to the constructor.
        """
        for tokens in tokens_by_line:
            eq_pos = _find_assign_op(tokens)
            if eq_pos is None or len(tokens) < eq_pos + 2:
                continue
            # Walk the run of error tokens after the '=': whitespace is
            # skipped, '!' is a hit, anything else ends the search on this line.
            for tok in tokens[eq_pos + 1:]:
                if tok.type != tokenize2.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break

    def transform(self, lines: List[str]):
        """Transform a system assignment found by find.

        Replaces the (possibly backslash-continued) line holding the command
        with ``<lhs>get_ipython().getoutput(cmd)`` and returns the new list of
        lines.
        """
        row, col = self.start_line, self.start_col

        prefix = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        command_text = assemble_continued_line(lines, (row, col), last_row)
        assert command_text.startswith('!'), command_text

        call = "get_ipython().getoutput({!r})".format(command_text[1:])
        replacement = prefix + call + '\n'
        return lines[:row] + [replacement] + lines[last_row + 1:]
Thomas Kluyver
|
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Every one-character escape.
ESCAPE_SINGLES = {
    ESC_SHELL,
    ESC_HELP,
    ESC_MAGIC,
    ESC_QUOTE,
    ESC_QUOTE2,
    ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
def _make_help_call(target, esc, next_input=None): | ||||
"""Prepares a pinfo(2)/psearch call from a target name and the escape | ||||
(i.e. ? or ??)""" | ||||
method = 'pinfo2' if esc == '??' \ | ||||
else 'psearch' if '*' in target \ | ||||
else 'pinfo' | ||||
arg = " ".join([method, target]) | ||||
#Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args) | ||||
t_magic_name, _, t_magic_arg_s = arg.partition(' ') | ||||
t_magic_name = t_magic_name.lstrip(ESC_MAGIC) | ||||
if next_input is None: | ||||
return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s) | ||||
else: | ||||
return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \ | ||||
(next_input, t_magic_name, t_magic_arg_s) | ||||
def _tr_help(content): | ||||
"Translate lines escaped with: ?" | ||||
# A naked help line should just fire the intro help screen | ||||
if not content: | ||||
return 'get_ipython().show_usage()' | ||||
return _make_help_call(content, '?') | ||||
def _tr_help2(content): | ||||
"Translate lines escaped with: ??" | ||||
# A naked help line should just fire the intro help screen | ||||
if not content: | ||||
return 'get_ipython().show_usage()' | ||||
return _make_help_call(content, '??') | ||||
def _tr_magic(content): | ||||
"Translate lines escaped with: %" | ||||
name, _, args = content.partition(' ') | ||||
return 'get_ipython().run_line_magic(%r, %r)' % (name, args) | ||||
def _tr_quote(content): | ||||
"Translate lines escaped with: ," | ||||
name, _, args = content.partition(' ') | ||||
return '%s("%s")' % (name, '", "'.join(args.split()) ) | ||||
def _tr_quote2(content): | ||||
"Translate lines escaped with: ;" | ||||
name, _, args = content.partition(' ') | ||||
return '%s("%s")' % (name, args) | ||||
def _tr_paren(content): | ||||
"Translate lines escaped with: /" | ||||
name, _, args = content.partition(' ') | ||||
return '%s(%s)' % (name, ", ".join(args.split())) | ||||
# Dispatch table: escape sequence -> callable that turns the text following
# the escape into the replacement source code.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
Thomas Kluyver
|
class EscapedCommand(TokenTransformBase):
    """Transformer for lines starting with an escape (%foo, !foo, ?foo, ...)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns an instance of this class located at the escape if found, or
        None. The escape token's (line, column) start, with a 1-indexed line,
        is handed to the constructor.
        """
        for line in tokens_by_line:
            ix = 0
            n_tokens = len(line)
            # Skip leading INDENT/DEDENT tokens without running off the end of
            # a line that is empty or holds nothing else.
            while ix < n_tokens \
                    and line[ix].type in {tokenize2.INDENT, tokenize2.DEDENT}:
                ix += 1
            if ix >= n_tokens:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)
        return None

    def transform(self, lines):
        """Transform an escaped command found by find.

        Replaces the (possibly backslash-continued) escaped line with the
        code produced by the matching entry of ``tr``, keeping the original
        indentation, and returns the new list of lines.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Two-character escapes (!!, ??) take precedence over their
        # one-character prefixes.
        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
Thomas Kluyver
|
# Matches a help request at the end of a piece of text: up to two '%', a
# dotted name whose parts may contain '*', then '?' or '??'.
# Group 1 is the name (including any '%' prefix); group 3 is the '?'/'??'.
_help_end_re = re.compile(r"""(%{0,2}
[a-zA-Z_*][\w*]* # Variable name
(\.[a-zA-Z_*][\w*]*)* # .etc.etc
)
(\?\??)$ # ? or ??
""",
                          re.VERBOSE)
class HelpEnd(TokenTransformBase):
    """Transformer for help requests written at the end of a line (foo?)."""

    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is the (line, column) of the question mark, 1-indexed line.
        q_line_1_indexed = q_locn[0]
        self.q_line = q_line_1_indexed - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first line whose last token before NEWLINE is ``?``.

        Returns an instance built from the start of the line's first token
        that is not INDENT/DEDENT and the start of the ``?`` token, or None.
        """
        for tokens in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(tokens) <= 2 or tokens[-2].string != '?':
                continue
            # Find the first token that's not INDENT/DEDENT
            first = 0
            while tokens[first].type in {tokenize2.INDENT, tokenize2.DEDENT}:
                first += 1
            return cls(tokens[first].start, tokens[-2].start)

    def transform(self, lines):
        """Replace the help request with a pinfo/pinfo2/psearch call.

        Returns the new list of lines.
        """
        first_row, last_row = self.start_line, self.q_line
        piece = ''.join(lines[first_row:last_row + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]
        head = lines[:first_row]
        tail = lines[last_row + 1:]

        m = _help_end_re.search(content)
        assert m is not None, content
        target, esc = m.group(1), m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        if head or tail or content.strip() == m.group(0):
            next_input = None
        else:
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        return head + [indent + call + '\n'] + tail
Thomas Kluyver
|
def make_tokens_by_line(lines):
    """Tokenise *lines* and group the tokens into one list per logical line.

    A new group is started after every NEWLINE token, so the returned list
    always ends with the group holding whatever followed the last NEWLINE
    (possibly nothing).
    """
    current = []
    grouped = [current]
    for tok in generate_tokens(iter(lines).__next__):
        current.append(tok)
        if tok.type == tokenize2.NEWLINE:
            current = []
            grouped.append(current)
    return grouped
Thomas Kluyver
|
def show_linewise_tokens(s: str):
    """For investigation: print the tokens of *s*, grouped by logical line."""
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in group:
            print(" ", tokinfo)
Thomas Kluyver
|
class TransformerManager:
    """Apply the line-based and token-based transformations to a cell."""

    def __init__(self):
        # Applied once each, in order, to the list of lines.
        self.line_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
            cell_magic,
        ]
        # Classes whose ``find`` classmethod locates special syntax in tokens.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        found = (cls.find(tokens_by_line) for cls in self.token_transformers)
        candidates = [match for match in found if match]

        if not candidates:
            # Nothing to transform
            return False, lines

        earliest = min(candidates, key=TokenTransformBase.sortby)
        return True, earliest.transform(lines)

    def do_token_transforms(self, lines):
        """Apply token transforms until none of them matches any more."""
        changed, lines = self.do_one_token_transform(lines)
        while changed:
            changed, lines = self.do_one_token_transform(lines)
        return lines

    def transform_cell(self, cell: str):
        """Transform the source of one cell and return the resulting code."""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure every line has a newline
        lines = cell.splitlines(keepends=True)
        for line_transform in self.line_transforms:
            lines = line_transform(lines)

        return ''.join(self.do_token_transforms(lines))