|
|
import re
|
|
|
from typing import List, Tuple
|
|
|
from IPython.utils import tokenize2
|
|
|
from IPython.utils.tokenutil import generate_tokens
|
|
|
|
|
|
def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line that also starts with it. Lines that do
    not start with that exact whitespace are left untouched.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        The dedented lines, or *lines* itself when there is nothing to strip
        (empty input, or a first line without leading whitespace).
    """
    if not lines:
        # Nothing to inspect; avoids an IndexError on lines[0].
        return lines
    m = re.match(r'^[ \t]+', lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
|
|
|
|
|
|
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Remove at most one prompt match from each line."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Return *lines* with prompts stripped if a prompt is detected.

        The first line is tested against ``initial_re`` and the second line
        (when present) against ``prompt_re``. If neither matches, or *lines*
        is empty, the input is returned unchanged.
        """
        if not lines:
            # Empty input: nothing to test, and lines[0] would raise.
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
|
|
|
|
|
|
# Plain Python prompts: '>>>' and '...' are stripped everywhere, but only
# '>>>' is accepted as evidence of a prompt on the first line.
classic_prompt = PromptStripper(
    re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
)

# IPython prompts: 'In [N]: ' or a continuation made of optional whitespace,
# three or more dots, a colon and an optional space.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
|
|
|
|
|
|
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        *lines* unchanged when the cell is empty, does not start with ``%%``,
        or starts with a ``%%name?`` help request; otherwise a single line
        calling ``get_ipython().run_cell_magic(name, first_line, body)``,
        where *body* is every line after the first joined together.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    header = lines[0][2:]
    # Drop the line terminator only if it is really there, so that a header
    # without a trailing newline does not lose its last character.
    if header.endswith('\n'):
        header = header[:-1]
    magic_name, _, first_line = header.partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
|
|
|
|
|
|
# Line-based transformations; transform_cell applies them in this order.
line_transforms = [leading_indent, classic_prompt, ipython_prompt, cell_magic]
|
|
|
|
|
|
# -----
|
|
|
|
|
|
def _find_assign_op(token_line):
|
|
|
# Find the first assignment in the line ('=' not inside brackets)
|
|
|
# We don't try to support multiple special assignment (a = b = %foo)
|
|
|
paren_level = 0
|
|
|
for i, ti in enumerate(token_line):
|
|
|
s = ti.string
|
|
|
if s == '=' and paren_level == 0:
|
|
|
return i
|
|
|
if s in '([{':
|
|
|
paren_level += 1
|
|
|
elif s in ')]}':
|
|
|
paren_level -= 1
|
|
|
|
|
|
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.

    Parameters
    ----------
    lines : list of str
        Lines of the cell, each expected to end with a newline.
    start_line : int
        Index of the line where the (possibly continued) statement starts.

    Returns
    -------
    int
        Index of the last line belonging to the continued statement. When the
        final line of *lines* itself ends with a backslash, the index of that
        final line is returned, so the result never runs past the end of
        *lines*.
    """
    end_line = start_line
    last_index = len(lines) - 1
    # Stop at the last line even if it ends in a backslash: there is no
    # further line for it to continue onto.
    while end_line < last_index and lines[end_line].endswith('\\\n'):
        end_line += 1
    return end_line
|
|
|
|
|
|
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Join the pieces of a backslash-continued line into one string.

    Uses 0-indexed line numbers. *start* is (lineno, colno): the first piece
    begins at that column of that line, and the pieces run up to and
    including *end_line*. The pieces are joined with single spaces.
    """
    first_piece = lines[start[0]][start[1]:]
    pieces = [first_piece] + lines[start[0] + 1:end_line + 1]
    # Every piece but the last loses its final two characters
    # (backslash + newline).
    joined_parts = [piece[:-2] for piece in pieces[:-1]]
    # The last piece only loses its final character (the newline).
    joined_parts.append(pieces[-1][:-1])
    return ' '.join(joined_parts)
|
|
|
|
|
|
class TokenTransformBase:
    """Common base for transformers located via the token stream.

    Instances remember where the special syntax starts (0-indexed line and
    column) and are ordered by that location, then by ``priority``.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a (line, column) pair with a 1-indexed line; store the
        # line 0-indexed.
        self.start_col = start[1]
        self.start_line = start[0] - 1

    def sortby(self):
        """Return the sort key (start_line, start_col, priority)."""
        key = (self.start_line, self.start_col, self.priority)
        return key

    def transform(self, lines: List[str]):
        """Apply the transformation; subclasses must override this."""
        raise NotImplementedError
|
|
|
|
|
|
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from line magics (``a = %foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the (line, column) of the ``%``
        token if one is found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after the '=' are inspected, so the line must hold
            # at least assign_ix + 3 tokens.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize2.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by find.

        The text from the ``%`` to the end of the (possibly backslash
        continued) line is replaced by a ``get_ipython().run_line_magic``
        call; the text before the ``%`` on the starting line is kept as is.
        Returns a new list of lines.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
|
|
|
|
|
|
|
|
|
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (``a = !foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance positioned at the (line, column) of the ``!``
        token if one is found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            # Scan the run of ERRORTOKENs right after the '=': whitespace
            # ones are skipped, a '!' is the match, anything else ends the
            # scan for this line.
            for tok in line[assign_ix + 1:]:
                if tok.type != tokenize2.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break

    def transform(self, lines: List[str]):
        """Transform a system assignment found by find.

        The text from the ``!`` to the end of the (possibly backslash
        continued) line becomes a ``get_ipython().getoutput`` call; the text
        before the ``!`` is kept. Returns a new list of lines.
        """
        row, col = self.start_line, self.start_col

        prefix = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        command = assemble_continued_line(lines, (row, col), last_row)
        assert command.startswith('!'), command

        replacement = (
            prefix
            + "get_ipython().getoutput({!r})".format(command[1:])
            + '\n'
        )
        return lines[:row] + [replacement] + lines[last_row + 1:]
|
|
|
|
|
|
# The escape sequences below define the syntax transformations IPython
# applies to user input. They can NOT simply be changed here: many regular
# expressions and other parts of the code may rely on their hardcoded values.
# For all intents and purposes they constitute the 'IPython syntax', so
# treat them as fixed.

ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Every single-character escape.
ESCAPE_SINGLES = {
    ESC_SHELL,
    ESC_HELP,
    ESC_MAGIC,
    ESC_QUOTE,
    ESC_QUOTE2,
    ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
|
|
|
|
|
|
def _make_help_call(target, esc, next_input=None):
|
|
|
"""Prepares a pinfo(2)/psearch call from a target name and the escape
|
|
|
(i.e. ? or ??)"""
|
|
|
method = 'pinfo2' if esc == '??' \
|
|
|
else 'psearch' if '*' in target \
|
|
|
else 'pinfo'
|
|
|
arg = " ".join([method, target])
|
|
|
#Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
|
|
|
t_magic_name, _, t_magic_arg_s = arg.partition(' ')
|
|
|
t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
|
|
|
if next_input is None:
|
|
|
return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
|
|
|
else:
|
|
|
return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
|
|
|
(next_input, t_magic_name, t_magic_arg_s)
|
|
|
|
|
|
def _tr_help(content):
    "Translate lines escaped with: ?"
    if content:
        return _make_help_call(content, '?')
    # A naked help line should just fire the intro help screen
    return 'get_ipython().show_usage()'
|
|
|
|
|
|
def _tr_help2(content):
    "Translate lines escaped with: ??"
    if content:
        return _make_help_call(content, '??')
    # A naked help line should just fire the intro help screen
    return 'get_ipython().show_usage()'
|
|
|
|
|
|
def _tr_magic(content):
|
|
|
"Translate lines escaped with: %"
|
|
|
name, _, args = content.partition(' ')
|
|
|
return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
|
|
|
|
|
|
def _tr_quote(content):
|
|
|
"Translate lines escaped with: ,"
|
|
|
name, _, args = content.partition(' ')
|
|
|
return '%s("%s")' % (name, '", "'.join(args.split()) )
|
|
|
|
|
|
def _tr_quote2(content):
|
|
|
"Translate lines escaped with: ;"
|
|
|
name, _, args = content.partition(' ')
|
|
|
return '%s("%s")' % (name, args)
|
|
|
|
|
|
def _tr_paren(content):
|
|
|
"Translate lines escaped with: /"
|
|
|
name, _, args = content.partition(' ')
|
|
|
return '%s(%s)' % (name, ", ".join(args.split()))
|
|
|
|
|
|
# Dispatch table: escape sequence -> callable that turns the text following
# the escape into Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
|
|
|
|
|
|
class EscapedCommand(TokenTransformBase):
    """Transformer for lines starting with an escape (%foo, !foo, etc.)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns an instance positioned at the (line, column) of the escape
        token if one is found, or None.
        """
        skipped_types = {tokenize2.INDENT, tokenize2.DEDENT}
        for line in tokens_by_line:
            n_tokens = len(line)
            ix = 0
            # Bounds-checked so that an empty token line, or one made only of
            # INDENT/DEDENT tokens, is skipped instead of raising IndexError.
            while ix < n_tokens and line[ix].type in skipped_types:
                ix += 1
            if ix >= n_tokens:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)
        return None

    def transform(self, lines):
        """Transform an escaped command found by find.

        The (possibly backslash continued) line is replaced by the output of
        the ``tr`` handler for its escape, prefixed with the original
        indentation. Returns a new list of lines.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Two-character escapes (!!, ??) take precedence over their
        # single-character prefixes.
        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
|
|
|
|
|
|
_help_end_re = re.compile(r"""(%{0,2}
|
|
|
[a-zA-Z_*][\w*]* # Variable name
|
|
|
(\.[a-zA-Z_*][\w*]*)* # .etc.etc
|
|
|
)
|
|
|
(\?\??)$ # ? or ??
|
|
|
""",
|
|
|
re.VERBOSE)
|
|
|
|
|
|
class HelpEnd(TokenTransformBase):
    """Transformer for help requests written at the end of a line (foo?)."""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is the (line, column) of the '?' token, with a 1-indexed
        # line; store the line 0-indexed.
        self.q_col = q_locn[1]
        self.q_line = q_locn[0] - 1

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first line whose last-but-one token is ``?``.

        Returns an instance spanning from the first token that is not
        INDENT/DEDENT to that ``?`` token, or None.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) <= 2 or line[-2].string != '?':
                continue
            # Find the first token that's not INDENT/DEDENT
            first = 0
            while line[first].type in {tokenize2.INDENT, tokenize2.DEDENT}:
                first += 1
            return cls(line[first].start, line[-2].start)

    def transform(self, lines):
        """Replace the help request with a pinfo/pinfo2/psearch call.

        Returns a new list of lines in which the lines from the start of the
        statement through the line holding the ``?`` are replaced by a
        single line.
        """
        first, last = self.start_line, self.q_line
        piece = ''.join(lines[first:last + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]
        lines_before = lines[:first]
        lines_after = lines[last + 1:]

        m = _help_end_re.search(content)
        assert m is not None, content
        target, esc = m.group(1), m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        only_statement = (not lines_before) and (not lines_after)
        if only_statement and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')
        else:
            next_input = None

        call = _make_help_call(target, esc, next_input=next_input)
        return lines_before + [indent + call + '\n'] + lines_after
|
|
|
|
|
|
def make_tokens_by_line(lines):
    """Tokenize *lines* and group the tokens into one list per NEWLINE.

    Every NEWLINE token closes the current group (and belongs to it); tokens
    produced after the last NEWLINE end up in a final group, which may be
    empty.
    """
    current = []
    grouped = [current]
    read_next_line = iter(lines).__next__
    for tok in generate_tokens(read_next_line):
        current.append(tok)
        if tok.type == tokenize2.NEWLINE:
            # Start collecting the next logical line.
            current = []
            grouped.append(current)

    return grouped
|
|
|
|
|
|
def show_linewise_tokens(s: str):
    """For investigation: print the tokens of *s*, grouped line by line."""
    # Make sure the text ends with a newline before splitting it.
    text = s if s.endswith('\n') else s + '\n'
    for token_group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in token_group:
            print(" ", tokinfo)
|
|
|
|
|
|
class TokenTransformers:
    """Repeatedly applies the token-based transformers to a cell."""

    def __init__(self):
        self.transformers = [MagicAssign, SystemAssign, EscapedCommand, HelpEnd]

    def do_one_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        found = (cls.find(tokens_by_line) for cls in self.transformers)
        candidates = [transformer for transformer in found if transformer]

        if candidates:
            # Earliest location wins; priority breaks ties.
            chosen = min(candidates, key=TokenTransformBase.sortby)
            return True, chosen.transform(lines)

        # Nothing to transform
        return False, lines

    def __call__(self, lines):
        """Apply transforms one at a time until none is left; return lines."""
        changed, lines = self.do_one_transform(lines)
        while changed:
            changed, lines = self.do_one_transform(lines)
        return lines
|
|
|
|
|
|
|
|
|
def transform_cell(cell):
    """Run every line transform, then the token transforms, over *cell*.

    *cell* is the source text of a cell; the transformed source is returned
    as a single string.
    """
    # Ensure every line has a newline
    text = cell if cell.endswith('\n') else cell + '\n'
    lines = text.splitlines(keepends=True)
    for line_transform in line_transforms:
        lines = line_transform(lines)

    token_transforms = TokenTransformers()
    return ''.join(token_transforms(lines))
|
|
|
|