##// END OF EJS Templates
fix some other syntax warnings
fix some other syntax warnings

File last commit:

r24780:2cccf225
r24780:2cccf225
Show More
inputtransformer2.py
707 lines | 25.6 KiB | text/x-python | PythonLexer
/ IPython / core / inputtransformer2.py
Thomas Kluyver
Mark inputsplitter & inputtransformer as deprecated
r24177 """Input transformer machinery to support IPython special syntax.
This includes the machinery to recognise and transform ``%magic`` commands,
``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407
Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
deprecated in 7.0.
Thomas Kluyver
Mark inputsplitter & inputtransformer as deprecated
r24177 """
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
Thomas Kluyver
Start adding code for checking when input is complete
r24165 from codeop import compile_command
Thomas Kluyver
Working on new input transformation machinery
r24154 import re
Thomas Kluyver
Switch inputtransformer2 back to stdlib tokenize module
r24178 import tokenize
Matthias Bussonnier
Fix miss-capturing of assign statement after a dedent....
r24728 from typing import List, Tuple, Union
Thomas Kluyver
Convert syntax warnings to errors when checking code completeness
r24182 import warnings
Thomas Kluyver
Working on new input transformation machinery
r24154
Thomas Kluyver
Start adding code for checking when input is complete
# Matches the run of spaces/tabs at the very start of a line.
_indent_re = re.compile(r'^[ \t]+')


def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, that exact whitespace
    prefix is removed from every line in the cell that starts with it.
    Lines which do not start with the prefix are left untouched.
    """
    if not lines:
        return lines

    match = _indent_re.match(lines[0])
    if match is None:
        # First line is not indented: nothing to strip.
        return lines

    prefix = match.group(0)
    width = len(prefix)
    dedented = []
    for line in lines:
        if line.startswith(prefix):
            dedented.append(line[width:])
        else:
            dedented.append(line)
    return dedented
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including
        continuation, e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not
        continuation. If no initial expression is given, prompt_re will be
        used everywhere. Used mainly for plain Python prompts (``>>>``),
        where the continuation prompt ``...`` is a valid Python expression
        in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ, only initial_re will be tested
    against the first line. If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """

    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        # Remove at most one prompt occurrence per line.
        strip_one = self.prompt_re.sub
        return [strip_one('', text, count=1) for text in lines]

    def __call__(self, lines):
        if not lines:
            return lines
        # The first line is checked with the initial-prompt pattern only.
        if self.initial_re.match(lines[0]):
            return self._strip(lines)
        # Otherwise a prompt on the second line also triggers stripping.
        if len(lines) > 1 and self.prompt_re.match(lines[1]):
            return self._strip(lines)
        return lines
# Stripper for plain Python REPL prompts. prompt_re accepts ``>>>`` or ``...``
# followed by a space or end of line; initial_re accepts only ``>>>``.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)
# Stripper for IPython prompts: ``In [N]: `` or an optionally indented run of
# three or more dots followed by ``:`` and an optional space. No initial_re is
# given, so the same pattern is used for the first line.
ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        Physical lines of the cell, normally with their line endings kept.

    Returns
    -------
    list of str
        *lines* unchanged if the cell is empty, does not start with ``%%``,
        or its first line looks like ``%%name?`` (a help request). Otherwise
        a one-element list holding a ``get_ipython().run_cell_magic(...)``
        call built from the magic name, the rest of the first line, and the
        remaining lines joined together as the body.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines

    # Drop the '%%' prefix and the trailing newline. Only strip the last
    # character when it really is a newline, so a first line passed without
    # a line ending does not lose the last character of its name/arguments.
    header = lines[0][2:]
    if header.endswith('\n'):
        header = header[:-1]

    magic_name, _, first_line = header.partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
Thomas Kluyver
Working on new input transformation machinery
r24154
Matthias Bussonnier
Fix miss-capturing of assign statement after a dedent....
r24728 def _find_assign_op(token_line) -> Union[int, None]:
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 """Get the index of the first assignment in the line ('=' not inside brackets)
Note: We don't try to support multiple special assignment (a = b = %foo)
"""
Thomas Kluyver
Working on new input transformation machinery
r24154 paren_level = 0
for i, ti in enumerate(token_line):
s = ti.string
if s == '=' and paren_level == 0:
Tony Fast
Include empty lines condition in PromptStipper and cell_magic.
r24631 return i
Matthias Bussonnier
Fix miss-capturing of assign statement after a dedent....
r24728 if s in {'(','[','{'}:
Thomas Kluyver
Working on new input transformation machinery
r24154 paren_level += 1
Matthias Bussonnier
Fix miss-capturing of assign statement after a dedent....
r24728 elif s in {')', ']', '}'}:
Thomas Kluyver
Don't let parentheses level go below 0
r24370 if paren_level > 0:
paren_level -= 1
Thomas Kluyver
Working on new input transformation machinery
r24154
Thomas Kluyver
Factor out handling of line continuations
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.

    Parameters
    ----------
    lines : list of str
        Physical lines, each keeping its trailing newline.
    start_line : int
        Index of the line on which the (possibly continued) line starts.

    Returns
    -------
    int
        Index of the last physical line belonging to the continued line.
        If the final line of *lines* itself ends with a backslash, its own
        index is returned, so the result is always a valid index into
        *lines* whenever *start_line* is.
    """
    end_line = start_line
    last_index = len(lines) - 1
    # Never step past the final line: there is nothing for it to continue onto.
    while end_line < last_index and lines[end_line].endswith('\\\n'):
        end_line += 1
    return end_line
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    first_lineno, first_col = start[0], start[1]
    pieces = [lines[first_lineno][first_col:]]
    pieces.extend(lines[first_lineno + 1:end_line + 1])

    *continued, final = pieces
    # Every piece but the last loses its trailing backslash + newline ...
    cleaned = [piece[:-2] for piece in continued]
    # ... and the last piece loses only its final character (the newline).
    cleaned.append(final[:-1])
    return ' '.join(cleaned)
Thomas Kluyver
Transformations for 'help?' syntax
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments. This is hard to reliably avoid with regexes. The solution is to
    tokenise the code as Python, and recognise the special syntax in the tokens.

    IPython's special syntax is not valid Python syntax, so tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax. After each transformation, tokens are regenerated to find the next
    piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute can select which transformation to apply if multiple
    transformers match in the same place. Lower numbers have higher priority.
    This allows "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is a (line, column) pair with a 1-indexed line number;
        # it is stored 0-indexed.
        lineno = start[0]
        self.start_line = lineno - 1
        self.start_col = start[1]

    def sortby(self):
        """Sort key: earliest position first, then lowest priority number."""
        return (self.start_line, self.start_col, self.priority)

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance pointing at the ``%`` token, or None if no
        logical line has a top-level ``=`` immediately followed by ``%`` and
        a NAME token.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Both line[assign_ix+1] (the '%') and line[assign_ix+2] (the
            # magic name) are indexed below, so the line must hold at least
            # assign_ix + 3 tokens.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        Takes the physical lines of the cell and returns a new list in which
        the (possibly backslash-continued) ``%magic args`` right-hand side is
        replaced by a ``get_ipython().run_line_magic(name, args)`` call on a
        single line. Text before the ``%`` is kept as-is.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
Thomas Kluyver
Add transformation for system assignments
r24156
Thomas Kluyver
Transformations for 'help?' syntax
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns an instance pointing at the ``!`` token, or None if there is
        no such assignment.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            # Skip lines whose source text itself begins with '='.
            if line[assign_ix].line.strip().startswith('='):
                continue
            if len(line) < assign_ix + 2:
                continue
            # Walk the run of error tokens right after '=': whitespace is
            # skipped, '!' is a match, anything else ends the search here.
            for tok in line[assign_ix + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The (possibly backslash-continued) ``!cmd`` right-hand side is
        replaced by a ``get_ipython().getoutput(cmd)`` call on one line.
        """
        first, col = self.start_line, self.start_col

        prefix = lines[first][:col]
        last = find_end_of_continued_line(lines, first)
        rhs = assemble_continued_line(lines, (first, col), last)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        call = "get_ipython().getoutput({!r})".format(cmd)
        replacement = prefix + call + '\n'
        return lines[:first] + [replacement] + lines[last + 1:]
Thomas Kluyver
Escaped commands
r24159 # The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.
ESC_SHELL = '!' # Send line to underlying system shell
ESC_SH_CAP = '!!' # Send line to system shell and capture output
ESC_HELP = '?' # Find information about object
ESC_HELP2 = '??' # Find extra-detailed information about object
ESC_MAGIC = '%' # Call magic function
ESC_MAGIC2 = '%%' # Call cell-magic function
ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';' # Quote all args as a single string, call
ESC_PAREN = '/' # Call first argument with rest of line as arguments
# One-character escapes: EscapedCommand.find() compares the first token of a
# line (after any INDENT/DEDENT tokens) against this set.
ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
# Two-character escapes: EscapedCommand.transform() tries these first and
# falls back to a one-character escape otherwise.
ESCAPE_DOUBLES = {'!!', '??'} # %% (cell magic) is handled separately
def _make_help_call(target, esc, next_input=None):
"""Prepares a pinfo(2)/psearch call from a target name and the escape
(i.e. ? or ??)"""
method = 'pinfo2' if esc == '??' \
else 'psearch' if '*' in target \
else 'pinfo'
arg = " ".join([method, target])
#Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
t_magic_name, _, t_magic_arg_s = arg.partition(' ')
t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
if next_input is None:
return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
else:
return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
(next_input, t_magic_name, t_magic_arg_s)
def _tr_help(content):
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 """Translate lines escaped with: ?
A naked help line should fire the intro help screen (shell.show_usage())
"""
Thomas Kluyver
Escaped commands
r24159 if not content:
return 'get_ipython().show_usage()'
return _make_help_call(content, '?')
def _tr_help2(content):
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 """Translate lines escaped with: ??
A naked help line should fire the intro help screen (shell.show_usage())
"""
Thomas Kluyver
Escaped commands
r24159 if not content:
return 'get_ipython().show_usage()'
return _make_help_call(content, '??')
def _tr_magic(content):
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 "Translate lines escaped with a percent sign: %"
Thomas Kluyver
Escaped commands
r24159 name, _, args = content.partition(' ')
return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
def _tr_quote(content):
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 "Translate lines escaped with a comma: ,"
Thomas Kluyver
Escaped commands
r24159 name, _, args = content.partition(' ')
return '%s("%s")' % (name, '", "'.join(args.split()) )
def _tr_quote2(content):
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 "Translate lines escaped with a semicolon: ;"
Thomas Kluyver
Escaped commands
r24159 name, _, args = content.partition(' ')
return '%s("%s")' % (name, args)
def _tr_paren(content):
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 "Translate lines escaped with a slash: /"
Thomas Kluyver
Escaped commands
r24159 name, _, args = content.partition(' ')
return '%s(%s)' % (name, ", ".join(args.split()))
# Dispatch table from escape prefix to a callable that receives the rest of
# the line (with the escape removed) and returns the replacement source text.
# ESC_MAGIC2 has no entry here.
tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP : _tr_help,
       ESC_HELP2 : _tr_help2,
       ESC_MAGIC : _tr_magic,
       ESC_QUOTE : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN : _tr_paren }
Thomas Kluyver
Transformations for 'help?' syntax
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns an instance pointing at the escape token, or None.
        """
        indentation = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            # First token that is not INDENT/DEDENT; None for an empty line
            # or one made up solely of such tokens.
            first = next(
                (tok for tok in line if tok.type not in indentation), None)
            if first is None:
                continue
            if first.string in ESCAPE_SINGLES:
                return cls(first.start)
        return None

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The (possibly backslash-continued) escaped line is replaced by the
        text produced by the matching entry of ``tr``; an escape with no
        entry yields an empty call, leaving just the indent and a newline.
        """
        first, col = self.start_line, self.start_col

        indent = lines[first][:col]
        last = find_end_of_continued_line(lines, first)
        line = assemble_continued_line(lines, (first, col), last)

        # Prefer a two-character escape ('!!', '??') when one is present.
        width = 2 if (len(line) > 1 and line[:2] in ESCAPE_DOUBLES) else 1
        escape, content = line[:width], line[width:]

        call = tr[escape](content) if escape in tr else ''

        replacement = indent + call + '\n'
        return lines[:first] + [replacement] + lines[last + 1:]
Thomas Kluyver
Transformations for 'help?' syntax
# Matches a help request at the end of the text: group 1 is the target (an
# optionally %/%%-prefixed, dotted name that may contain ``*``), group 3 is
# the trailing ``?`` or ``??``.
_help_end_re = re.compile(r"""(%{0,2}
[a-zA-Z_*][\w*]* # Variable name
(\.[a-zA-Z_*][\w*]*)* # .etc.etc
)
(\?\??)$ # ? or ??
""",
                          re.VERBOSE)
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is the (line, column) of the '?' token, 1-indexed line.
        q_lineno = q_locn[0]
        self.q_line = q_lineno - 1
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        Returns an instance spanning from the first real token of the line
        to the '?' token, or None.
        """
        indentation = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) <= 2 or line[-2].string != '?':
                continue
            question = line[-2]
            # The '?' token itself is never INDENT/DEDENT, so this always
            # finds a token.
            first = next(tok for tok in line if tok.type not in indentation)
            return cls(first.start, question.start)
        return None

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError if the text before the '?' does not look like a
        help target.
        """
        before = lines[:self.start_line]
        after = lines[self.q_line + 1:]

        piece = ''.join(lines[self.start_line:self.q_line + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        target, esc = m.group(1), m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        mid_command = (not before) and (not after) \
            and content.strip() != m.group(0)
        next_input = content.rstrip('?\n') if mid_command else None

        call = _make_help_call(target, esc, next_input=next_input)
        return before + [indent + call + '\n'] + after
Matthias Bussonnier
Fix miss-capturing of assign statement after a dedent....
def make_tokens_by_line(lines:List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. All lines except the last lines should keep their line ending ('\\n',
    '\\r\\n') for this to properly work. Use `.splitlines(keepends=True)`
    for example when passing block of text to this function.

    Returns a list of lists of tokenize.TokenInfo objects. Tokenizing stops
    silently if the input ends inside a multiline string or expression.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}
    groups = [[]]
    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")

    depth = 0
    readline = iter(lines).__next__
    try:
        for tok in tokenize.generate_tokens(readline):
            groups[-1].append(tok)
            ends_group = tok.type == tokenize.NEWLINE or (
                tok.type == tokenize.NL and depth <= 0)
            if ends_group:
                groups.append([])
            elif tok.string in openers:
                depth += 1
            elif tok.string in closers and depth > 0:
                # Unbalanced closers never push the depth below zero.
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # Drop the trailing empty group left behind by a final line ending.
    if not groups[-1]:
        groups.pop()

    return groups
Thomas Kluyver
Debugging function to see tokens
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints every token of *s*, grouped the way ``make_tokens_by_line`` groups
    them. A trailing newline is added to *s* if it lacks one.
    """
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in group:
            print(" ", tokinfo)
Thomas Kluyver
Prevent infinite loops in input transformation
r24371 # Arbitrary limit to prevent getting stuck in infinite loops
TRANSFORM_LOOP_LIMIT = 500
Thomas Kluyver
Start integrating new input transformation machinery into InteractiveShell
r24164 class TransformerManager:
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 """Applies various transformations to a cell or code block.
The key methods for external use are ``transform_cell()``
and ``check_complete()``.
"""
Thomas Kluyver
Working on new input transformation machinery
def __init__(self):
    """Set up the default transformation pipeline."""
    # Run first, on the raw list of lines.
    cleanup = [leading_indent, classic_prompt, ipython_prompt]
    # Run next, still on the list of lines.
    line_level = [cell_magic]
    # Token-based transformer classes, each tried via its ``find()``.
    token_level = [MagicAssign, SystemAssign, EscapedCommand, HelpEnd]

    self.cleanup_transforms = cleanup
    self.line_transforms = line_level
    self.token_transformers = token_level
Tony Fast
Include empty lines condition in PromptStipper and cell_magic.
r24631
Thomas Kluyver
Start integrating new input transformation machinery into InteractiveShell
def do_one_token_transform(self, lines):
    """Find and run the transform earliest in the code.

    Returns (changed, lines).

    This method is called repeatedly until changed is False, indicating
    that all available transformations are complete.

    The tokens following IPython special syntax might not be valid, so
    the transformed code is retokenised every time to identify the next
    piece of special syntax. Hopefully long code cells are mostly valid
    Python, not using lots of IPython special syntax, so this shouldn't be
    a performance issue.
    """
    tokens_by_line = make_tokens_by_line(lines)
    found = (cls.find(tokens_by_line) for cls in self.token_transformers)
    candidates = [match for match in found if match]

    if not candidates:
        # Nothing to transform
        return False, lines

    # Try candidates from earliest/highest priority onwards; one that
    # raises SyntaxError is skipped in favour of the next.
    for transformer in sorted(candidates, key=TokenTransformBase.sortby):
        try:
            return True, transformer.transform(lines)
        except SyntaxError:
            continue

    return False, lines
Thomas Kluyver
Working on new input transformation machinery
r24154
Thomas Kluyver
Start integrating new input transformation machinery into InteractiveShell
def do_token_transforms(self, lines):
    """Apply token transforms repeatedly until nothing changes.

    Raises RuntimeError if the input is still changing after
    TRANSFORM_LOOP_LIMIT passes.
    """
    attempts_left = TRANSFORM_LOOP_LIMIT
    while attempts_left > 0:
        changed, lines = self.do_one_token_transform(lines)
        if not changed:
            return lines
        attempts_left -= 1

    raise RuntimeError("Input transformation still changing after "
                       "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
Thomas Kluyver
Add & improve docstrings following @willingc's review
r24407 def transform_cell(self, cell: str) -> str:
"""Transforms a cell of input code"""
Thomas Kluyver
Start integrating new input transformation machinery into InteractiveShell
r24164 if not cell.endswith('\n'):
Thomas Kluyver
Switch some references to input_splitter to input_transformer_manager
r24172 cell += '\n' # Ensure the cell has a trailing newline
Thomas Kluyver
Start integrating new input transformation machinery into InteractiveShell
r24164 lines = cell.splitlines(keepends=True)
Thomas Kluyver
Start adding code for checking when input is complete
r24165 for transform in self.cleanup_transforms + self.line_transforms:
Thomas Kluyver
Start integrating new input transformation machinery into InteractiveShell
r24164 lines = transform(lines)
Thomas Kluyver
Working on new input transformation machinery
r24154
Thomas Kluyver
Start integrating new input transformation machinery into InteractiveShell
r24164 lines = self.do_token_transforms(lines)
return ''.join(lines)
Thomas Kluyver
Start adding code for checking when input is complete
r24165
def check_complete(self, cell: str):
    """Return whether a block of code is ready to execute, or should be continued

    Parameters
    ----------
    cell : string
        Python input code, which can be multiline.

    Returns
    -------
    status : str
        One of 'complete', 'incomplete', or 'invalid' if the input is not a
        prefix of valid code.
    indent_spaces : int or None
        The number of spaces by which to indent the next line of code. This
        is None when status is not 'incomplete'; it is also None for the
        'incomplete' result returned when the last token line's source ends
        with a backslash.
    """
    # Remember whether the input ends in a newline, ignoring any other
    # whitespace that follows that newline.
    trailing_whitespace = cell[len(cell.rstrip()):]
    ends_with_newline = '\n' in trailing_whitespace

    if not ends_with_newline:
        # Append a newline for consistent tokenization
        # See https://bugs.python.org/issue33899
        cell += '\n'

    lines = cell.splitlines(keepends=True)

    if not lines:
        return 'complete', None

    if lines[-1].endswith('\\'):
        # Explicit backslash continuation
        return 'incomplete', find_last_indent(lines)

    try:
        for transform in self.cleanup_transforms:
            lines = transform(lines)
    except SyntaxError:
        return 'invalid', None

    if lines[0].startswith('%%'):
        # Special case for cell magics - completion marked by blank line
        if lines[-1].strip():
            return 'incomplete', find_last_indent(lines)
        return 'complete', None

    try:
        for transform in self.line_transforms:
            lines = transform(lines)
        lines = self.do_token_transforms(lines)
    except SyntaxError:
        return 'invalid', None

    tokens_by_line = make_tokens_by_line(lines)

    # No tokens at all, or the token stream stopped before ENDMARKER
    # (we're in a multiline string or expression).
    if (not tokens_by_line
            or tokens_by_line[-1][-1].type != tokenize.ENDMARKER):
        return 'incomplete', find_last_indent(lines)

    newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}

    # Pop the last line when it only contains DEDENTs and ENDMARKER, keeping
    # it around for the dedent check at the end.
    last_token_line = None
    closing_types = {tok.type for tok in tokens_by_line[-1]}
    only_closing_tokens = closing_types in (
        {tokenize.DEDENT, tokenize.ENDMARKER},
        {tokenize.ENDMARKER},
    )
    if len(tokens_by_line) > 1 and only_closing_tokens:
        last_token_line = tokens_by_line.pop()

    # Strip trailing NEWLINE / COMMENT / ENDMARKER tokens from the final
    # remaining line (this mutates the list held in tokens_by_line).
    final_line = tokens_by_line[-1]
    while final_line and final_line[-1].type in newline_types:
        final_line.pop()

    if len(tokens_by_line) == 1 and not final_line:
        return 'incomplete', 0

    if final_line[-1].string == ':':
        # The last line starts a block (e.g. 'if foo:'): indent one level
        # deeper than the column of its first non-INDENT/DEDENT token.
        first_real = next(
            tok for tok in final_line
            if tok.type not in {tokenize.INDENT, tokenize.DEDENT}
        )
        return 'incomplete', first_real.start[1] + 4

    if final_line[0].line.endswith('\\'):
        return 'incomplete', None

    # At this point, our checks think the code is complete (or invalid).
    # We'll use codeop.compile_command to check this with the real parser,
    # treating syntax warnings as errors.
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('error', SyntaxWarning)
            res = compile_command(''.join(lines), symbol='exec')
    except (SyntaxError, OverflowError, ValueError, TypeError,
            MemoryError, SyntaxWarning):
        return 'invalid', None

    if res is None:
        return 'incomplete', find_last_indent(lines)

    # When the popped final line began with a DEDENT, the input only counts
    # as complete if it ended with a newline.
    if (last_token_line
            and last_token_line[0].type == tokenize.DEDENT
            and not ends_with_newline):
        return 'incomplete', find_last_indent(lines)

    return 'complete', None
def find_last_indent(lines):
    """Return the width of the leading whitespace on the last line.

    The leading whitespace is whatever the module-level ``_indent_re``
    pattern matches at the start of ``lines[-1]``; each tab in it counts as
    four spaces. Returns 0 when the pattern does not match.

    Parameters
    ----------
    lines : list of str
        The input lines; must contain at least one line.

    Returns
    -------
    int
        The indentation width in spaces.
    """
    match = _indent_re.match(lines[-1])
    leading = match.group(0) if match else ''
    return len(leading.replace('\t', ' ' * 4))