##// END OF EJS Templates
Major refactoring of prefilter mechanism. Much of the transformation process has been moved out of the iplib.InteractiveShell class and into a separate module, IPython.prefilter. In addition, extensive tests have been added for prefiltering.
Major refactoring of prefilter mechanism. Much of the transformation process has been moved out of the iplib.InteractiveShell class and into a separate module, IPython.prefilter. In addition, extensive tests have been added for prefiltering.

File last commit:

r657:499f6d7b
r657:499f6d7b
Show More
prefilter.py
298 lines | 10.6 KiB | text/x-python | PythonLexer
# -*- coding: utf-8 -*-
"""
Classes and functions for prefiltering (transforming) a line of user input.
This module is responsible, primarily, for breaking the line up into useful
pieces and triggering the appropriate handlers in iplib to do the actual
transforming work.
"""
__docformat__ = "restructuredtext en"
import re
import IPython.ipapi
class LineInfo(object):
"""A single line of input and associated info.
Includes the following as properties:
line
The original, raw line
continue_prompt
Is this line a continuation in a sequence of multiline input?
pre
The initial esc character or whitespace.
preChar
The escape character(s) in pre or the empty string if there isn't one.
Note that '!!' is a possible value for preChar. Otherwise it will
always be a single character.
preWhitespace
The leading whitespace from pre if it exists. If there is a preChar,
this is just ''.
iFun
The 'function part', which is basically the maximal initial sequence
of valid python identifiers and the '.' character. This is what is
checked for alias and magic transformations, used for auto-calling,
etc.
theRest
Everything else on the line.
"""
def __init__(self, line, continue_prompt):
self.line = line
self.continue_prompt = continue_prompt
self.pre, self.iFun, self.theRest = splitUserInput(line)
self.preChar = self.pre.strip()
if self.preChar:
self.preWhitespace = '' # No whitespace allowd before esc chars
else:
self.preWhitespace = self.pre
self._oinfo = None
def ofind(self, ip):
"""Do a full, attribute-walking lookup of the iFun in the various
namespaces for the given IPython InteractiveShell instance.
Return a dict with keys: found,obj,ospace,ismagic
Note: can cause state changes because of calling getattr, but should
only be run if autocall is on and if the line hasn't matched any
other, less dangerous handlers.
Does cache the results of the call, so can be called multiple times
without worrying about *further* damaging state.
"""
if not self._oinfo:
self._oinfo = ip._ofind(self.iFun)
return self._oinfo
def splitUserInput(line, pattern=None):
"""Split user input into pre-char/whitespace, function part and rest.
Mostly internal to this module, but also used by iplib.expand_aliases,
which passes in a shell pattern.
"""
# It seems to me that the shell splitting should be a separate method.
if not pattern:
pattern = line_split
match = pattern.match(line)
if not match:
#print "match failed for line '%s'" % line
try:
iFun,theRest = line.split(None,1)
except ValueError:
#print "split failed for line '%s'" % line
iFun,theRest = line,''
pre = re.match('^(\s*)(.*)',line).groups()[0]
else:
pre,iFun,theRest = match.groups()
# iFun has to be a valid python identifier, so it better be only pure
# ascii, no unicode:
try:
iFun = iFun.encode('ascii')
except UnicodeEncodeError:
theRest = iFun + u' ' + theRest
iFun = u''
#print 'line:<%s>' % line # dbg
#print 'pre <%s> iFun <%s> rest <%s>' % (pre,iFun.strip(),theRest) # dbg
return pre,iFun.strip(),theRest
# RegExp for splitting line contents into pre-char//first word-method//rest.
# For clarity, each group in on one line.
# WARNING: update the regexp if the escapes in iplib are changed, as they
# are hardwired in.
# Although it's not solely driven by the regex, note that:
# ,;/% only trigger if they are the first character on the line
# ! and !! trigger if they are first char(s) *or* follow an indent
# ? triggers as first or last char.
# The three parts of the regex are:
# 1) pre: pre_char *or* initial whitespace
# 2) iFun: first word/method (mix of \w and '.')
# 3) theRest: rest of line
line_split = re.compile(r'^([,;/%?]|!!?|\s*)'
r'\s*([\w\.]+)\s*'
r'(.*)$')
shell_line_split = re.compile(r'^(\s*)(\S*\s*)(.*$)')
def prefilter(line_info, ip):
"""Call one of the passed-in InteractiveShell's handler preprocessors,
depending on the form of the line. Return the results, which must be a
value, even if it's a blank ('')."""
# Note: the order of these checks does matter.
for check in [ checkEmacs,
checkIPyAutocall,
checkMultiLineShell,
checkEscChars,
checkPythonChars,
checkAutomagic,
checkAlias,
checkAutocall,
]:
handler = check(line_info, ip)
if handler:
return handler(line_info)
return ip.handle_normal(line_info)
# Handler checks
#
# All have the same interface: they take a LineInfo object and a ref to the
# iplib.InteractiveShell object. They check the line to see if a particular
# handler should be called, and return either a handler or None. The
# handlers which they return are *bound* methods of the InteractiveShell
# object.
#
# In general, these checks should only take responsibility for their 'own'
# handler. If it doesn't get triggered, they should just return None and
# let the rest of the check sequence run.
def checkEmacs(l_info,ip):
"Emacs ipython-mode tags certain input lines."
if l_info.line.endswith('# PYTHON-MODE'):
return ip.handle_emacs
else:
return None
def checkIPyAutocall(l_info,ip):
"Instances of IPyAutocall in user_ns get autocalled immediately"
obj = ip.user_ns.get(l_info.iFun, None)
if isinstance(obj, IPython.ipapi.IPyAutocall):
obj.set_ip(ip.api)
return ip.handle_auto
else:
return None
def checkMultiLineShell(l_info,ip):
"Allow ! and !! in multi-line statements if multi_line_specials is on"
# Note that this one of the only places we check the first character of
# iFun and *not* the preChar. Also note that the below test matches
# both ! and !!.
if l_info.continue_prompt \
and ip.rc.multi_line_specials \
and l_info.iFun.startswith(ip.ESC_SHELL):
return ip.handle_shell_escape
else:
return None
def checkEscChars(l_info,ip):
"""Check for escape character and return either a handler to handle it,
or None if there is no escape char."""
if l_info.line[-1] == ip.ESC_HELP \
and l_info.preChar != ip.ESC_SHELL \
and l_info.preChar != ip.ESC_SH_CAP:
# the ? can be at the end, but *not* for either kind of shell escape,
# because a ? can be a vaild final char in a shell cmd
return ip.handle_help
elif l_info.preChar in ip.esc_handlers:
return ip.esc_handlers[l_info.preChar]
else:
return None
def checkPythonChars(l_info,ip):
"""If the 'rest' of the line begins with an (in)equality, assginment,
function call or tuple comma, we should simply execute the line
(regardless of whether or not there's a possible alias, automagic or
autocall expansion). This both avoids spurious geattr() accesses on
objects upon assignment, and also allows users to assign to either alias
or magic names true python variables (the magic/alias systems always
take second seat to true python code). E.g. ls='hi', or ls,that=1,2"""
if l_info.theRest and l_info.theRest[0] in '!=()<>,+*/%^&|':
return ip.handle_normal
else:
return None
def checkAutomagic(l_info,ip):
"""If the iFun is magic, and automagic is on, run it. Note: normal,
non-auto magic would already have been triggered via '%' in
check_esc_chars. This just checks for automagic."""
if not ip.rc.automagic or not hasattr(ip,'magic_'+l_info.iFun):
return None
# We have a likely magic method. Make sure we should actually call it.
if l_info.continue_prompt and not ip.rc.multi_line_specials:
return None
head = l_info.iFun.split('.',1)[0]
if isShadowed(head,ip):
return None
return ip.handle_magic
def checkAlias(l_info,ip):
"Check if the initital identifier on the line is an alias."
# Note: aliases can not contain '.'
head = l_info.iFun.split('.',1)[0]
if l_info.iFun not in ip.alias_table \
or head not in ip.alias_table \
or isShadowed(head,ip):
return None
return ip.handle_alias
def checkAutocall(l_info,ip):
"Check if the initial word/function is callable and autocall is on."
if not ip.rc.autocall:
return None
oinfo = l_info.ofind(ip) # This can mutate state via getattr
if not oinfo['found']:
return None
if callable(oinfo['obj']) \
and (not re_exclude_auto.match(l_info.theRest)) \
and re_fun_name.match(l_info.iFun):
#print 'going auto' # dbg
return ip.handle_auto
else:
#print 'was callable?', callable(l_info.oinfo['obj']) # dbg
return None
# RegExp to identify potential function names
re_fun_name = re.compile(r'[a-zA-Z_]([a-zA-Z0-9_.]*) *$')
# RegExp to exclude strings with this start from autocalling. In
# particular, all binary operators should be excluded, so that if foo is
# callable, foo OP bar doesn't become foo(OP bar), which is invalid. The
# characters '!=()' don't need to be checked for, as the checkPythonChars
# routine explicitely does so, to catch direct calls and rebindings of
# existing names.
# Warning: the '-' HAS TO BE AT THE END of the first group, otherwise
# it affects the rest of the group in square brackets.
re_exclude_auto = re.compile(r'^[,&^\|\*/\+-]'
r'|^is |^not |^in |^and |^or ')
# try to catch also methods for stuff in lists/tuples/dicts: off
# (experimental). For this to work, the line_split regexp would need
# to be modified so it wouldn't break things at '['. That line is
# nasty enough that I shouldn't change it until I can test it _well_.
#self.re_fun_name = re.compile (r'[a-zA-Z_]([a-zA-Z0-9_.\[\]]*) ?$')
# Handler Check Utilities
def isShadowed(identifier,ip):
"""Is the given identifier defined in one of the namespaces which shadow
the alias and magic namespaces? Note that an identifier is different
than iFun, because it can not contain a '.' character."""
# This is much safer than calling ofind, which can change state
return (identifier in ip.user_ns \
or identifier in ip.internal_ns \
or identifier in ip.ns_table['builtin'])