prefilter.py
320 lines
| 11.3 KiB
| text/x-python
|
PythonLexer
/ IPython / prefilter.py
dan.milstein
|
r657 | # -*- coding: utf-8 -*- | ||
""" | ||||
Classes and functions for prefiltering (transforming) a line of user input. | ||||
This module is responsible, primarily, for breaking the line up into useful | ||||
pieces and triggering the appropriate handlers in iplib to do the actual | ||||
transforming work. | ||||
""" | ||||
__docformat__ = "restructuredtext en" | ||||
import re | ||||
import IPython.ipapi | ||||
class LineInfo(object): | ||||
"""A single line of input and associated info. | ||||
Includes the following as properties: | ||||
line | ||||
The original, raw line | ||||
continue_prompt | ||||
Is this line a continuation in a sequence of multiline input? | ||||
pre | ||||
The initial esc character or whitespace. | ||||
preChar | ||||
The escape character(s) in pre or the empty string if there isn't one. | ||||
Note that '!!' is a possible value for preChar. Otherwise it will | ||||
always be a single character. | ||||
preWhitespace | ||||
The leading whitespace from pre if it exists. If there is a preChar, | ||||
this is just ''. | ||||
iFun | ||||
The 'function part', which is basically the maximal initial sequence | ||||
of valid python identifiers and the '.' character. This is what is | ||||
checked for alias and magic transformations, used for auto-calling, | ||||
etc. | ||||
theRest | ||||
Everything else on the line. | ||||
""" | ||||
def __init__(self, line, continue_prompt): | ||||
self.line = line | ||||
self.continue_prompt = continue_prompt | ||||
self.pre, self.iFun, self.theRest = splitUserInput(line) | ||||
self.preChar = self.pre.strip() | ||||
if self.preChar: | ||||
self.preWhitespace = '' # No whitespace allowd before esc chars | ||||
vivainio
|
r775 | else: | ||
dan.milstein
|
r657 | self.preWhitespace = self.pre | ||
self._oinfo = None | ||||
def ofind(self, ip): | ||||
"""Do a full, attribute-walking lookup of the iFun in the various | ||||
namespaces for the given IPython InteractiveShell instance. | ||||
Return a dict with keys: found,obj,ospace,ismagic | ||||
Note: can cause state changes because of calling getattr, but should | ||||
only be run if autocall is on and if the line hasn't matched any | ||||
other, less dangerous handlers. | ||||
Does cache the results of the call, so can be called multiple times | ||||
without worrying about *further* damaging state. | ||||
""" | ||||
if not self._oinfo: | ||||
self._oinfo = ip._ofind(self.iFun) | ||||
return self._oinfo | ||||
vivainio
|
r776 | def __str__(self): | ||
return "Lineinfo [%s|%s|%s]" %(self.pre,self.iFun,self.theRest) | ||||
dan.milstein
|
r657 | |||
def splitUserInput(line, pattern=None): | ||||
"""Split user input into pre-char/whitespace, function part and rest. | ||||
Mostly internal to this module, but also used by iplib.expand_aliases, | ||||
which passes in a shell pattern. | ||||
""" | ||||
# It seems to me that the shell splitting should be a separate method. | ||||
if not pattern: | ||||
pattern = line_split | ||||
match = pattern.match(line) | ||||
if not match: | ||||
#print "match failed for line '%s'" % line | ||||
try: | ||||
iFun,theRest = line.split(None,1) | ||||
except ValueError: | ||||
#print "split failed for line '%s'" % line | ||||
iFun,theRest = line,'' | ||||
pre = re.match('^(\s*)(.*)',line).groups()[0] | ||||
else: | ||||
pre,iFun,theRest = match.groups() | ||||
# iFun has to be a valid python identifier, so it better be only pure | ||||
# ascii, no unicode: | ||||
try: | ||||
iFun = iFun.encode('ascii') | ||||
except UnicodeEncodeError: | ||||
theRest = iFun + u' ' + theRest | ||||
iFun = u'' | ||||
#print 'line:<%s>' % line # dbg | ||||
#print 'pre <%s> iFun <%s> rest <%s>' % (pre,iFun.strip(),theRest) # dbg | ||||
dan.milstein
|
r707 | return pre,iFun.strip(),theRest.lstrip() | ||
dan.milstein
|
r657 | |||
# RegExp for splitting line contents into pre-char//first word-method//rest. | ||||
# For clarity, each group in on one line. | ||||
# WARNING: update the regexp if the escapes in iplib are changed, as they | ||||
# are hardwired in. | ||||
# Although it's not solely driven by the regex, note that: | ||||
# ,;/% only trigger if they are the first character on the line | ||||
# ! and !! trigger if they are first char(s) *or* follow an indent | ||||
# ? triggers as first or last char. | ||||
# The three parts of the regex are: | ||||
# 1) pre: pre_char *or* initial whitespace | ||||
# 2) iFun: first word/method (mix of \w and '.') | ||||
dan.milstein
|
r707 | # 3) theRest: rest of line (separated from iFun by space if non-empty) | ||
dan.milstein
|
r657 | line_split = re.compile(r'^([,;/%?]|!!?|\s*)' | ||
dan.milstein
|
r707 | r'\s*([\w\.]+)' | ||
r'(\s+.*$|$)') | ||||
dan.milstein
|
r657 | |||
shell_line_split = re.compile(r'^(\s*)(\S*\s*)(.*$)') | ||||
def prefilter(line_info, ip): | ||||
"""Call one of the passed-in InteractiveShell's handler preprocessors, | ||||
depending on the form of the line. Return the results, which must be a | ||||
value, even if it's a blank ('').""" | ||||
# Note: the order of these checks does matter. | ||||
for check in [ checkEmacs, | ||||
vivainio
|
r780 | checkShellEscape, | ||
dan.milstein
|
r657 | checkIPyAutocall, | ||
vivainio
|
r780 | checkMultiLineMagic, | ||
dan.milstein
|
r657 | checkEscChars, | ||
dan.milstein
|
r670 | checkAssignment, | ||
dan.milstein
|
r657 | checkAutomagic, | ||
checkAlias, | ||||
dan.milstein
|
r676 | checkPythonOps, | ||
dan.milstein
|
r657 | checkAutocall, | ||
]: | ||||
handler = check(line_info, ip) | ||||
if handler: | ||||
return handler(line_info) | ||||
return ip.handle_normal(line_info) | ||||
# Handler checks | ||||
# | ||||
# All have the same interface: they take a LineInfo object and a ref to the | ||||
# iplib.InteractiveShell object. They check the line to see if a particular | ||||
# handler should be called, and return either a handler or None. The | ||||
# handlers which they return are *bound* methods of the InteractiveShell | ||||
# object. | ||||
# | ||||
# In general, these checks should only take responsibility for their 'own' | ||||
# handler. If it doesn't get triggered, they should just return None and | ||||
# let the rest of the check sequence run. | ||||
vivainio
|
r780 | |||
def checkShellEscape(l_info,ip): | ||||
if l_info.line.lstrip().startswith(ip.ESC_SHELL): | ||||
return ip.handle_shell_escape | ||||
dan.milstein
|
r657 | def checkEmacs(l_info,ip): | ||
"Emacs ipython-mode tags certain input lines." | ||||
if l_info.line.endswith('# PYTHON-MODE'): | ||||
return ip.handle_emacs | ||||
else: | ||||
return None | ||||
def checkIPyAutocall(l_info,ip): | ||||
"Instances of IPyAutocall in user_ns get autocalled immediately" | ||||
obj = ip.user_ns.get(l_info.iFun, None) | ||||
if isinstance(obj, IPython.ipapi.IPyAutocall): | ||||
obj.set_ip(ip.api) | ||||
return ip.handle_auto | ||||
else: | ||||
return None | ||||
vivainio
|
r780 | def checkMultiLineMagic(l_info,ip): | ||
dan.milstein
|
r657 | "Allow ! and !! in multi-line statements if multi_line_specials is on" | ||
# Note that this one of the only places we check the first character of | ||||
# iFun and *not* the preChar. Also note that the below test matches | ||||
vivainio
|
r776 | # both ! and !!. | ||
dan.milstein
|
r657 | if l_info.continue_prompt \ | ||
vivainio
|
r776 | and ip.rc.multi_line_specials: | ||
if l_info.iFun.startswith(ip.ESC_MAGIC): | ||||
return ip.handle_magic | ||||
dan.milstein
|
r657 | else: | ||
return None | ||||
def checkEscChars(l_info,ip): | ||||
"""Check for escape character and return either a handler to handle it, | ||||
or None if there is no escape char.""" | ||||
if l_info.line[-1] == ip.ESC_HELP \ | ||||
and l_info.preChar != ip.ESC_SHELL \ | ||||
and l_info.preChar != ip.ESC_SH_CAP: | ||||
# the ? can be at the end, but *not* for either kind of shell escape, | ||||
# because a ? can be a vaild final char in a shell cmd | ||||
return ip.handle_help | ||||
elif l_info.preChar in ip.esc_handlers: | ||||
return ip.esc_handlers[l_info.preChar] | ||||
else: | ||||
return None | ||||
dan.milstein
|
r670 | |||
def checkAssignment(l_info,ip): | ||||
"""Check to see if user is assigning to a var for the first time, in | ||||
which case we want to avoid any sort of automagic / autocall games. | ||||
dan.milstein
|
r657 | |||
dan.milstein
|
r670 | This allows users to assign to either alias or magic names true python | ||
variables (the magic/alias systems always take second seat to true | ||||
python code). E.g. ls='hi', or ls,that=1,2""" | ||||
if l_info.theRest and l_info.theRest[0] in '=,': | ||||
dan.milstein
|
r657 | return ip.handle_normal | ||
else: | ||||
return None | ||||
dan.milstein
|
r670 | |||
dan.milstein
|
r657 | def checkAutomagic(l_info,ip): | ||
"""If the iFun is magic, and automagic is on, run it. Note: normal, | ||||
non-auto magic would already have been triggered via '%' in | ||||
dan.milstein
|
r670 | check_esc_chars. This just checks for automagic. Also, before | ||
triggering the magic handler, make sure that there is nothing in the | ||||
user namespace which could shadow it.""" | ||||
dan.milstein
|
r657 | if not ip.rc.automagic or not hasattr(ip,'magic_'+l_info.iFun): | ||
return None | ||||
# We have a likely magic method. Make sure we should actually call it. | ||||
if l_info.continue_prompt and not ip.rc.multi_line_specials: | ||||
return None | ||||
head = l_info.iFun.split('.',1)[0] | ||||
if isShadowed(head,ip): | ||||
return None | ||||
return ip.handle_magic | ||||
dan.milstein
|
r670 | |||
dan.milstein
|
r657 | def checkAlias(l_info,ip): | ||
"Check if the initital identifier on the line is an alias." | ||||
# Note: aliases can not contain '.' | ||||
head = l_info.iFun.split('.',1)[0] | ||||
if l_info.iFun not in ip.alias_table \ | ||||
or head not in ip.alias_table \ | ||||
or isShadowed(head,ip): | ||||
return None | ||||
return ip.handle_alias | ||||
dan.milstein
|
r676 | def checkPythonOps(l_info,ip): | ||
"""If the 'rest' of the line begins with a function call or pretty much | ||||
any python operator, we should simply execute the line (regardless of | ||||
whether or not there's a possible autocall expansion). This avoids | ||||
spurious (and very confusing) geattr() accesses.""" | ||||
if l_info.theRest and l_info.theRest[0] in '!=()<>,+*/%^&|': | ||||
return ip.handle_normal | ||||
else: | ||||
return None | ||||
dan.milstein
|
r657 | def checkAutocall(l_info,ip): | ||
"Check if the initial word/function is callable and autocall is on." | ||||
if not ip.rc.autocall: | ||||
return None | ||||
oinfo = l_info.ofind(ip) # This can mutate state via getattr | ||||
if not oinfo['found']: | ||||
return None | ||||
if callable(oinfo['obj']) \ | ||||
and (not re_exclude_auto.match(l_info.theRest)) \ | ||||
and re_fun_name.match(l_info.iFun): | ||||
#print 'going auto' # dbg | ||||
return ip.handle_auto | ||||
else: | ||||
#print 'was callable?', callable(l_info.oinfo['obj']) # dbg | ||||
return None | ||||
# RegExp to identify potential function names | ||||
re_fun_name = re.compile(r'[a-zA-Z_]([a-zA-Z0-9_.]*) *$') | ||||
# RegExp to exclude strings with this start from autocalling. In | ||||
# particular, all binary operators should be excluded, so that if foo is | ||||
# callable, foo OP bar doesn't become foo(OP bar), which is invalid. The | ||||
# characters '!=()' don't need to be checked for, as the checkPythonChars | ||||
# routine explicitely does so, to catch direct calls and rebindings of | ||||
# existing names. | ||||
# Warning: the '-' HAS TO BE AT THE END of the first group, otherwise | ||||
# it affects the rest of the group in square brackets. | ||||
re_exclude_auto = re.compile(r'^[,&^\|\*/\+-]' | ||||
r'|^is |^not |^in |^and |^or ') | ||||
# try to catch also methods for stuff in lists/tuples/dicts: off | ||||
# (experimental). For this to work, the line_split regexp would need | ||||
# to be modified so it wouldn't break things at '['. That line is | ||||
# nasty enough that I shouldn't change it until I can test it _well_. | ||||
#self.re_fun_name = re.compile (r'[a-zA-Z_]([a-zA-Z0-9_.\[\]]*) ?$') | ||||
# Handler Check Utilities | ||||
def isShadowed(identifier,ip): | ||||
"""Is the given identifier defined in one of the namespaces which shadow | ||||
the alias and magic namespaces? Note that an identifier is different | ||||
than iFun, because it can not contain a '.' character.""" | ||||
# This is much safer than calling ofind, which can change state | ||||
return (identifier in ip.user_ns \ | ||||
or identifier in ip.internal_ns \ | ||||
or identifier in ip.ns_table['builtin']) | ||||