From 499f6d7b4f915c99be84a6f3a6428a73f5b94bb7 2007-05-16 13:06:12 From: dan.milstein Date: 2007-05-16 13:06:12 Subject: [PATCH] Major refactoring of prefilter mechanism. Much of the transformation process has been moved out of the iplib.InteractiveShell class and into a separate module, IPython.prefilter. In addition, extensive tests have been added for prefiltering. --- diff --git a/IPython/iplib.py b/IPython/iplib.py index 9b601d5..34fecb5 100644 --- a/IPython/iplib.py +++ b/IPython/iplib.py @@ -6,7 +6,7 @@ Requires Python 2.3 or newer. This file contains all the classes and helper functions specific to IPython. -$Id: iplib.py 2350 2007-05-15 16:56:44Z vivainio $ +$Id: iplib.py 2354 2007-05-16 13:06:12Z dan.milstein $ """ #***************************************************************************** @@ -75,6 +75,8 @@ from IPython.genutils import * from IPython.strdispatch import StrDispatch import IPython.ipapi +import IPython.prefilter as prefilter + # Globals # store the builtin raw_input globally, and use this always, in case user code @@ -385,6 +387,7 @@ class InteractiveShell(object,Magic): # escapes for automatic behavior on the command line self.ESC_SHELL = '!' + self.ESC_SH_CAP = '!!' self.ESC_HELP = '?' self.ESC_MAGIC = '%' self.ESC_QUOTE = ',' @@ -398,6 +401,7 @@ class InteractiveShell(object,Magic): self.ESC_MAGIC : self.handle_magic, self.ESC_HELP : self.handle_help, self.ESC_SHELL : self.handle_shell_escape, + self.ESC_SH_CAP : self.handle_shell_escape, } # class initializations @@ -484,57 +488,6 @@ class InteractiveShell(object,Magic): header=self.rc.system_header, verbose=self.rc.system_verbose) - # RegExp for splitting line contents into pre-char//first - # word-method//rest. For clarity, each group in on one line. - - # WARNING: update the regexp if the above escapes are changed, as they - # are hardwired in. 
- - # Don't get carried away with trying to make the autocalling catch too - # much: it's better to be conservative rather than to trigger hidden - # evals() somewhere and end up causing side effects. - self.line_split = re.compile(r'^(\s*[,;/]?\s*)' - r'([\?\w\.]+\w*\s*)' - r'(\(?.*$)') - - self.shell_line_split = re.compile(r'^(\s*)' - r'(\S*\s*)' - r'(\(?.*$)') - - # A simpler regexp used as a fallback if the above doesn't work. This - # one is more conservative in how it partitions the input. This code - # can probably be cleaned up to do everything with just one regexp, but - # I'm afraid of breaking something; do it once the unit tests are in - # place. - self.line_split_fallback = re.compile(r'^(\s*)' - r'([%\!\?\w\.]*)' - r'(.*)') - - # Original re, keep around for a while in case changes break something - #self.line_split = re.compile(r'(^[\s*!\?%,/]?)' - # r'(\s*[\?\w\.]+\w*\s*)' - # r'(\(?.*$)') - - # RegExp to identify potential function names - self.re_fun_name = re.compile(r'[a-zA-Z_]([a-zA-Z0-9_.]*) *$') - - # RegExp to exclude strings with this start from autocalling. In - # particular, all binary operators should be excluded, so that if foo - # is callable, foo OP bar doesn't become foo(OP bar), which is - # invalid. The characters '!=()' don't need to be checked for, as the - # _prefilter routine explicitely does so, to catch direct calls and - # rebindings of existing names. - - # Warning: the '-' HAS TO BE AT THE END of the first group, otherwise - # it affects the rest of the group in square brackets. - self.re_exclude_auto = re.compile(r'^[<>,&^\|\*/\+-]' - '|^is |^not |^in |^and |^or ') - - # try to catch also methods for stuff in lists/tuples/dicts: off - # (experimental). For this to work, the line_split regexp would need - # to be modified so it wouldn't break things at '['. That line is - # nasty enough that I shouldn't change it until I can test it _well_. 
- #self.re_fun_name = re.compile (r'[a-zA-Z_]([a-zA-Z0-9_.\[\]]*) ?$') # keep track of where we started running (mainly for crash post-mortem) self.starting_dir = os.getcwd() @@ -1731,8 +1684,8 @@ want to merge them back into the new files.""" % locals() done = Set() while 1: - pre,fn,rest = self.split_user_input(line, pattern = self.shell_line_split) - # print "!",fn,"!",rest # dbg + pre,fn,rest = prefilter.splitUserInput(line, + prefilter.shell_line_split) if fn in self.alias_table: if fn in done: warn("Cyclic alias definition, repeated '%s'" % fn) @@ -2056,53 +2009,6 @@ want to merge them back into the new files.""" % locals() else: return lineout - def split_user_input(self,line, pattern = None): - """Split user input into pre-char, function part and rest.""" - - if pattern is None: - pattern = self.line_split - - lsplit = pattern.match(line) - if lsplit is None: # no regexp match returns None - #print "match failed for line '%s'" % line # dbg - try: - iFun,theRest = line.split(None,1) - except ValueError: - #print "split failed for line '%s'" % line # dbg - iFun,theRest = line,'' - pre = re.match('^(\s*)(.*)',line).groups()[0] - else: - pre,iFun,theRest = lsplit.groups() - - # iFun has to be a valid python identifier, so it better be only pure - #ascii, no unicode: - try: - iFun = iFun.encode('ascii') - except UnicodeEncodeError: - theRest = iFun+u' '+theRest - iFun = u'' - - #print 'line:<%s>' % line # dbg - #print 'pre <%s> iFun <%s> rest <%s>' % (pre,iFun.strip(),theRest) # dbg - return pre,iFun.strip(),theRest - - # THIS VERSION IS BROKEN!!! It was intended to prevent spurious attribute - # accesses with a more stringent check of inputs, but it introduced other - # bugs. Disable it for now until I can properly fix it. 
- def split_user_inputBROKEN(self,line): - """Split user input into pre-char, function part and rest.""" - - lsplit = self.line_split.match(line) - if lsplit is None: # no regexp match returns None - lsplit = self.line_split_fallback.match(line) - - #pre,iFun,theRest = lsplit.groups() # dbg - #print 'line:<%s>' % line # dbg - #print 'pre <%s> iFun <%s> rest <%s>' % (pre,iFun.strip(),theRest) # dbg - #return pre,iFun.strip(),theRest # dbg - - return lsplit.groups() - def _prefilter(self, line, continue_prompt): """Calls different preprocessors, depending on the form of line.""" @@ -2112,15 +2018,6 @@ want to merge them back into the new files.""" % locals() # needed, update the cache AND log it (so that the input cache array # stays synced). - # This function is _very_ delicate, and since it's also the one which - # determines IPython's response to user input, it must be as efficient - # as possible. For this reason it has _many_ returns in it, trying - # always to exit as quickly as it can figure out what it needs to do. - - # This function is the main responsible for maintaining IPython's - # behavior respectful of Python's semantics. So be _very_ careful if - # making changes to anything here. - #..................................................................... 
# Code begins @@ -2131,6 +2028,8 @@ want to merge them back into the new files.""" % locals() self._last_input_line = line #print '***line: <%s>' % line # dbg + + line_info = prefilter.LineInfo(line, continue_prompt) # the input history needs to track even empty lines stripped = line.strip() @@ -2138,139 +2037,25 @@ want to merge them back into the new files.""" % locals() if not stripped: if not continue_prompt: self.outputcache.prompt_count -= 1 - return self.handle_normal(line,continue_prompt) - #return self.handle_normal('',continue_prompt) + return self.handle_normal(line_info) # print '***cont',continue_prompt # dbg # special handlers are only allowed for single line statements if continue_prompt and not self.rc.multi_line_specials: - return self.handle_normal(line,continue_prompt) - + return self.handle_normal(line_info) - # For the rest, we need the structure of the input - pre,iFun,theRest = self.split_user_input(line) - # See whether any pre-existing handler can take care of it - + # See whether any pre-existing handler can take care of it rewritten = self.hooks.input_prefilter(stripped) if rewritten != stripped: # ok, some prefilter did something - rewritten = pre + rewritten # add indentation - return self.handle_normal(rewritten) + rewritten = line_info.pre + rewritten # add indentation + return self.handle_normal(prefilter.LineInfo(rewritten, + continue_prompt)) #print 'pre <%s> iFun <%s> rest <%s>' % (pre,iFun,theRest) # dbg - # Next, check if we can automatically execute this thing - - # Allow ! in multi-line statements if multi_line_specials is on: - if continue_prompt and self.rc.multi_line_specials and \ - iFun.startswith(self.ESC_SHELL): - return self.handle_shell_escape(line,continue_prompt, - pre=pre,iFun=iFun, - theRest=theRest) - - # First check for explicit escapes in the last/first character - handler = None - if line[-1] == self.ESC_HELP and line[0] != self.ESC_SHELL: - handler = self.esc_handlers.get(line[-1]) # the ? 
can be at the end - if handler is None: - # look at the first character of iFun, NOT of line, so we skip - # leading whitespace in multiline input - handler = self.esc_handlers.get(iFun[0:1]) - if handler is not None: - return handler(line,continue_prompt,pre,iFun,theRest) - # Emacs ipython-mode tags certain input lines - if line.endswith('# PYTHON-MODE'): - return self.handle_emacs(line,continue_prompt) - - # instances of IPyAutocall in user_ns get autocalled immediately - obj = self.user_ns.get(iFun,None) - if isinstance(obj, IPython.ipapi.IPyAutocall): - obj.set_ip(self.api) - return self.handle_auto(line,continue_prompt, - pre,iFun,theRest,obj) - - # Let's try to find if the input line is a magic fn - oinfo = None - if hasattr(self,'magic_'+iFun): - # WARNING: _ofind uses getattr(), so it can consume generators and - # cause other side effects. - oinfo = self._ofind(iFun) # FIXME - _ofind is part of Magic - if oinfo['ismagic']: - # Be careful not to call magics when a variable assignment is - # being made (ls='hi', for example) - if self.rc.automagic and \ - (len(theRest)==0 or theRest[0] not in '!=()<>,') and \ - (self.rc.multi_line_specials or not continue_prompt): - return self.handle_magic(line,continue_prompt, - pre,iFun,theRest) - else: - return self.handle_normal(line,continue_prompt) - - # If the rest of the line begins with an (in)equality, assginment or - # function call, we should not call _ofind but simply execute it. - # This avoids spurious geattr() accesses on objects upon assignment. - # - # It also allows users to assign to either alias or magic names true - # python variables (the magic/alias systems always take second seat to - # true python code). - if theRest and theRest[0] in '!=()': - return self.handle_normal(line,continue_prompt) - - if oinfo is None: - # let's try to ensure that _oinfo is ONLY called when autocall is - # on. 
Since it has inevitable potential side effects, at least - # having autocall off should be a guarantee to the user that no - # weird things will happen. - - if self.rc.autocall: - oinfo = self._ofind(iFun) # FIXME - _ofind is part of Magic - else: - # in this case, all that's left is either an alias or - # processing the line normally. - if iFun in self.alias_table: - # if autocall is off, by not running _ofind we won't know - # whether the given name may also exist in one of the - # user's namespace. At this point, it's best to do a - # quick check just to be sure that we don't let aliases - # shadow variables. - head = iFun.split('.',1)[0] - if head in self.user_ns or head in self.internal_ns \ - or head in __builtin__.__dict__: - return self.handle_normal(line,continue_prompt) - else: - return self.handle_alias(line,continue_prompt, - pre,iFun,theRest) - - else: - return self.handle_normal(line,continue_prompt) - - if not oinfo['found']: - return self.handle_normal(line,continue_prompt) - else: - #print 'pre<%s> iFun <%s> rest <%s>' % (pre,iFun,theRest) # dbg - if oinfo['isalias']: - return self.handle_alias(line,continue_prompt, - pre,iFun,theRest) - - if (self.rc.autocall - and - ( - #only consider exclusion re if not "," or ";" autoquoting - (pre == self.ESC_QUOTE or pre == self.ESC_QUOTE2 - or pre == self.ESC_PAREN) or - (not self.re_exclude_auto.match(theRest))) - and - self.re_fun_name.match(iFun) and - callable(oinfo['obj'])) : - #print 'going auto' # dbg - return self.handle_auto(line,continue_prompt, - pre,iFun,theRest,oinfo['obj']) - else: - #print 'was callable?', callable(oinfo['obj']) # dbg - return self.handle_normal(line,continue_prompt) + return prefilter.prefilter(line_info, self) - # If we get here, we have a normal Python line. Log and return. 
- return self.handle_normal(line,continue_prompt) def _prefilter_dumb(self, line, continue_prompt): """simple prefilter function, for debugging""" @@ -2293,8 +2078,7 @@ want to merge them back into the new files.""" % locals() # Set the default prefilter() function (this can be user-overridden) prefilter = multiline_prefilter - def handle_normal(self,line,continue_prompt=None, - pre=None,iFun=None,theRest=None): + def handle_normal(self,line_info): """Handle normal input lines. Use as a template for handlers.""" # With autoindent on, we need some way to exit the input loop, and I @@ -2302,6 +2086,8 @@ want to merge them back into the new files.""" % locals() # clear the line. The rule will be in this case, that either two # lines of pure whitespace in a row, or a line of pure whitespace but # of a size different to the indent level, will exit the input loop. + line = line_info.line + continue_prompt = line_info.continue_prompt if (continue_prompt and self.autoindent and line.isspace() and (0 < abs(len(line) - self.indent_current_nsp) <= 2 or @@ -2311,55 +2097,62 @@ want to merge them back into the new files.""" % locals() self.log(line,line,continue_prompt) return line - def handle_alias(self,line,continue_prompt=None, - pre=None,iFun=None,theRest=None): - """Handle alias input lines. """ + def handle_alias(self,line_info): + """Handle alias input lines. """ + transformed = self.expand_aliases(line_info.iFun,line_info.theRest) # pre is needed, because it carries the leading whitespace. Otherwise # aliases won't work in indented sections. 
- transformed = self.expand_aliases(iFun, theRest) - line_out = '%s_ip.system(%s)' % (pre, make_quoted_expr( transformed )) - self.log(line,line_out,continue_prompt) + line_out = '%s_ip.system(%s)' % (line_info.preWhitespace, + make_quoted_expr( transformed )) + + self.log(line_info.line,line_out,line_info.continue_prompt) #print 'line out:',line_out # dbg return line_out - def handle_shell_escape(self, line, continue_prompt=None, - pre=None,iFun=None,theRest=None): + def handle_shell_escape(self, line_info): """Execute the line in a shell, empty return value""" - #print 'line in :', `line` # dbg - # Example of a special handler. Others follow a similar pattern. + line = line_info.line if line.lstrip().startswith('!!'): - # rewrite iFun/theRest to properly hold the call to %sx and - # the actual command to be executed, so handle_magic can work - # correctly - theRest = '%s %s' % (iFun[2:],theRest) - iFun = 'sx' - return self.handle_magic('%ssx %s' % (self.ESC_MAGIC, - line.lstrip()[2:]), - continue_prompt,pre,iFun,theRest) + # rewrite LineInfo's line, iFun and theRest to properly hold the + # call to %sx and the actual command to be executed, so + # handle_magic can work correctly. Note that this works even if + # the line is indented, so it handles multi_line_specials + # properly. 
+ new_rest = line.lstrip()[2:] + line_info.line = '%ssx %s' % (self.ESC_MAGIC,new_rest) + line_info.iFun = 'sx' + line_info.theRest = new_rest + return self.handle_magic(line_info) else: - cmd=line.lstrip().lstrip('!') - line_out = '%s_ip.system(%s)' % (pre,make_quoted_expr(cmd)) + cmd = line.lstrip().lstrip('!') + line_out = '%s_ip.system(%s)' % (line_info.preWhitespace, + make_quoted_expr(cmd)) # update cache/log and return - self.log(line,line_out,continue_prompt) + self.log(line,line_out,line_info.continue_prompt) return line_out - def handle_magic(self, line, continue_prompt=None, - pre=None,iFun=None,theRest=None): + def handle_magic(self, line_info): """Execute magic functions.""" - - - cmd = '%s_ip.magic(%s)' % (pre,make_quoted_expr(iFun + " " + theRest)) - self.log(line,cmd,continue_prompt) + iFun = line_info.iFun + theRest = line_info.theRest + cmd = '%s_ip.magic(%s)' % (line_info.preWhitespace, + make_quoted_expr(iFun + " " + theRest)) + self.log(line_info.line,cmd,line_info.continue_prompt) #print 'in handle_magic, cmd=<%s>' % cmd # dbg return cmd - def handle_auto(self, line, continue_prompt=None, - pre=None,iFun=None,theRest=None,obj=None): + def handle_auto(self, line_info): """Hande lines which can be auto-executed, quoting if requested.""" #print 'pre <%s> iFun <%s> rest <%s>' % (pre,iFun,theRest) # dbg + line = line_info.line + iFun = line_info.iFun + theRest = line_info.theRest + pre = line_info.pre + continue_prompt = line_info.continue_prompt + obj = line_info.ofind(self)['obj'] # This should only be active for single-line input! if continue_prompt: @@ -2408,14 +2201,14 @@ want to merge them back into the new files.""" % locals() self.log(line,newcmd,continue_prompt) return newcmd - def handle_help(self, line, continue_prompt=None, - pre=None,iFun=None,theRest=None): + def handle_help(self, line_info): """Try to get some help for the object. obj? or ?obj -> basic information. obj?? or ??obj -> more details. 
""" - + + line = line_info.line # We need to make sure that we don't process lines which would be # otherwise valid python, such as "x=1 # what?" try: @@ -2426,7 +2219,7 @@ want to merge them back into the new files.""" % locals() line = line[1:] elif line[-1]==self.ESC_HELP: line = line[:-1] - self.log(line,'#?'+line,continue_prompt) + self.log(line,'#?'+line,line_info.continue_prompt) if line: #print 'line:<%r>' % line # dbg self.magic_pinfo(line) @@ -2435,10 +2228,10 @@ want to merge them back into the new files.""" % locals() return '' # Empty string is needed here! except: # Pass any other exceptions through to the normal handler - return self.handle_normal(line,continue_prompt) + return self.handle_normal(line_info) else: # If the code compiles ok, we should handle it normally - return self.handle_normal(line,continue_prompt) + return self.handle_normal(line_info) def getapi(self): """ Get an IPApi object for this shell instance @@ -2451,17 +2244,16 @@ want to merge them back into the new files.""" % locals() """ return self.api - - def handle_emacs(self,line,continue_prompt=None, - pre=None,iFun=None,theRest=None): + + def handle_emacs(self, line_info): """Handle input lines marked by python-mode.""" # Currently, nothing is done. Later more functionality can be added # here if needed. # The input cache shouldn't be updated - - return line + return line_info.line + def mktempfile(self,data=None): """Make a new tempfile and return its filename. diff --git a/IPython/prefilter.py b/IPython/prefilter.py new file mode 100644 index 0000000..070167e --- /dev/null +++ b/IPython/prefilter.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +""" +Classes and functions for prefiltering (transforming) a line of user input. +This module is responsible, primarily, for breaking the line up into useful +pieces and triggering the appropriate handlers in iplib to do the actual +transforming work. 
+""" +__docformat__ = "restructuredtext en" + +import re +import IPython.ipapi + +class LineInfo(object): + """A single line of input and associated info. + + Includes the following as properties: + + line + The original, raw line + + continue_prompt + Is this line a continuation in a sequence of multiline input? + + pre + The initial esc character or whitespace. + + preChar + The escape character(s) in pre or the empty string if there isn't one. + Note that '!!' is a possible value for preChar. Otherwise it will + always be a single character. + + preWhitespace + The leading whitespace from pre if it exists. If there is a preChar, + this is just ''. + + iFun + The 'function part', which is basically the maximal initial sequence + of valid python identifiers and the '.' character. This is what is + checked for alias and magic transformations, used for auto-calling, + etc. + + theRest + Everything else on the line. + """ + def __init__(self, line, continue_prompt): + self.line = line + self.continue_prompt = continue_prompt + self.pre, self.iFun, self.theRest = splitUserInput(line) + + self.preChar = self.pre.strip() + if self.preChar: + self.preWhitespace = '' # No whitespace allowd before esc chars + else: + self.preWhitespace = self.pre + + self._oinfo = None + + def ofind(self, ip): + """Do a full, attribute-walking lookup of the iFun in the various + namespaces for the given IPython InteractiveShell instance. + + Return a dict with keys: found,obj,ospace,ismagic + + Note: can cause state changes because of calling getattr, but should + only be run if autocall is on and if the line hasn't matched any + other, less dangerous handlers. + + Does cache the results of the call, so can be called multiple times + without worrying about *further* damaging state. + """ + if not self._oinfo: + self._oinfo = ip._ofind(self.iFun) + return self._oinfo + + +def splitUserInput(line, pattern=None): + """Split user input into pre-char/whitespace, function part and rest. 
+ + Mostly internal to this module, but also used by iplib.expand_aliases, + which passes in a shell pattern. + """ + # It seems to me that the shell splitting should be a separate method. + + if not pattern: + pattern = line_split + match = pattern.match(line) + if not match: + #print "match failed for line '%s'" % line + try: + iFun,theRest = line.split(None,1) + except ValueError: + #print "split failed for line '%s'" % line + iFun,theRest = line,'' + pre = re.match('^(\s*)(.*)',line).groups()[0] + else: + pre,iFun,theRest = match.groups() + + # iFun has to be a valid python identifier, so it better be only pure + # ascii, no unicode: + try: + iFun = iFun.encode('ascii') + except UnicodeEncodeError: + theRest = iFun + u' ' + theRest + iFun = u'' + + #print 'line:<%s>' % line # dbg + #print 'pre <%s> iFun <%s> rest <%s>' % (pre,iFun.strip(),theRest) # dbg + return pre,iFun.strip(),theRest + + +# RegExp for splitting line contents into pre-char//first word-method//rest. +# For clarity, each group is on one line. + +# WARNING: update the regexp if the escapes in iplib are changed, as they +# are hardwired in. + +# Although it's not solely driven by the regex, note that: +# ,;/% only trigger if they are the first character on the line +# ! and !! trigger if they are first char(s) *or* follow an indent +# ? triggers as first or last char. + +# The three parts of the regex are: +# 1) pre: pre_char *or* initial whitespace +# 2) iFun: first word/method (mix of \w and '.') +# 3) theRest: rest of line +line_split = re.compile(r'^([,;/%?]|!!?|\s*)' + r'\s*([\w\.]+)\s*' + r'(.*)$') + +shell_line_split = re.compile(r'^(\s*)(\S*\s*)(.*$)') + +def prefilter(line_info, ip): + """Call one of the passed-in InteractiveShell's handler preprocessors, + depending on the form of the line. Return the results, which must be a + value, even if it's a blank ('').""" + # Note: the order of these checks does matter. 
+ for check in [ checkEmacs, + checkIPyAutocall, + checkMultiLineShell, + checkEscChars, + checkPythonChars, + checkAutomagic, + checkAlias, + checkAutocall, + ]: + handler = check(line_info, ip) + if handler: + return handler(line_info) + + return ip.handle_normal(line_info) + +# Handler checks +# +# All have the same interface: they take a LineInfo object and a ref to the +# iplib.InteractiveShell object. They check the line to see if a particular +# handler should be called, and return either a handler or None. The +# handlers which they return are *bound* methods of the InteractiveShell +# object. +# +# In general, these checks should only take responsibility for their 'own' +# handler. If it doesn't get triggered, they should just return None and +# let the rest of the check sequence run. +def checkEmacs(l_info,ip): + "Emacs ipython-mode tags certain input lines." + if l_info.line.endswith('# PYTHON-MODE'): + return ip.handle_emacs + else: + return None + +def checkIPyAutocall(l_info,ip): + "Instances of IPyAutocall in user_ns get autocalled immediately" + obj = ip.user_ns.get(l_info.iFun, None) + if isinstance(obj, IPython.ipapi.IPyAutocall): + obj.set_ip(ip.api) + return ip.handle_auto + else: + return None + + +def checkMultiLineShell(l_info,ip): + "Allow ! and !! in multi-line statements if multi_line_specials is on" + # Note that this one of the only places we check the first character of + # iFun and *not* the preChar. Also note that the below test matches + # both ! and !!. + if l_info.continue_prompt \ + and ip.rc.multi_line_specials \ + and l_info.iFun.startswith(ip.ESC_SHELL): + return ip.handle_shell_escape + else: + return None + +def checkEscChars(l_info,ip): + """Check for escape character and return either a handler to handle it, + or None if there is no escape char.""" + if l_info.line[-1] == ip.ESC_HELP \ + and l_info.preChar != ip.ESC_SHELL \ + and l_info.preChar != ip.ESC_SH_CAP: + # the ? 
can be at the end, but *not* for either kind of shell escape, + # because a ? can be a vaild final char in a shell cmd + return ip.handle_help + elif l_info.preChar in ip.esc_handlers: + return ip.esc_handlers[l_info.preChar] + else: + return None + +def checkPythonChars(l_info,ip): + """If the 'rest' of the line begins with an (in)equality, assginment, + function call or tuple comma, we should simply execute the line + (regardless of whether or not there's a possible alias, automagic or + autocall expansion). This both avoids spurious geattr() accesses on + objects upon assignment, and also allows users to assign to either alias + or magic names true python variables (the magic/alias systems always + take second seat to true python code). E.g. ls='hi', or ls,that=1,2""" + if l_info.theRest and l_info.theRest[0] in '!=()<>,+*/%^&|': + return ip.handle_normal + else: + return None + +def checkAutomagic(l_info,ip): + """If the iFun is magic, and automagic is on, run it. Note: normal, + non-auto magic would already have been triggered via '%' in + check_esc_chars. This just checks for automagic.""" + if not ip.rc.automagic or not hasattr(ip,'magic_'+l_info.iFun): + return None + + # We have a likely magic method. Make sure we should actually call it. + if l_info.continue_prompt and not ip.rc.multi_line_specials: + return None + + head = l_info.iFun.split('.',1)[0] + if isShadowed(head,ip): + return None + + return ip.handle_magic + + +def checkAlias(l_info,ip): + "Check if the initital identifier on the line is an alias." + # Note: aliases can not contain '.' + head = l_info.iFun.split('.',1)[0] + + if l_info.iFun not in ip.alias_table \ + or head not in ip.alias_table \ + or isShadowed(head,ip): + return None + + return ip.handle_alias + + +def checkAutocall(l_info,ip): + "Check if the initial word/function is callable and autocall is on." 
+ if not ip.rc.autocall: + return None + + oinfo = l_info.ofind(ip) # This can mutate state via getattr + if not oinfo['found']: + return None + + if callable(oinfo['obj']) \ + and (not re_exclude_auto.match(l_info.theRest)) \ + and re_fun_name.match(l_info.iFun): + #print 'going auto' # dbg + return ip.handle_auto + else: + #print 'was callable?', callable(l_info.oinfo['obj']) # dbg + return None + +# RegExp to identify potential function names +re_fun_name = re.compile(r'[a-zA-Z_]([a-zA-Z0-9_.]*) *$') + +# RegExp to exclude strings with this start from autocalling. In +# particular, all binary operators should be excluded, so that if foo is +# callable, foo OP bar doesn't become foo(OP bar), which is invalid. The +# characters '!=()' don't need to be checked for, as the checkPythonChars +# routine explicitly does so, to catch direct calls and rebindings of +# existing names. + +# Warning: the '-' HAS TO BE AT THE END of the first group, otherwise +# it affects the rest of the group in square brackets. +re_exclude_auto = re.compile(r'^[,&^\|\*/\+-]' + r'|^is |^not |^in |^and |^or ') + +# try to catch also methods for stuff in lists/tuples/dicts: off +# (experimental). For this to work, the line_split regexp would need +# to be modified so it wouldn't break things at '['. That line is +# nasty enough that I shouldn't change it until I can test it _well_. +#self.re_fun_name = re.compile (r'[a-zA-Z_]([a-zA-Z0-9_.\[\]]*) ?$') + +# Handler Check Utilities +def isShadowed(identifier,ip): + """Is the given identifier defined in one of the namespaces which shadow + the alias and magic namespaces? Note that an identifier is different + than iFun, because it can not contain a '.' 
character.""" + # This is much safer than calling ofind, which can change state + return (identifier in ip.user_ns \ + or identifier in ip.internal_ns \ + or identifier in ip.ns_table['builtin']) + diff --git a/test/test_handlers.py b/test/test_handlers.py new file mode 100644 index 0000000..0cf29ca --- /dev/null +++ b/test/test_handlers.py @@ -0,0 +1,200 @@ +"""Test the various handlers which do the actual rewriting of the line.""" + +from StringIO import StringIO +import sys + +failures = [] +num_tests = 0 + +def run(tests): + """Loop through a list of (pre, post) inputs, where pre is the string + handed to ipython, and post is how that string looks after it's been + transformed (i.e. ipython's notion of _i)""" + for pre, post in tests: + global num_tests + num_tests += 1 + ip.runlines(pre) + ip.runlines('_i') # Not sure why I need this... + actual = ip.user_ns['_i'] + if actual != None: actual = actual.rstrip('\n') + if actual != post: + failures.append('Expected %r to become %r, found %r' % ( + pre, post, actual)) + + +# Shutdown stdout/stderr so that ipython isn't noisy during tests. Have to +# do this *before* importing IPython below. +# +# NOTE: this means that, if you stick print statements into code as part of +# debugging, you won't see the results (unless you comment out some of the +# below). I keep on doing this, so apparently it's easy. Or I am an idiot. +old_stdout = sys.stdout +old_stderr = sys.stderr + +sys.stdout = StringIO() +sys.stderr = StringIO() + +import IPython +import IPython.ipapi + +IPython.Shell.start() +ip = IPython.ipapi.get() + +class CallableIndexable(object): + def __getitem__(self, idx): return True + def __call__(self, *args, **kws): return True + + +try: + # alias expansion + + # We're using 'true' as our syscall of choice because it doesn't + # write anything to stdout. 
+ + # Turn off actual execution of aliases, because it's noisy + old_system_cmd = ip.IP.system + ip.IP.system = lambda cmd: None + + + ip.IP.alias_table['an_alias'] = (0, 'true') + # These are useful for checking a particular recursive alias issue + ip.IP.alias_table['top'] = (0, 'd:/cygwin/top') + ip.IP.alias_table['d'] = (0, 'true') + run([("an_alias", '_ip.system("true ")'), # alias + # Below: recursive aliases should expand whitespace-surrounded + # chars, *not* initial chars which happen to be aliases: + ("top", '_ip.system("d:/cygwin/top ")'), + ]) + ip.IP.system = old_system_cmd + + + call_idx = CallableIndexable() + ip.to_user_ns('call_idx') + + # For many of the below, we're also checking that leading whitespace + # turns off the esc char, which it should unless there is a continuation + # line. + run([('"no change"', '"no change"'), # normal + ("!true", '_ip.system("true")'), # shell_escapes + ("!! true", '_ip.magic("sx true")'), # shell_escapes + magic + ("!!true", '_ip.magic("sx true")'), # shell_escapes + magic + ("%lsmagic", '_ip.magic("lsmagic ")'), # magic + ("lsmagic", '_ip.magic("lsmagic ")'), # magic + ("a = b # PYTHON-MODE", '_i'), # emacs -- avoids _in cache + + # post-esc-char whitespace goes inside + ("! true", '_ip.system(" true")'), + + # Leading whitespace generally turns off escape characters + (" ! true", ' ! true'), + (" !true", ' !true'), + + # handle_help + + # These are weak tests -- just looking at what the help handlers + # logs, which is not how it really does its work. But it still + # lets us check the key paths through the handler. 
+ + ("x=1 # what?", "x=1 # what?"), # no help if valid python + ("len?", "#?len"), # this is what help logs when it runs + ("len??", "#?len?"), + ("?len", "#?len"), + ]) + + # multi_line_specials + ip.options.multi_line_specials = 0 + # W/ multi_line_specials off, leading ws kills esc chars/autoexpansion + run([ + ('if 1:\n !true', 'if 1:\n !true'), + ('if 1:\n lsmagic', 'if 1:\n lsmagic'), + ('if 1:\n an_alias', 'if 1:\n an_alias'), + ]) + + ip.options.multi_line_specials = 1 + # initial indents must be preserved. + run([ + ('if 1:\n !true', 'if 1:\n _ip.system("true")'), + ('if 1:\n lsmagic', 'if 1:\n _ip.magic("lsmagic ")'), + ('if 1:\n an_alias', 'if 1:\n _ip.system("true ")'), + # Weird one + ('if 1:\n !!true', 'if 1:\n _ip.magic("sx true")'), + + + # Even with m_l_s on, all esc_chars except ! are off + ('if 1:\n %lsmagic', 'if 1:\n %lsmagic'), + ('if 1:\n /fun 1 2', 'if 1:\n /fun 1 2'), + ('if 1:\n ;fun 1 2', 'if 1:\n ;fun 1 2'), + ('if 1:\n ,fun 1 2', 'if 1:\n ,fun 1 2'), + ('if 1:\n ?fun 1 2', 'if 1:\n ?fun 1 2'), + # What about !! + ]) + + + # Objects which are instances of IPyAutocall are *always* autocalled + import IPython.ipapi + class Autocallable(IPython.ipapi.IPyAutocall): + def __call__(self): + return "called" + + autocallable = Autocallable() + ip.to_user_ns('autocallable') + + # auto + ip.options.autocall = 0 + # Only explicit escapes or instances of IPyAutocall should get + # expanded + run([ + ('len "abc"', 'len "abc"'), + ('autocallable', 'autocallable()'), + (",list 1 2 3", 'list("1", "2", "3")'), + (";list 1 2 3", 'list("1 2 3")'), + ("/len range(1,4)", 'len(range(1,4))'), + ]) + ip.options.autocall = 1 + run([ + (",list 1 2 3", 'list("1", "2", "3")'), + (";list 1 2 3", 'list("1 2 3")'), + ("/len range(1,4)", 'len(range(1,4))'), + ('len "abc"', 'len("abc")'), + ('len "abc";', 'len("abc");'), # ; is special -- moves out of parens + # Autocall is turned off if first arg is [] and the object + # is both callable and indexable. 
Like so: + ('len [1,2]', 'len([1,2])'), # len doesn't support __getitem__... + ('call_idx [1]', 'call_idx [1]'), # call_idx *does*.. + ('call_idx 1', 'call_idx(1)'), + ('len', 'len '), # only at 2 does it auto-call on single args + ]) + + ip.options.autocall = 2 + run([ + (",list 1 2 3", 'list("1", "2", "3")'), + (";list 1 2 3", 'list("1 2 3")'), + ("/len range(1,4)", 'len(range(1,4))'), + ('len "abc"', 'len("abc")'), + ('len "abc";', 'len("abc");'), + ('len [1,2]', 'len([1,2])'), + ('call_idx [1]', 'call_idx [1]'), + ('call_idx 1', 'call_idx(1)'), + # This is what's different: + ('len', 'len()'), # only at 2 does it auto-call on single args + ]) + ip.options.autocall = 1 + + # Ignoring handle_emacs, 'cause it doesn't do anything. +finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + + + + +num_f = len(failures) +#if verbose: +# print + + +print "%s tests run, %s failure%s" % (num_tests, + num_f, + num_f != 1 and "s" or "") +for f in failures: + print f diff --git a/test/test_prefilter.py b/test/test_prefilter.py index cc4675e..69a622f 100644 --- a/test/test_prefilter.py +++ b/test/test_prefilter.py @@ -73,6 +73,7 @@ def reset_esc_handlers(): s.ESC_MAGIC : s.handle_magic, s.ESC_HELP : s.handle_help, s.ESC_SHELL : s.handle_shell_escape, + s.ESC_SH_CAP : s.handle_shell_escape, } reset_esc_handlers() @@ -145,7 +146,14 @@ esc_handler_tests = [ ( '?thing', handle_help, ), ( 'thing?', handle_help ), # '?' can trail... ( 'thing!', handle_normal), # but only '?' can trail - ( ' ?thing', handle_help), # ignore leading whitespace + ( ' ?thing', handle_normal), # leading whitespace turns off esc chars + ( '!ls', handle_shell_escape), + ( '! true', handle_shell_escape), + ( '!! true', handle_shell_escape), + ( '%magic', handle_magic), + # XXX Possibly, add test for /,; once those are unhooked from %autocall + ( 'emacs_mode # PYTHON-MODE', handle_emacs ), + ( ' ', handle_normal), # Trailing qmark combos. 
Odd special cases abound ( '!thing?', handle_shell_escape), # trailing '?' loses to shell esc ( '!thing ?', handle_shell_escape), @@ -154,11 +162,6 @@ esc_handler_tests = [ ( '/cmd?', handle_help), ( ';cmd?', handle_help), ( ',cmd?', handle_help), - ( '!ls', handle_shell_escape ), - ( '%magic', handle_magic), - # Possibly, add test for /,; once those are unhooked from %autocall - ( 'emacs_mode # PYTHON-MODE', handle_emacs ), - ( ' ', handle_normal), ] run_handler_tests(esc_handler_tests) @@ -167,19 +170,26 @@ run_handler_tests(esc_handler_tests) # Shell Escapes in Multi-line statements # ====================================== # -# We can't test this via runlines, since the hacked over-handlers all -# return None, so continue_prompt never becomes true. Instead we drop -# into prefilter directly and pass in continue_prompt. +# We can't test this via runlines, since the hacked-over-for-testing +# handlers all return None, so continue_prompt never becomes true. Instead +# we drop into prefilter directly and pass in continue_prompt. old_mls = ip.options.multi_line_specials -ln = '!ls $f multi_line_specials on' -ignore = ip.IP.prefilter(ln, continue_prompt=True) -check_handler(handle_shell_escape, ln) - -ip.options.multi_line_specials = 0 -ln = '!ls $f multi_line_specials off' -ignore = ip.IP.prefilter(ln, continue_prompt=True) -check_handler(handle_normal, ln) +for ln in [ ' !ls $f multi_line_specials %s', + ' !!ls $f multi_line_specials %s', # !! escapes work on mls + # Trailing ? 
doesn't trigger help: + ' !ls $f multi_line_specials %s ?', + ' !!ls $f multi_line_specials %s ?', + ]: + ip.options.multi_line_specials = 1 + on_ln = ln % 'on' + ignore = ip.IP.prefilter(on_ln, continue_prompt=True) + check_handler(handle_shell_escape, on_ln) + + ip.options.multi_line_specials = 0 + off_ln = ln % 'off' + ignore = ip.IP.prefilter(off_ln, continue_prompt=True) + check_handler(handle_normal, off_ln) ip.options.multi_line_specials = old_mls @@ -190,6 +200,7 @@ ip.options.multi_line_specials = old_mls # Pick one magic fun and one non_magic fun, make sure both exist assert hasattr(ip.IP, "magic_cpaste") assert not hasattr(ip.IP, "magic_does_not_exist") +ip.options.autocall = 0 # gotta have this off to get handle_normal ip.options.automagic = 0 run_handler_tests([ # Without automagic, only shows up with explicit escape @@ -214,12 +225,12 @@ run_handler_tests(magic_killing_tests) # magic on indented continuation lines -- on iff multi_line_specials == 1 ip.options.multi_line_specials = 0 -ln = 'cpaste multi_line off kills magic' +ln = ' cpaste multi_line off kills magic' ignore = ip.IP.prefilter(ln, continue_prompt=True) check_handler(handle_normal, ln) ip.options.multi_line_specials = 1 -ln = 'cpaste multi_line on enables magic' +ln = ' cpaste multi_line on enables magic' ignore = ip.IP.prefilter(ln, continue_prompt=True) check_handler(handle_magic, ln) @@ -231,6 +242,7 @@ run_handler_tests([ del ip.user_ns['cpaste'] + # Check for !=() turning off .ofind # ================================= class AttributeMutator(object): @@ -249,7 +261,7 @@ ip.options.autocall = 1 run_one_test('attr_mutator.foo should mutate', handle_normal) check(attr_mutator.called, 'ofind should be called in absence of assign characters') -for c in list('!=()'): # XXX What about <> -- they *are* important above +for c in list('!=()<>+*/%^&|'): attr_mutator.called = False run_one_test('attr_mutator.foo %s should *not* mutate' % c, handle_normal) run_one_test('attr_mutator.foo%s should 
*not* mutate' % c, handle_normal) @@ -296,51 +308,73 @@ ip.options.autocall = 1 # Autocall # ======== -# First, with autocalling fully off -ip.options.autocall = 0 -run_handler_tests( [ - # Since len is callable, these *should* get auto-called - - # XXX Except, at the moment, they're *not*, because the code is wrong - # XXX So I'm commenting 'em out to keep the tests quiet +# For all the tests below, 'len' is callable / 'thing' is not - #( '/len autocall_0', handle_auto), - #( ',len autocall_0 b0', handle_auto), - #( ';len autocall_0 b0', handle_auto), +# Objects which are instances of IPyAutocall are *always* autocalled +import IPython.ipapi +class Autocallable(IPython.ipapi.IPyAutocall): + def __call__(self): + return "called" - # But these, since fun is not a callable, should *not* get auto-called - ( '/fun autocall_0', handle_normal), - ( ',fun autocall_0 b0', handle_normal), - ( ';fun autocall_0 b0', handle_normal), +autocallable = Autocallable() +ip.to_user_ns('autocallable') + - # With no escapes, no autocalling should happen, callable or not +# First, with autocalling fully off +ip.options.autocall = 0 +run_handler_tests( [ + # With no escapes, no autocalling expansions happen, callable or not, + # unless the obj extends IPyAutocall ( 'len autocall_0', handle_normal), - ( 'fun autocall_0', handle_normal), + ( 'thing autocall_0', handle_normal), + ( 'autocallable', handle_auto), + + # With explicit escapes, callable and non-callables both get expanded, + # regardless of the %autocall setting: + ( '/len autocall_0', handle_auto), + ( ',len autocall_0 b0', handle_auto), + ( ';len autocall_0 b0', handle_auto), + + ( '/thing autocall_0', handle_auto), + ( ',thing autocall_0 b0', handle_auto), + ( ';thing autocall_0 b0', handle_auto), + + # Explicit autocall should not trigger if there is leading whitespace + ( ' /len autocall_0', handle_normal), + ( ' ;len autocall_0', handle_normal), + ( ' ,len autocall_0', handle_normal), + ( ' / len autocall_0', 
handle_normal), + + # But should work if the whitespace comes after the esc char + ( '/ len autocall_0', handle_auto), + ( '; len autocall_0', handle_auto), + ( ', len autocall_0', handle_auto), + ( '/ len autocall_0', handle_auto), ]) # Now, with autocall in default, 'smart' mode ip.options.autocall = 1 run_handler_tests( [ - # Since len is callable, these *do* get auto-called - ( '/len a1', handle_auto), - ( ',len a1 b1', handle_auto), - ( ';len a1 b1', handle_auto), - # But these, since fun is not a callable, should *not* get auto-called - ( '/fun a1', handle_normal), - ( ',fun a1 b1', handle_normal), - ( ';fun a1 b1', handle_normal), - # Autocalls without escapes - ( 'len a1', handle_auto), - ( 'fun a1', handle_normal), # Not callable -> no add + # Autocalls without escapes -- only expand if it's callable + ( 'len a1', handle_auto), + ( 'thing a1', handle_normal), + ( 'autocallable', handle_auto), + + # As above, all explicit escapes generate auto-calls, callable or not + ( '/len a1', handle_auto), + ( ',len a1 b1', handle_auto), + ( ';len a1 b1', handle_auto), + ( '/thing a1', handle_auto), + ( ',thing a1 b1', handle_auto), + ( ';thing a1 b1', handle_auto), + # Autocalls only happen on things which look like funcs, even if # explicitly requested. Which, in this case means they look like a - # sequence of identifiers and . attribute references. So the second - # test should pass, but it's not at the moment (meaning, IPython is - # attempting to run an autocall). Though it does blow up in ipython - # later (because of how lines are split, I think). + # sequence of identifiers and . attribute references. Possibly the + # second of these two should trigger handle_auto. But not for now. ( '"abc".join range(4)', handle_normal), - # XXX ( '/"abc".join range(4)', handle_normal), + ( '/"abc".join range(4)', handle_normal), ])