From 76b8481697d3190eb7ed019c7c431bd1ad8dd39f 2011-09-07 11:18:47 From: Thomas Kluyver Date: 2011-09-07 11:18:47 Subject: [PATCH] Reuse common code for inputsplitter and prefilter. --- diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py index d836630..87fe9a5 100644 --- a/IPython/core/inputsplitter.py +++ b/IPython/core/inputsplitter.py @@ -74,6 +74,7 @@ import tokenize from StringIO import StringIO # IPython modules +from IPython.core.splitinput import split_user_input, LineInfo from IPython.utils.text import make_quoted_expr from IPython.utils.py3compat import cast_unicode @@ -482,132 +483,10 @@ class InputSplitter(object): # Functions and classes for IPython-specific syntactic support #----------------------------------------------------------------------------- -# RegExp for splitting line contents into pre-char//first word-method//rest. -# For clarity, each group in on one line. - -line_split = re.compile(""" - ^(\s*) # any leading space - ([,;/%]|!!?|\?\??) # escape character or characters - \s*(%?[\w\.\*]*) # function/method, possibly with leading % - # to correctly treat things like '?%magic' - (\s+.*$|$) # rest of line - """, re.VERBOSE) - - -def split_user_input(line): - """Split user input into early whitespace, esc-char, function part and rest. - - This is currently handles lines with '=' in them in a very inconsistent - manner. 
- - Examples - ======== - >>> split_user_input('x=1') - ('', '', 'x=1', '') - >>> split_user_input('?') - ('', '?', '', '') - >>> split_user_input('??') - ('', '??', '', '') - >>> split_user_input(' ?') - (' ', '?', '', '') - >>> split_user_input(' ??') - (' ', '??', '', '') - >>> split_user_input('??x') - ('', '??', 'x', '') - >>> split_user_input('?x=1') - ('', '', '?x=1', '') - >>> split_user_input('!ls') - ('', '!', 'ls', '') - >>> split_user_input(' !ls') - (' ', '!', 'ls', '') - >>> split_user_input('!!ls') - ('', '!!', 'ls', '') - >>> split_user_input(' !!ls') - (' ', '!!', 'ls', '') - >>> split_user_input(',ls') - ('', ',', 'ls', '') - >>> split_user_input(';ls') - ('', ';', 'ls', '') - >>> split_user_input(' ;ls') - (' ', ';', 'ls', '') - >>> split_user_input('f.g(x)') - ('', '', 'f.g(x)', '') - >>> split_user_input('f.g (x)') - ('', '', 'f.g', '(x)') - >>> split_user_input('?%hist') - ('', '?', '%hist', '') - >>> split_user_input('?x*') - ('', '?', 'x*', '') - """ - match = line_split.match(line) - if match: - lspace, esc, fpart, rest = match.groups() - else: - # print "match failed for line '%s'" % line - try: - fpart, rest = line.split(None, 1) - except ValueError: - # print "split failed for line '%s'" % line - fpart, rest = line,'' - lspace = re.match('^(\s*)(.*)', line).groups()[0] - esc = '' - - # fpart has to be a valid python identifier, so it better be only pure - # ascii, no unicode: - try: - fpart = fpart.encode('ascii') - except UnicodeEncodeError: - lspace = unicode(lspace) - rest = fpart + u' ' + rest - fpart = u'' - - #print 'line:<%s>' % line # dbg - #print 'esc <%s> fpart <%s> rest <%s>' % (esc,fpart.strip(),rest) # dbg - return lspace, esc, fpart.strip(), rest.lstrip() - - # The escaped translators ALL receive a line where their own escape has been # stripped. Only '?' is valid at the end of the line, all others can only be # placed at the start. -class LineInfo(object): - """A single line of input and associated info. 
- - This is a utility class that mostly wraps the output of - :func:`split_user_input` into a convenient object to be passed around - during input transformations. - - Includes the following as properties: - - line - The original, raw line - - lspace - Any early whitespace before actual text starts. - - esc - The initial esc character (or characters, for double-char escapes like - '??' or '!!'). - - fpart - The 'function part', which is basically the maximal initial sequence - of valid python identifiers and the '.' character. This is what is - checked for alias and magic transformations, used for auto-calling, - etc. - - rest - Everything else on the line. - """ - def __init__(self, line): - self.line = line - self.lspace, self.esc, self.fpart, self.rest = \ - split_user_input(line) - - def __str__(self): - return "LineInfo [%s|%s|%s|%s]" % (self.lspace, self.esc, - self.fpart, self.rest) - - # Transformations of the special syntaxes that don't rely on an explicit escape # character but instead on patterns on the input line @@ -690,10 +569,10 @@ def _make_help_call(target, esc, lspace, next_input=None): _initial_space_re = re.compile(r'\s*') _help_end_re = re.compile(r"""(%? - [a-zA-Z_*][a-zA-Z0-9_*]* # Variable name - (\.[a-zA-Z_*][a-zA-Z0-9_*]*)* # .etc.etc + [a-zA-Z_*][\w*]* # Variable name + (\.[a-zA-Z_*][\w*]*)* # .etc.etc ) - (\?\??)$ # ? or ??""", + (\?\??)$ # ? or ??""", re.VERBOSE) def transform_help_end(line): """Translate lines with ?/?? at the end""" @@ -703,7 +582,6 @@ def transform_help_end(line): target = m.group(1) esc = m.group(3) lspace = _initial_space_re.match(line).group(0) - newline = _make_help_call(target, esc, lspace) # If we're mid-command, put it back on the next prompt for the user. next_input = line.rstrip('?') if line.strip() != m.group(0) else None @@ -731,14 +609,14 @@ class EscapedTransformer(object): def _tr_system(line_info): "Translate lines escaped with: !" 
cmd = line_info.line.lstrip().lstrip(ESC_SHELL) - return '%sget_ipython().system(%s)' % (line_info.lspace, + return '%sget_ipython().system(%s)' % (line_info.pre, make_quoted_expr(cmd)) @staticmethod def _tr_system2(line_info): "Translate lines escaped with: !!" cmd = line_info.line.lstrip()[2:] - return '%sget_ipython().getoutput(%s)' % (line_info.lspace, + return '%sget_ipython().getoutput(%s)' % (line_info.pre, make_quoted_expr(cmd)) @staticmethod @@ -748,33 +626,33 @@ class EscapedTransformer(object): if not line_info.line[1:]: return 'get_ipython().show_usage()' - return _make_help_call(line_info.fpart, line_info.esc, line_info.lspace) + return _make_help_call(line_info.ifun, line_info.esc, line_info.pre) @staticmethod def _tr_magic(line_info): "Translate lines escaped with: %" tpl = '%sget_ipython().magic(%s)' - cmd = make_quoted_expr(' '.join([line_info.fpart, - line_info.rest]).strip()) - return tpl % (line_info.lspace, cmd) + cmd = make_quoted_expr(' '.join([line_info.ifun, + line_info.the_rest]).strip()) + return tpl % (line_info.pre, cmd) @staticmethod def _tr_quote(line_info): "Translate lines escaped with: ," - return '%s%s("%s")' % (line_info.lspace, line_info.fpart, - '", "'.join(line_info.rest.split()) ) + return '%s%s("%s")' % (line_info.pre, line_info.ifun, + '", "'.join(line_info.the_rest.split()) ) @staticmethod def _tr_quote2(line_info): "Translate lines escaped with: ;" - return '%s%s("%s")' % (line_info.lspace, line_info.fpart, - line_info.rest) + return '%s%s("%s")' % (line_info.pre, line_info.ifun, + line_info.the_rest) @staticmethod def _tr_paren(line_info): "Translate lines escaped with: /" - return '%s%s(%s)' % (line_info.lspace, line_info.fpart, - ", ".join(line_info.rest.split())) + return '%s%s(%s)' % (line_info.pre, line_info.ifun, + ", ".join(line_info.the_rest.split())) def __call__(self, line): """Class to transform lines that are explicitly escaped out. 
@@ -843,7 +721,7 @@ class IPythonInputSplitter(InputSplitter): lines_list = lines.splitlines() transforms = [transform_ipy_prompt, transform_classic_prompt, - transform_escaped, transform_help_end, + transform_help_end, transform_escaped, transform_assign_system, transform_assign_magic] # Transform logic diff --git a/IPython/core/prefilter.py b/IPython/core/prefilter.py index cfaf212..c31b34b 100755 --- a/IPython/core/prefilter.py +++ b/IPython/core/prefilter.py @@ -32,7 +32,7 @@ from IPython.core.alias import AliasManager from IPython.core.autocall import IPyAutocall from IPython.config.configurable import Configurable from IPython.core.macro import Macro -from IPython.core.splitinput import split_user_input +from IPython.core.splitinput import split_user_input, LineInfo from IPython.core import page from IPython.utils.traitlets import List, Int, Any, Unicode, CBool, Bool, Instance @@ -92,78 +92,6 @@ def is_shadowed(identifier, ip): #----------------------------------------------------------------------------- -# The LineInfo class used throughout -#----------------------------------------------------------------------------- - - -class LineInfo(object): - """A single line of input and associated info. - - Includes the following as properties: - - line - The original, raw line - - continue_prompt - Is this line a continuation in a sequence of multiline input? - - pre - The initial esc character or whitespace. - - pre_char - The escape character(s) in pre or the empty string if there isn't one. - Note that '!!' is a possible value for pre_char. Otherwise it will - always be a single character. - - pre_whitespace - The leading whitespace from pre if it exists. If there is a pre_char, - this is just ''. - - ifun - The 'function part', which is basically the maximal initial sequence - of valid python identifiers and the '.' character. This is what is - checked for alias and magic transformations, used for auto-calling, - etc. 
- - the_rest - Everything else on the line. - """ - def __init__(self, line, continue_prompt): - self.line = line - self.continue_prompt = continue_prompt - self.pre, self.esc, self.ifun, self.the_rest = split_user_input(line) - - self.pre_char = self.pre.strip() - if self.pre_char: - self.pre_whitespace = '' # No whitespace allowd before esc chars - else: - self.pre_whitespace = self.pre - - self._oinfo = None - - def ofind(self, ip): - """Do a full, attribute-walking lookup of the ifun in the various - namespaces for the given IPython InteractiveShell instance. - - Return a dict with keys: found,obj,ospace,ismagic - - Note: can cause state changes because of calling getattr, but should - only be run if autocall is on and if the line hasn't matched any - other, less dangerous handlers. - - Does cache the results of the call, so can be called multiple times - without worrying about *further* damaging state. - """ - if not self._oinfo: - # ip.shell._ofind is actually on the Magic class! - self._oinfo = ip.shell._ofind(self.ifun) - return self._oinfo - - def __str__(self): - return "Lineinfo [%s|%s|%s]" %(self.pre, self.ifun, self.the_rest) - - -#----------------------------------------------------------------------------- # Main Prefilter manager #----------------------------------------------------------------------------- diff --git a/IPython/core/splitinput.py b/IPython/core/splitinput.py index ca5bb02..bd04f6a 100755 --- a/IPython/core/splitinput.py +++ b/IPython/core/splitinput.py @@ -1,6 +1,7 @@ # encoding: utf-8 """ -Simple utility for splitting user input. +Simple utility for splitting user input. This is used by both inputsplitter and +prefilter. Authors: @@ -28,36 +29,28 @@ from IPython.utils import py3compat # Main function #----------------------------------------------------------------------------- - # RegExp for splitting line contents into pre-char//first word-method//rest. # For clarity, each group in on one line. 
-# WARNING: update the regexp if the escapes in interactiveshell are changed, as they -# are hardwired in. +# WARNING: update the regexp if the escapes in interactiveshell are changed, as +# they are hardwired in. # Although it's not solely driven by the regex, note that: # ,;/% only trigger if they are the first character on the line # ! and !! trigger if they are first char(s) *or* follow an indent # ? triggers as first or last char. -# The four parts of the regex are: -# 1) pre: initial whitespace -# 2) esc: escape character -# 3) ifun: first word/method (mix of \w and '.') -# 4) the_rest: rest of line (separated from ifun by space if non-empty) -line_split = re.compile(r'^(\s*)' - r'([,;/%?]|!!?)?' - r'\s*([\w\.]+)' - r'(.*$|$)') - -# r'[\w\.]+' -# r'\s*=\s*%.*' +line_split = re.compile(""" + ^(\s*) # any leading space + ([,;/%]|!!?|\?\??)? # escape character or characters + \s*(%?[\w\.\*]*) # function/method, possibly with leading % + # to correctly treat things like '?%magic' + (.*?$|$) # rest of line + """, re.VERBOSE) def split_user_input(line, pattern=None): - """Split user input into pre-char/whitespace, function part and rest. - - This is currently handles lines with '=' in them in a very inconsistent - manner. + """Split user input into initial whitespace, escape character, function part + and the rest. """ # We need to ensure that the rest of this routine deals only with unicode line = py3compat.cast_unicode(line, sys.stdin.encoding or 'utf-8') @@ -76,11 +69,70 @@ def split_user_input(line, pattern=None): esc = "" else: pre, esc, ifun, the_rest = match.groups() - - if not py3compat.isidentifier(ifun, dotted=True): - the_rest = ifun + u' ' + the_rest - ifun = u'' #print 'line:<%s>' % line # dbg #print 'pre <%s> ifun <%s> rest <%s>' % (pre,ifun.strip(),the_rest) # dbg - return pre, esc, ifun.strip(), the_rest.lstrip() + return pre, esc or '', ifun.strip(), the_rest.lstrip() + +class LineInfo(object): + """A single line of input and associated info. 
+ + Includes the following as properties: + + line + The original, raw line + + continue_prompt + Is this line a continuation in a sequence of multiline input? + + pre + Any leading whitespace. + + esc + The escape character(s) after pre, or the empty string if there isn't one. + Note that '!!' and '??' are possible values for esc. Otherwise it will + always be a single character. + + ifun + The 'function part', which is basically the maximal initial sequence + of valid python identifiers and the '.' character. This is what is + checked for alias and magic transformations, used for auto-calling, + etc. In contrast to Python identifiers, it may start with "%" and contain + "*". + + the_rest + Everything else on the line. + """ + def __init__(self, line, continue_prompt=False): + self.line = line + self.continue_prompt = continue_prompt + self.pre, self.esc, self.ifun, self.the_rest = split_user_input(line) + + self.pre_char = self.pre.strip() + if self.pre_char: + self.pre_whitespace = '' # No whitespace allowed before esc chars + else: + self.pre_whitespace = self.pre + + self._oinfo = None + + def ofind(self, ip): + """Do a full, attribute-walking lookup of the ifun in the various + namespaces for the given IPython InteractiveShell instance. + + Return a dict with keys: found,obj,ospace,ismagic + + Note: can cause state changes because of calling getattr, but should + only be run if autocall is on and if the line hasn't matched any + other, less dangerous handlers. + + Does cache the results of the call, so can be called multiple times + without worrying about *further* damaging state. + """ + if not self._oinfo: + # ip.shell._ofind is actually on the Magic class! 
+ self._oinfo = ip.shell._ofind(self.ifun) + return self._oinfo + + def __str__(self): + return "LineInfo [%s|%s|%s|%s]" %(self.pre, self.esc, self.ifun, self.the_rest) diff --git a/IPython/core/tests/test_inputsplitter.py b/IPython/core/tests/test_inputsplitter.py index 639a5eb..4335445 100644 --- a/IPython/core/tests/test_inputsplitter.py +++ b/IPython/core/tests/test_inputsplitter.py @@ -394,9 +394,8 @@ def test_LineInfo(): def test_split_user_input(): """Unicode test - split_user_input already has good doctests""" line = u"Pérez Fernando" - parts = isp.split_user_input(line) parts_expected = (u'', u'', u'', line) - nt.assert_equal(parts, parts_expected) + tt.check_pairs(isp.split_user_input, [(line, parts_expected),]) # Transformer tests @@ -611,7 +610,8 @@ class IPythonInputTestCase(InputSplitterTestCase): isp.push(raw) out, out_raw = isp.source_raw_reset() - self.assertEqual(out.rstrip(), out_t) + self.assertEqual(out.rstrip(), out_t, + tt.pair_fail_msg.format("inputsplitter",raw, out_t, out)) self.assertEqual(out_raw.rstrip(), raw.rstrip()) def test_syntax_multiline(self): diff --git a/IPython/core/tests/test_splitinput.py b/IPython/core/tests/test_splitinput.py new file mode 100644 index 0000000..a5eb828 --- /dev/null +++ b/IPython/core/tests/test_splitinput.py @@ -0,0 +1,26 @@ +from IPython.core.splitinput import split_user_input +from IPython.testing import tools as tt + +tests = [ + ('x=1', ('', '', 'x', '=1')), + ('?', ('', '?', '', '')), + ('??', ('', '??', '', '')), + (' ?', (' ', '?', '', '')), + (' ??', (' ', '??', '', '')), + ('??x', ('', '??', 'x', '')), + ('?x=1', ('', '?', 'x', '=1')), + ('!ls', ('', '!', 'ls', '')), + (' !ls', (' ', '!', 'ls', '')), + ('!!ls', ('', '!!', 'ls', '')), + (' !!ls', (' ', '!!', 'ls', '')), + (',ls', ('', ',', 'ls', '')), + (';ls', ('', ';', 'ls', '')), + (' ;ls', (' ', ';', 'ls', '')), + ('f.g(x)', ('', '', 'f.g', '(x)')), + ('f.g (x)', ('', '', 'f.g', '(x)')), + ('?%hist', ('', '?', '%hist', '')), + ('?x*', ('', 
'?', 'x*', '')), + ] + +def test_split_user_input(): + return tt.check_pairs(split_user_input, tests) diff --git a/IPython/testing/tools.py b/IPython/testing/tools.py index a611814..9750575 100644 --- a/IPython/testing/tools.py +++ b/IPython/testing/tools.py @@ -295,7 +295,7 @@ class TempFileMixin(object): # delete it. I have no clue why pass -pair_fail_msg = ("Testing function {0}\n\n" +pair_fail_msg = ("Testing {0}\n\n" "In:\n" " {1!r}\n" "Expected:\n" @@ -318,9 +318,10 @@ def check_pairs(func, pairs): None. Raises an AssertionError if any output does not match the expected value. """ + name = getattr(func, "func_name", getattr(func, "__name__", "")) for inp, expected in pairs: out = func(inp) - assert out == expected, pair_fail_msg.format(func.func_name, inp, expected, out) + assert out == expected, pair_fail_msg.format(name, inp, expected, out) @contextmanager def mute_warn(): @@ -342,4 +343,3 @@ def make_tempfile(name): yield finally: os.unlink(name) -