From 073a890c1a8e06d85ba040ef9974edbd71ee3270 2010-08-17 22:20:51 From: Fernando Perez Date: 2010-08-17 22:20:51 Subject: [PATCH] Completed first pass of inputsplitter with IPython syntax. Code is now mostly ready for use. Upon review we'll probably restructure a few things, and there will likely be a few more classes created for extensibility. But for now, this is ready for the various frontends to start using. --- diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py index 8312333..ef965f7 100644 --- a/IPython/core/inputsplitter.py +++ b/IPython/core/inputsplitter.py @@ -6,6 +6,11 @@ into standalone blocks that can be executed by Python as 'single' statements (thus triggering sys.displayhook). For more details, see the class docstring below. + +Authors + +* Fernando Perez +* Brian Granger """ #----------------------------------------------------------------------------- # Copyright (C) 2010 The IPython Development Team @@ -26,10 +31,32 @@ import sys from IPython.utils.text import make_quoted_expr #----------------------------------------------------------------------------- +# Globals +#----------------------------------------------------------------------------- + +# The escape sequences that define the syntax transformations IPython will +# apply to user input. These can NOT be just changed here: many regular +# expressions and other parts of the code may use their hardcoded values, and +# for all intents and purposes they constitute the 'IPython syntax', so they +# should be considered fixed. + +ESC_SHELL = '!' +ESC_SH_CAP = '!!' +ESC_HELP = '?' +ESC_HELP2 = '??' +ESC_MAGIC = '%' +ESC_QUOTE = ',' +ESC_QUOTE2 = ';' +ESC_PAREN = '/' + +#----------------------------------------------------------------------------- # Utilities #----------------------------------------------------------------------------- -# FIXME: move these utilities to the general ward... 
+# FIXME: These are general-purpose utilities that later can be moved to the +# general ward. Kept here for now because we're being very strict about test +# coverage with this code, and this lets us ensure that we keep 100% coverage +# while developing. # compiled regexps for autoindent management dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass') @@ -88,7 +115,7 @@ def get_input_encoding(): return encoding #----------------------------------------------------------------------------- -# Classes and functions +# Classes and functions for normal Python syntax handling #----------------------------------------------------------------------------- class InputSplitter(object): @@ -425,14 +452,140 @@ class InputSplitter(object): #----------------------------------------------------------------------------- -# IPython-specific syntactic support +# Functions and classes for IPython-specific syntactic support #----------------------------------------------------------------------------- -# We implement things, as much as possible, as standalone functions that can be -# tested and validated in isolation. +# RegExp for splitting line contents into pre-char//first word-method//rest. +# For clarity, each group is on one line. + +line_split = re.compile(""" + ^(\s*) # any leading space + ([,;/%]|!!?|\?\??) # escape character or characters + \s*([\w\.]*) # function/method part (mix of \w and '.') + (\s+.*$|$) # rest of line + """, re.VERBOSE) + + +def split_user_input(line): + """Split user input into early whitespace, esc-char, function part and rest. + + This currently handles lines with '=' in them in a very inconsistent + manner. 
+ + Examples + ======== + >>> split_user_input('x=1') + ('', '', 'x=1', '') + >>> split_user_input('?') + ('', '?', '', '') + >>> split_user_input('??') + ('', '??', '', '') + >>> split_user_input(' ?') + (' ', '?', '', '') + >>> split_user_input(' ??') + (' ', '??', '', '') + >>> split_user_input('??x') + ('', '??', 'x', '') + >>> split_user_input('?x=1') + ('', '', '?x=1', '') + >>> split_user_input('!ls') + ('', '!', 'ls', '') + >>> split_user_input(' !ls') + (' ', '!', 'ls', '') + >>> split_user_input('!!ls') + ('', '!!', 'ls', '') + >>> split_user_input(' !!ls') + (' ', '!!', 'ls', '') + >>> split_user_input(',ls') + ('', ',', 'ls', '') + >>> split_user_input(';ls') + ('', ';', 'ls', '') + >>> split_user_input(' ;ls') + (' ', ';', 'ls', '') + >>> split_user_input('f.g(x)') + ('', '', 'f.g(x)', '') + >>> split_user_input('f.g (x)') + ('', '', 'f.g', '(x)') + """ + match = line_split.match(line) + if match: + lspace, esc, fpart, rest = match.groups() + else: + # print "match failed for line '%s'" % line + try: + fpart, rest = line.split(None,1) + except ValueError: + # print "split failed for line '%s'" % line + fpart, rest = line,'' + lspace = re.match('^(\s*)(.*)',line).groups()[0] + esc = '' + + # fpart has to be a valid python identifier, so it better be only pure + # ascii, no unicode: + try: + fpart = fpart.encode('ascii') + except UnicodeEncodeError: + lspace = unicode(lspace) + rest = fpart + u' ' + rest + fpart = u'' + + #print 'line:<%s>' % line # dbg + #print 'esc <%s> fpart <%s> rest <%s>' % (esc,fpart.strip(),rest) # dbg + return lspace, esc, fpart.strip(), rest.lstrip() + + +# The escaped translators ALL receive a line where their own escape has been +# stripped. Only '?' is valid at the end of the line, all others can only be +# placed at the start. + +class LineInfo(object): + """A single line of input and associated info. 
+ + This is a utility class that mostly wraps the output of + :func:`split_user_input` into a convenient object to be passed around + during input transformations. + + Includes the following as properties: + + line + The original, raw line + + lspace + Any early whitespace before actual text starts. + + esc + The initial esc character (or characters, for double-char escapes like + '??' or '!!'). + + pre_char + The escape character(s) in esc or the empty string if there isn't one. + + fpart + The 'function part', which is basically the maximal initial sequence + of valid python identifiers and the '.' character. This is what is + checked for alias and magic transformations, used for auto-calling, + etc. + + rest + Everything else on the line. + """ + def __init__(self, line): + self.line = line + self.lspace, self.esc, self.fpart, self.rest = \ + split_user_input(line) + + def __str__(self): + return "LineInfo [%s|%s|%s|%s]" % (self.lspace, self.esc, + self.fpart, self.rest) + + +# Transformations of the special syntaxes that don't rely on an explicit escape +# character but instead on patterns on the input line + +# The core transformations are implemented as standalone functions that can be +# tested and validated in isolation. Each of these uses a regexp, we +# pre-compile these and keep them close to each function definition for clarity -# Each of these uses a regexp, we pre-compile these and keep them close to each -# function definition for clarity _assign_system_re = re.compile(r'(?P(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))' r'\s*=\s*!\s*(?P.*)') @@ -467,16 +620,13 @@ def transform_assign_magic(line): return line -_classic_prompt_re = re.compile(r'(^[ \t]*>>> |^[ \t]*\.\.\. )') +_classic_prompt_re = re.compile(r'^([ \t]*>>> |^[ \t]*\.\.\. 
)') def transform_classic_prompt(line): """Handle inputs that start with '>>> ' syntax.""" - if not line or line.isspace() or line.strip() == '...': - # This allows us to recognize multiple input prompts separated by - # blank lines and pasted in a single chunk, very common when - # pasting doctests or long tutorial passages. - return '' + if not line or line.isspace(): + return line m = _classic_prompt_re.match(line) if m: return line[len(m.group(0)):] @@ -484,16 +634,13 @@ def transform_classic_prompt(line): return line -_ipy_prompt_re = re.compile(r'(^[ \t]*In \[\d+\]: |^[ \t]*\ \ \ \.\.\.+: )') +_ipy_prompt_re = re.compile(r'^([ \t]*In \[\d+\]: |^[ \t]*\ \ \ \.\.\.+: )') def transform_ipy_prompt(line): """Handle inputs that start classic IPython prompt syntax.""" - if not line or line.isspace() or line.strip() == '...': - # This allows us to recognize multiple input prompts separated by - # blank lines and pasted in a single chunk, very common when - # pasting doctests or long tutorial passages. - return '' + if not line or line.isspace(): + return line m = _ipy_prompt_re.match(line) if m: return line[len(m.group(0)):] @@ -501,21 +648,151 @@ def transform_ipy_prompt(line): return line -# Warning, these cannot be changed unless various regular expressions -# are updated in a number of places. Not great, but at least we told you. -ESC_SHELL = '!' -ESC_SH_CAP = '!!' -ESC_HELP = '?' -ESC_MAGIC = '%' -ESC_QUOTE = ',' -ESC_QUOTE2 = ';' -ESC_PAREN = '/' +def transform_unescaped(line): + """Transform lines that are not explicitly escaped out. + + This calls to the above transform_* functions for the actual line + translations. + + Parameters + ---------- + line : str + A single line of input to be transformed. 
+ + Returns + ------- + new_line : str + Transformed line, which may be identical to the original.""" + + if not line or line.isspace(): + return line + + new_line = line + for f in [transform_assign_system, transform_assign_magic, + transform_classic_prompt, transform_ipy_prompt ] : + new_line = f(new_line) + return new_line + +# Support for syntax transformations that use explicit escapes typed by the +# user at the beginning of a line + +def tr_system(line_info): + "Translate lines escaped with: !" + cmd = line_info.line.lstrip().lstrip(ESC_SHELL) + return '%sget_ipython().system(%s)' % (line_info.lspace, + make_quoted_expr(cmd)) + + +def tr_system2(line_info): + "Translate lines escaped with: !!" + cmd = line_info.line.lstrip()[2:] + return '%sget_ipython().getoutput(%s)' % (line_info.lspace, + make_quoted_expr(cmd)) + + +def tr_help(line_info): + "Translate lines escaped with: ?/??" + # A naked help line should just fire the intro help screen + if not line_info.line[1:]: + return 'get_ipython().show_usage()' + + # There may be one or two '?' at the end, move them to the front so that + # the rest of the logic can assume escapes are at the start + line = line_info.line + if line.endswith('?'): + line = line[-1] + line[:-1] + if line.endswith('?'): + line = line[-1] + line[:-1] + line_info = LineInfo(line) + + # From here on, simply choose which level of detail to get. 
+ if line_info.esc == '?': + pinfo = 'pinfo' + elif line_info.esc == '??': + pinfo = 'pinfo2' + + tpl = '%sget_ipython().magic("%s %s")' + return tpl % (line_info.lspace, pinfo, + ' '.join([line_info.fpart, line_info.rest]).strip()) + + +def tr_magic(line_info): + "Translate lines escaped with: %" + tpl = '%sget_ipython().magic(%s)' + cmd = make_quoted_expr(' '.join([line_info.fpart, + line_info.rest])).strip() + return tpl % (line_info.lspace, cmd) + + +def tr_quote(line_info): + "Translate lines escaped with: ," + return '%s%s("%s")' % (line_info.lspace, line_info.fpart, + '", "'.join(line_info.rest.split()) ) + + +def tr_quote2(line_info): + "Translate lines escaped with: ;" + return '%s%s("%s")' % (line_info.lspace, line_info.fpart, + line_info.rest) + + +def tr_paren(line_info): + "Translate lines escaped with: /" + return '%s%s(%s)' % (line_info.lspace, line_info.fpart, + ", ".join(line_info.rest.split())) + + +def transform_escaped(line): + """Transform lines that are explicitly escaped out. + + This calls to the above tr_* functions for the actual line translations.""" + + tr = { ESC_SHELL : tr_system, + ESC_SH_CAP : tr_system2, + ESC_HELP : tr_help, + ESC_HELP2 : tr_help, + ESC_MAGIC : tr_magic, + ESC_QUOTE : tr_quote, + ESC_QUOTE2 : tr_quote2, + ESC_PAREN : tr_paren } + + # Empty lines just get returned unmodified + if not line or line.isspace(): + return line + + # Get line endpoints, where the escapes can be + line_info = LineInfo(line) + + # If the escape is not at the start, only '?' needs to be special-cased. + # All other escapes are only valid at the start + if not line_info.esc in tr: + if line.endswith(ESC_HELP): + return tr_help(line_info) + else: + # If we don't recognize the escape, don't modify the line + return line + + return tr[line_info.esc](line_info) + class IPythonInputSplitter(InputSplitter): """An input splitter that recognizes all of IPython's special syntax.""" - def push(self, lines): """Push one or more lines of IPython input. 
""" - return super(IPythonInputSplitter, self).push(lines) + # We only apply the line transformers to the input if we have either no + # input yet, or complete input. This prevents the accidental + # transformation of escapes inside multiline expressions like + # triple-quoted strings or parenthesized expressions. + lines_list = lines.splitlines() + if self._is_complete or not self._buffer: + + new_list = map(transform_escaped, lines_list) + else: + new_list = lines_list + + # Now apply the unescaped transformations to each input line + new_list = map(transform_unescaped, new_list) + newlines = '\n'.join(new_list) + return super(IPythonInputSplitter, self).push(newlines) diff --git a/IPython/core/tests/test_inputsplitter.py b/IPython/core/tests/test_inputsplitter.py index 4d07e40..3a6cf45 100644 --- a/IPython/core/tests/test_inputsplitter.py +++ b/IPython/core/tests/test_inputsplitter.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """Tests for the inputsplitter module. """ #----------------------------------------------------------------------------- @@ -23,6 +24,10 @@ from IPython.core import inputsplitter as isp #----------------------------------------------------------------------------- # Semi-complete examples (also used as tests) #----------------------------------------------------------------------------- + +# Note: at the bottom, there's a slightly more complete version of this that +# can be useful during development of code here. + def mini_interactive_loop(raw_input): """Minimal example of the logic of an interactive interpreter loop. @@ -44,7 +49,7 @@ def mini_interactive_loop(raw_input): # Here we just return input so we can use it in a test suite, but a real # interpreter would instead send it for execution somewhere. 
src = isp.source_reset() - print 'Input source was:\n', src + #print 'Input source was:\n', src # dbg return src #----------------------------------------------------------------------------- @@ -363,9 +368,18 @@ class InteractiveLoopTestCase(unittest.TestCase): self.check_ns(['x =(1+','1+','2)'], dict(x=4)) -class IPythonInputTestCase(InputSplitterTestCase): - def setUp(self): - self.isp = isp.IPythonInputSplitter() +def test_LineInfo(): + """Simple test for LineInfo construction and str()""" + linfo = isp.LineInfo(' %cd /home') + nt.assert_equals(str(linfo), 'LineInfo [ |%|cd|/home]') + + +def test_split_user_input(): + """Unicode test - split_user_input already has good doctests""" + line = u"Pérez Fernando" + parts = isp.split_user_input(line) + parts_expected = (u'', u'', u'', line) + nt.assert_equal(parts, parts_expected) # Transformer tests @@ -373,39 +387,237 @@ def transform_checker(tests, func): """Utility to loop over test inputs""" for inp, tr in tests: nt.assert_equals(func(inp), tr) - + +# Data for all the syntax tests in the form of lists of pairs of +# raw/transformed input. We store it here as a global dict so that we can use +# it both within single-function tests and also to validate the behavior of the +# larger objects + +syntax = \ + dict(assign_system = + [('a =! 
ls', 'a = get_ipython().magic("sc -l = ls")'), + ('b = !ls', 'b = get_ipython().magic("sc -l = ls")'), + ('x=1', 'x=1'), # normal input is unmodified + (' ',' '), # blank lines are kept intact + ], + + assign_magic = + [('a =% who', 'a = get_ipython().magic("who")'), + ('b = %who', 'b = get_ipython().magic("who")'), + ('x=1', 'x=1'), # normal input is unmodified + (' ',' '), # blank lines are kept intact + ], + + classic_prompt = + [('>>> x=1', 'x=1'), + ('x=1', 'x=1'), # normal input is unmodified + (' ',' '), # blank lines are kept intact + ], + + ipy_prompt = + [('In [1]: x=1', 'x=1'), + ('x=1', 'x=1'), # normal input is unmodified + (' ',' '), # blank lines are kept intact + ], + + # Tests for the escape transformer to leave normal code alone + escaped_noesc = + [ (' ', ' '), + ('x=1', 'x=1'), + ], + + # System calls + escaped_shell = + [ ('!ls', 'get_ipython().system("ls")'), + # Double-escape shell, this means to capture the output of the + # subprocess and return it + ('!!ls', 'get_ipython().getoutput("ls")'), + ], + + # Help/object info + escaped_help = + [ ('?', 'get_ipython().show_usage()'), + ('?x1', 'get_ipython().magic("pinfo x1")'), + ('??x2', 'get_ipython().magic("pinfo2 x2")'), + ('x3?', 'get_ipython().magic("pinfo x3")'), + ('x4??', 'get_ipython().magic("pinfo2 x4")'), + ], + + # Explicit magic calls + escaped_magic = + [ ('%cd', 'get_ipython().magic("cd")'), + ('%cd /home', 'get_ipython().magic("cd /home")'), + (' %magic', ' get_ipython().magic("magic")'), + ], + + # Quoting with separate arguments + escaped_quote = + [ (',f', 'f("")'), + (',f x', 'f("x")'), + (' ,f y', ' f("y")'), + (',f a b', 'f("a", "b")'), + ], + + # Quoting with single argument + escaped_quote2 = + [ (';f', 'f("")'), + (';f x', 'f("x")'), + (' ;f y', ' f("y")'), + (';f a b', 'f("a b")'), + ], + + # Simply apply parens + escaped_paren = + [ ('/f', 'f()'), + ('/f x', 'f(x)'), + (' /f y', ' f(y)'), + ('/f a b', 'f(a, b)'), + ], + + # More complex multiline tests + ## 
escaped_multiline = + ## [()], + ) + +# multiline syntax examples. Each of these should be a list of lists, with +# each entry itself having pairs of raw/transformed input. The union (with +# '\n'.join() of the transformed inputs is what the splitter should produce +# when fed the raw lines one at a time via push. +syntax_ml = \ + dict(classic_prompt = + [ [('>>> for i in range(10):','for i in range(10):'), + ('... print i',' print i'), + ('... ', ''), + ], + ], + + ipy_prompt = + [ [('In [24]: for i in range(10):','for i in range(10):'), + (' ....: print i',' print i'), + (' ....: ', ''), + ], + ], + ) + def test_assign_system(): - tests = [('a =! ls', 'a = get_ipython().magic("sc -l = ls")'), - ('b = !ls', 'b = get_ipython().magic("sc -l = ls")'), - ('x=1','x=1')] - transform_checker(tests, isp.transform_assign_system) + transform_checker(syntax['assign_system'], isp.transform_assign_system) def test_assign_magic(): - tests = [('a =% who', 'a = get_ipython().magic("who")'), - ('b = %who', 'b = get_ipython().magic("who")'), - ('x=1','x=1')] - transform_checker(tests, isp.transform_assign_magic) + transform_checker(syntax['assign_magic'], isp.transform_assign_magic) def test_classic_prompt(): - tests = [('>>> x=1', 'x=1'), - ('>>> for i in range(10):','for i in range(10):'), - ('... 
print i',' print i'), - ('...', ''), - ('x=1','x=1') - ] - transform_checker(tests, isp.transform_classic_prompt) + transform_checker(syntax['classic_prompt'], isp.transform_classic_prompt) + for example in syntax_ml['classic_prompt']: + transform_checker(example, isp.transform_classic_prompt) def test_ipy_prompt(): - tests = [('In [1]: x=1', 'x=1'), - ('In [24]: for i in range(10):','for i in range(10):'), - (' ....: print i',' print i'), - (' ....: ', ''), - ('x=1', 'x=1'), # normal input is unmodified - (' ','') # blank lines are just collapsed - ] - transform_checker(tests, isp.transform_ipy_prompt) + transform_checker(syntax['ipy_prompt'], isp.transform_ipy_prompt) + for example in syntax_ml['ipy_prompt']: + transform_checker(example, isp.transform_ipy_prompt) + + +def test_escaped_noesc(): + transform_checker(syntax['escaped_noesc'], isp.transform_escaped) + + +def test_escaped_shell(): + transform_checker(syntax['escaped_shell'], isp.transform_escaped) + + +def test_escaped_help(): + transform_checker(syntax['escaped_help'], isp.transform_escaped) + + +def test_escaped_magic(): + transform_checker(syntax['escaped_magic'], isp.transform_escaped) + + +def test_escaped_quote(): + transform_checker(syntax['escaped_quote'], isp.transform_escaped) + + +def test_escaped_quote2(): + transform_checker(syntax['escaped_quote2'], isp.transform_escaped) + + +def test_escaped_paren(): + transform_checker(syntax['escaped_paren'], isp.transform_escaped) + + +class IPythonInputTestCase(InputSplitterTestCase): + """By just creating a new class whose .isp is a different instance, we + re-run the same test battery on the new input splitter. + + In addition, this runs the tests over the syntax and syntax_ml dicts that + were tested by individual functions, as part of the OO interface. 
+ """ + def setUp(self): + self.isp = isp.IPythonInputSplitter() + + def test_syntax(self): + """Call all single-line syntax tests from the main object""" + isp = self.isp + for example in syntax.itervalues(): + for raw, out_t in example: + if raw.startswith(' '): + continue + + isp.push(raw) + out = isp.source_reset().rstrip() + self.assertEqual(out, out_t) + + def test_syntax_multiline(self): + isp = self.isp + for example in syntax_ml.itervalues(): + out_t_parts = [] + for line_pairs in example: + for raw, out_t_part in line_pairs: + isp.push(raw) + out_t_parts.append(out_t_part) + + out = isp.source_reset().rstrip() + out_t = '\n'.join(out_t_parts).rstrip() + self.assertEqual(out, out_t) + + +#----------------------------------------------------------------------------- +# Main - use as a script +#----------------------------------------------------------------------------- + +if __name__ == '__main__': + # A simple demo for interactive experimentation. This code will not get + # picked up by any test suite. Useful mostly for illustration and during + # development. + from IPython.core.inputsplitter import InputSplitter, IPythonInputSplitter + #isp, start_prompt = InputSplitter(), '>>> ' + isp, start_prompt = IPythonInputSplitter(), 'In> ' + + autoindent = True + #autoindent = False + + # In practice, this input loop would be wrapped in an outside loop to read + # input indefinitely, until some exit/quit command was issued. Here we + # only illustrate the basic inner loop. + try: + while True: + prompt = start_prompt + while isp.push_accepts_more(): + indent = ' '*isp.indent_spaces + if autoindent: + line = indent + raw_input(prompt+indent) + else: + line = raw_input(prompt) + isp.push(line) + prompt = '... ' + + # Here we just return input so we can use it in a test suite, but a + # real interpreter would instead send it for execution somewhere. 
+ src = isp.source_reset() + print 'Input source was:\n', src # dbg + except EOFError: + print 'Bye' diff --git a/IPython/utils/text.py b/IPython/utils/text.py index e354574..5b80381 100644 --- a/IPython/utils/text.py +++ b/IPython/utils/text.py @@ -295,8 +295,9 @@ def make_quoted_expr(s): quote = "'''" else: # give up, backslash-escaped string will do - return '"%s"' % esc_quotes(s) - res = raw + quote + s + tailpadding + quote + tail + return '"%s"' % esc_quotes(s).strip() + txt = (s + tailpadding).strip() + res = raw + quote + txt + quote + tail return res