From 790cb1437eded4851dea45d0d2226f9e73e68c3e 2011-12-12 19:00:47 From: MinRK Date: 2011-12-12 19:00:47 Subject: [PATCH] Merge shlex PRs (#1130, #1116) * arg_split now takes optional strict flag, to ignore ValueErrors in shlex parsing * %timeit uses strict=False, to avoid errors parsing python code * %run completer uses arg_split(strict=False) for its unicode behavior, instead of custom shlex derivative, which is now redundant. closes #1109 closes #1115 closes #1116 closes #1130 --- diff --git a/IPython/core/completerlib.py b/IPython/core/completerlib.py index c210ee6..7717039 100644 --- a/IPython/core/completerlib.py +++ b/IPython/core/completerlib.py @@ -20,7 +20,6 @@ import glob import inspect import os import re -import shlex import sys # Third-party imports @@ -31,6 +30,7 @@ from zipimport import zipimporter from IPython.core.completer import expand_user, compress_user from IPython.core.error import TryNext from IPython.utils import py3compat +from IPython.utils._process_common import arg_split # FIXME: this should be pulled in with the right call via the component system from IPython.core.ipapi import get as get_ipython @@ -56,35 +56,6 @@ magic_run_re = re.compile(r'.*(\.ipy|\.py[w]?)$') # Local utilities #----------------------------------------------------------------------------- -def shlex_split(x): - """Helper function to split lines into segments. - """ - # shlex.split raises an exception if there is a syntax error in sh syntax - # for example if no closing " is found. This function keeps dropping the - # last character of the line until shlex.split does not raise - # an exception. 
It adds end of the line to the result of shlex.split - # - # Example: - # %run "c:/python -> ['%run','"c:/python'] - - # shlex.split has unicode bugs in Python 2, so encode first to str - if not py3compat.PY3: - x = py3compat.cast_bytes(x) - - endofline = [] - while x != '': - try: - comps = shlex.split(x) - if len(endofline) >= 1: - comps.append(''.join(endofline)) - return comps - - except ValueError: - endofline = [x[-1:]]+endofline - x = x[:-1] - - return [''.join(endofline)] - def module_list(path): """ Return the list containing the names of the modules available in the given @@ -265,7 +236,7 @@ def module_completer(self,event): def magic_run_completer(self, event): """Complete files that end in .py or .ipy for the %run command. """ - comps = shlex_split(event.line) + comps = arg_split(event.line, strict=False) relpath = (len(comps) > 1 and comps[-1] or '').strip("'\"") #print("\nev=", event) # dbg diff --git a/IPython/core/magic.py b/IPython/core/magic.py index 4670881..ee84680 100644 --- a/IPython/core/magic.py +++ b/IPython/core/magic.py @@ -266,6 +266,7 @@ python-profiler package from non-free.""") # Get options list_all = kw.get('list_all',0) posix = kw.get('posix', os.name == 'posix') + strict = kw.get('strict', True) # Check if we have more than one argument to warrant extra processing: odict = {} # Dictionary with options @@ -273,7 +274,7 @@ python-profiler package from non-free.""") if len(args) >= 1: # If the list of inputs only has 0 or 1 thing in it, there's no # need to look for options - argv = arg_split(arg_str,posix) + argv = arg_split(arg_str, posix, strict) # Do regular option processing try: opts,args = getopt(argv,opt_str,*long_opts) @@ -1865,7 +1866,7 @@ Currently the magic system has the following functions:\n""" scaling = [1, 1e3, 1e6, 1e9] opts, stmt = self.parse_options(parameter_s,'n:r:tcp:', - posix=False) + posix=False, strict=False) if stmt == "": return timefunc = timeit.default_timer diff --git 
a/IPython/core/tests/test_completerlib.py b/IPython/core/tests/test_completerlib.py new file mode 100644 index 0000000..2404b80 --- /dev/null +++ b/IPython/core/tests/test_completerlib.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +"""Tests for completerlib. + +""" +from __future__ import absolute_import + +#----------------------------------------------------------------------------- +# Imports +#----------------------------------------------------------------------------- + +import os +import shutil +import sys +import tempfile +import unittest +from os.path import join + +import nose.tools as nt +from nose import SkipTest + +from IPython.core.completerlib import magic_run_completer +from IPython.testing import decorators as dec +from IPython.testing import tools as tt +from IPython.utils import py3compat + + +class MockEvent(object): + def __init__(self, line): + self.line = line + +#----------------------------------------------------------------------------- +# Test functions begin +#----------------------------------------------------------------------------- +class Test_magic_run_completer(unittest.TestCase): + def setUp(self): + self.BASETESTDIR = tempfile.mkdtemp() + for fil in [u"aaø.py", u"a.py", u"b.py"]: + with open(join(self.BASETESTDIR, fil), "w") as sfile: + sfile.write("pass\n") + self.oldpath = os.getcwdu() + os.chdir(self.BASETESTDIR) + + def tearDown(self): + os.chdir(self.oldpath) + shutil.rmtree(self.BASETESTDIR) + + def test_1(self): + """Test magic_run_completer, should match two alternatives + """ + event = MockEvent(u"%run a") + mockself = None + match = set(magic_run_completer(mockself, event)) + self.assertEqual(match, set([u"a.py", u"aaø.py"])) + + def test_2(self): + """Test magic_run_completer, should match one alternative + """ + event = MockEvent(u"%run aa") + mockself = None + match = set(magic_run_completer(mockself, event)) + self.assertEqual(match, set([u"aaø.py"])) + + def test_3(self): + """Test magic_run_completer with
unterminated " """ + event = MockEvent(u'%run "a') + mockself = None + match = set(magic_run_completer(mockself, event)) + self.assertEqual(match, set([u"a.py", u"aaø.py"])) + diff --git a/IPython/core/tests/test_magic.py b/IPython/core/tests/test_magic.py index 8086371..7877825 100644 --- a/IPython/core/tests/test_magic.py +++ b/IPython/core/tests/test_magic.py @@ -344,3 +344,12 @@ def test_psearch(): with tt.AssertPrints("dict.fromkeys"): _ip.run_cell("dict.fr*?") +def test_timeit_shlex(): + """test shlex issues with timeit (#1109)""" + _ip.ex("def f(*a,**kw): pass") + _ip.magic('timeit -n1 "this is a bug".count(" ")') + _ip.magic('timeit -r1 -n1 f(" ", 1)') + _ip.magic('timeit -r1 -n1 f(" ", 1, " ", 2, " ")') + _ip.magic('timeit -r1 -n1 ("a " + "b")') + _ip.magic('timeit -r1 -n1 f("a " + "b")') + _ip.magic('timeit -r1 -n1 f("a " + "b ")') diff --git a/IPython/utils/_process_common.py b/IPython/utils/_process_common.py index f77cf59..e352155 100644 --- a/IPython/utils/_process_common.py +++ b/IPython/utils/_process_common.py @@ -146,12 +146,18 @@ def getoutputerror(cmd): return py3compat.bytes_to_str(out), py3compat.bytes_to_str(err) -def arg_split(s, posix=False): +def arg_split(s, posix=False, strict=True): """Split a command line's arguments in a shell-like manner. This is a modified version of the standard library's shlex.split() function, but with a default of posix=False for splitting, so that quotes - in inputs are respected.""" + in inputs are respected. + + if strict=False, then any errors shlex.split would raise will result in the + unparsed remainder being the last element of the list, rather than raising. + This is because we sometimes use arg_split to parse things other than + command-line args. 
+ """ # Unfortunately, python's shlex module is buggy with unicode input: # http://bugs.python.org/issue1170 @@ -163,7 +169,25 @@ def arg_split(s, posix=False): s = s.encode('utf-8') lex = shlex.shlex(s, posix=posix) lex.whitespace_split = True - tokens = list(lex) + # Extract tokens, ensuring that things like leaving open quotes + # does not cause this to raise. This is important, because we + # sometimes pass Python source through this (e.g. %timeit f(" ")), + # and it shouldn't raise an exception. + # It may be a bad idea to parse things that are not command-line args + # through this function, but we do, so let's be safe about it. + tokens = [] + while True: + try: + tokens.append(lex.next()) + except StopIteration: + break + except ValueError: + if strict: + raise + # couldn't parse, get remaining blob as last token + tokens.append(lex.token) + break + if is_unicode: # Convert the tokens back to unicode. tokens = [x.decode('utf-8') for x in tokens]