From 4f1e79bd070b9de24e64c0778c68dbc42c396599 2011-12-11 01:21:56 From: MinRK Date: 2011-12-11 01:21:56 Subject: [PATCH] add strict flag to arg_split, to optionally ignore shlex parse errors Sometimes we pass things that aren't really command-line args to arg_split, e.g.: %timeit python_code(" ") This commit adds a `strict` flag, which defaults to the same raising behavior as before. Currently magic_timeit is the *only* place where we use strict=False, but it should also be done in completions (PR #1116). closes #1109 --- diff --git a/IPython/core/magic.py b/IPython/core/magic.py index 4670881..ee84680 100644 --- a/IPython/core/magic.py +++ b/IPython/core/magic.py @@ -266,6 +266,7 @@ python-profiler package from non-free.""") # Get options list_all = kw.get('list_all',0) posix = kw.get('posix', os.name == 'posix') + strict = kw.get('strict', True) # Check if we have more than one argument to warrant extra processing: odict = {} # Dictionary with options @@ -273,7 +274,7 @@ python-profiler package from non-free.""") if len(args) >= 1: # If the list of inputs only has 0 or 1 thing in it, there's no # need to look for options - argv = arg_split(arg_str,posix) + argv = arg_split(arg_str, posix, strict) # Do regular option processing try: opts,args = getopt(argv,opt_str,*long_opts) @@ -1865,7 +1866,7 @@ Currently the magic system has the following functions:\n""" scaling = [1, 1e3, 1e6, 1e9] opts, stmt = self.parse_options(parameter_s,'n:r:tcp:', - posix=False) + posix=False, strict=False) if stmt == "": return timefunc = timeit.default_timer diff --git a/IPython/core/tests/test_magic.py b/IPython/core/tests/test_magic.py index 5221499..4a35227 100644 --- a/IPython/core/tests/test_magic.py +++ b/IPython/core/tests/test_magic.py @@ -344,3 +344,12 @@ def test_psearch(): with tt.AssertPrints("dict.fromkeys"): _ip.run_cell("dict.fr*?") +def test_timeit_shlex(): + """test shlex issues with timeit (#1109)""" + _ip.ex("def f(*a,**kw): pass") + _ip.magic('timeit -n1 "this is a 
bug".count(" ")') + _ip.magic('timeit -r1 -n1 f(" ", 1)') + _ip.magic('timeit -r1 -n1 f(" ", 1, " ", 2, " ")') + _ip.magic('timeit -r1 -n1 ("a " + "b")') + _ip.magic('timeit -r1 -n1 f("a " + "b")') + _ip.magic('timeit -r1 -n1 f("a " + "b ")') diff --git a/IPython/utils/_process_common.py b/IPython/utils/_process_common.py index f77cf59..e352155 100644 --- a/IPython/utils/_process_common.py +++ b/IPython/utils/_process_common.py @@ -146,12 +146,18 @@ def getoutputerror(cmd): return py3compat.bytes_to_str(out), py3compat.bytes_to_str(err) -def arg_split(s, posix=False): +def arg_split(s, posix=False, strict=True): """Split a command line's arguments in a shell-like manner. This is a modified version of the standard library's shlex.split() function, but with a default of posix=False for splitting, so that quotes - in inputs are respected.""" + in inputs are respected. + + if strict=False, then any errors shlex.split would raise will result in the + unparsed remainder being the last element of the list, rather than raising. + This is because we sometimes use arg_split to parse things other than + command-line args. + """ # Unfortunately, python's shlex module is buggy with unicode input: # http://bugs.python.org/issue1170 @@ -163,7 +169,25 @@ def arg_split(s, posix=False): s = s.encode('utf-8') lex = shlex.shlex(s, posix=posix) lex.whitespace_split = True - tokens = list(lex) + # Extract tokens, ensuring that things like leaving open quotes + # does not cause this to raise. This is important, because we + # sometimes pass Python source through this (e.g. %timeit f(" ")), + # and it shouldn't raise an exception. + # It may be a bad idea to parse things that are not command-line args + # through this function, but we do, so let's be safe about it. 
+ tokens = [] + while True: + try: + tokens.append(lex.next()) + except StopIteration: + break + except ValueError: + if strict: + raise + # couldn't parse, get remaining blob as last token + tokens.append(lex.token) + break + if is_unicode: # Convert the tokens back to unicode. tokens = [x.decode('utf-8') for x in tokens]