From 4f1e79bd070b9de24e64c0778c68dbc42c396599 2011-12-11 01:21:56
From: MinRK <benjaminrk@gmail.com>
Date: 2011-12-11 01:21:56
Subject: [PATCH] add strict flag to arg_split, to optionally ignore shlex parse errors

Sometimes we pass things that aren't really command-line args to arg_split, e.g.:

    %timeit python_code(" ")

This commit adds a `strict` flag, which defaults to the same raising behavior
as before.

Currently magic_timeit is the *only* place we use strict=False, but it should
also be done in completions (PR #1116).

closes #1109

---

diff --git a/IPython/core/magic.py b/IPython/core/magic.py
index 4670881..ee84680 100644
--- a/IPython/core/magic.py
+++ b/IPython/core/magic.py
@@ -266,6 +266,7 @@ python-profiler package from non-free.""")
         # Get options
         list_all = kw.get('list_all',0)
         posix = kw.get('posix', os.name == 'posix')
+        strict = kw.get('strict', True)
 
         # Check if we have more than one argument to warrant extra processing:
         odict = {}  # Dictionary with options
@@ -273,7 +274,7 @@ python-profiler package from non-free.""")
         if len(args) >= 1:
             # If the list of inputs only has 0 or 1 thing in it, there's no
             # need to look for options
-            argv = arg_split(arg_str,posix)
+            argv = arg_split(arg_str, posix, strict)
             # Do regular option processing
             try:
                 opts,args = getopt(argv,opt_str,*long_opts)
@@ -1865,7 +1866,7 @@ Currently the magic system has the following functions:\n"""
         scaling = [1, 1e3, 1e6, 1e9]
 
         opts, stmt = self.parse_options(parameter_s,'n:r:tcp:',
-                                        posix=False)
+                                        posix=False, strict=False)
         if stmt == "":
             return
         timefunc = timeit.default_timer
diff --git a/IPython/core/tests/test_magic.py b/IPython/core/tests/test_magic.py
index 5221499..4a35227 100644
--- a/IPython/core/tests/test_magic.py
+++ b/IPython/core/tests/test_magic.py
@@ -344,3 +344,12 @@ def test_psearch():
     with tt.AssertPrints("dict.fromkeys"):
         _ip.run_cell("dict.fr*?")
 
+def test_timeit_shlex():
+    """test shlex issues with timeit (#1109)"""
+    _ip.ex("def f(*a,**kw): pass")
+    _ip.magic('timeit -n1 "this is a bug".count(" ")')
+    _ip.magic('timeit -r1 -n1 f(" ", 1)')
+    _ip.magic('timeit -r1 -n1 f(" ", 1, " ", 2, " ")')
+    _ip.magic('timeit -r1 -n1 ("a " + "b")')
+    _ip.magic('timeit -r1 -n1 f("a " + "b")')
+    _ip.magic('timeit -r1 -n1 f("a " + "b ")')
diff --git a/IPython/utils/_process_common.py b/IPython/utils/_process_common.py
index f77cf59..e352155 100644
--- a/IPython/utils/_process_common.py
+++ b/IPython/utils/_process_common.py
@@ -146,12 +146,18 @@ def getoutputerror(cmd):
     return py3compat.bytes_to_str(out), py3compat.bytes_to_str(err)
 
 
-def arg_split(s, posix=False):
+def arg_split(s, posix=False, strict=True):
     """Split a command line's arguments in a shell-like manner.
 
     This is a modified version of the standard library's shlex.split()
     function, but with a default of posix=False for splitting, so that quotes
-    in inputs are respected."""
+    in inputs are respected.
+
+    if strict=False, then any errors shlex.split would raise will result in the
+    unparsed remainder being the last element of the list, rather than raising.
+    This is because we sometimes use arg_split to parse things other than
+    command-line args.
+    """
 
     # Unfortunately, python's shlex module is buggy with unicode input:
     # http://bugs.python.org/issue1170
@@ -163,7 +169,25 @@ def arg_split(s, posix=False):
         s = s.encode('utf-8')
     lex = shlex.shlex(s, posix=posix)
     lex.whitespace_split = True
-    tokens = list(lex)
+    # Extract tokens, ensuring that things like leaving open quotes
+    # does not cause this to raise.  This is important, because we
+    # sometimes pass Python source through this (e.g. %timeit f(" ")),
+    # and it shouldn't raise an exception.
+    # It may be a bad idea to parse things that are not command-line args
+    # through this function, but we do, so let's be safe about it.
+    tokens = []
+    while True:
+        try:
+            tokens.append(lex.next())
+        except StopIteration:
+            break
+        except ValueError:
+            if strict:
+                raise
+            # couldn't parse, get remaining blob as last token
+            tokens.append(lex.token)
+            break
+    
     if is_unicode:
         # Convert the tokens back to unicode.
         tokens = [x.decode('utf-8') for x in tokens]