From 790cb1437eded4851dea45d0d2226f9e73e68c3e 2011-12-12 19:00:47
From: MinRK <benjaminrk@gmail.com>
Date: 2011-12-12 19:00:47
Subject: [PATCH] Merge shlex PRs (#1130, #1116)

* arg_split now takes an optional strict flag; with strict=False, ValueErrors
  raised during shlex parsing are caught instead of propagated
* %timeit uses strict=False, to avoid errors when parsing Python code
* the %run completer uses arg_split(strict=False) for its unicode handling,
  replacing the custom shlex derivative, which is now redundant.

closes #1109
closes #1115
closes #1116
closes #1130

---

diff --git a/IPython/core/completerlib.py b/IPython/core/completerlib.py
index c210ee6..7717039 100644
--- a/IPython/core/completerlib.py
+++ b/IPython/core/completerlib.py
@@ -20,7 +20,6 @@ import glob
 import inspect
 import os
 import re
-import shlex
 import sys
 
 # Third-party imports
@@ -31,6 +30,7 @@ from zipimport import zipimporter
 from IPython.core.completer import expand_user, compress_user
 from IPython.core.error import TryNext
 from IPython.utils import py3compat
+from IPython.utils._process_common import arg_split
 
 # FIXME: this should be pulled in with the right call via the component system
 from IPython.core.ipapi import get as get_ipython
@@ -56,35 +56,6 @@ magic_run_re = re.compile(r'.*(\.ipy|\.py[w]?)$')
 # Local utilities
 #-----------------------------------------------------------------------------
 
-def shlex_split(x):
-    """Helper function to split lines into segments.
-    """
-    # shlex.split raises an exception if there is a syntax error in sh syntax
-    # for example if no closing " is found. This function keeps dropping the
-    # last character of the line until shlex.split does not raise
-    # an exception. It adds end of the line to the result of shlex.split
-    #
-    # Example:
-    # %run "c:/python -> ['%run','"c:/python']
-
-    # shlex.split has unicode bugs in Python 2, so encode first to str
-    if not py3compat.PY3:
-        x = py3compat.cast_bytes(x)
-
-    endofline = []
-    while x != '':
-        try:
-            comps = shlex.split(x)
-            if len(endofline) >= 1:
-                comps.append(''.join(endofline))
-            return comps
-
-        except ValueError:
-            endofline = [x[-1:]]+endofline
-            x = x[:-1]
-
-    return [''.join(endofline)]
-
 def module_list(path):
     """
     Return the list containing the names of the modules available in the given
@@ -265,7 +236,7 @@ def module_completer(self,event):
 def magic_run_completer(self, event):
     """Complete files that end in .py or .ipy for the %run command.
     """
-    comps = shlex_split(event.line)
+    comps = arg_split(event.line, strict=False)
     relpath = (len(comps) > 1 and comps[-1] or '').strip("'\"")
 
     #print("\nev=", event)  # dbg
diff --git a/IPython/core/magic.py b/IPython/core/magic.py
index 4670881..ee84680 100644
--- a/IPython/core/magic.py
+++ b/IPython/core/magic.py
@@ -266,6 +266,7 @@ python-profiler package from non-free.""")
         # Get options
         list_all = kw.get('list_all',0)
         posix = kw.get('posix', os.name == 'posix')
+        strict = kw.get('strict', True)
 
         # Check if we have more than one argument to warrant extra processing:
         odict = {}  # Dictionary with options
@@ -273,7 +274,7 @@ python-profiler package from non-free.""")
         if len(args) >= 1:
             # If the list of inputs only has 0 or 1 thing in it, there's no
             # need to look for options
-            argv = arg_split(arg_str,posix)
+            argv = arg_split(arg_str, posix, strict)
             # Do regular option processing
             try:
                 opts,args = getopt(argv,opt_str,*long_opts)
@@ -1865,7 +1866,7 @@ Currently the magic system has the following functions:\n"""
         scaling = [1, 1e3, 1e6, 1e9]
 
         opts, stmt = self.parse_options(parameter_s,'n:r:tcp:',
-                                        posix=False)
+                                        posix=False, strict=False)
         if stmt == "":
             return
         timefunc = timeit.default_timer
diff --git a/IPython/core/tests/test_completerlib.py b/IPython/core/tests/test_completerlib.py
new file mode 100644
index 0000000..2404b80
--- /dev/null
+++ b/IPython/core/tests/test_completerlib.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+"""Tests for completerlib.
+
+"""
+from __future__ import absolute_import
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+import os
+import shutil
+import sys
+import tempfile
+import unittest
+from os.path import join
+
+import nose.tools as nt
+from nose import SkipTest
+
+from IPython.core.completerlib import magic_run_completer
+from IPython.testing import decorators as dec
+from IPython.testing import tools as tt
+from IPython.utils import py3compat
+
+
+class MockEvent(object):
+    def __init__(self, line):
+        self.line = line
+
+#-----------------------------------------------------------------------------
+# Test functions begin
+#-----------------------------------------------------------------------------
+class Test_magic_run_completer(unittest.TestCase):
+    def setUp(self):
+        self.BASETESTDIR = tempfile.mkdtemp()
+        for fil in [u"aaø.py", u"a.py", u"b.py"]:
+            with open(join(self.BASETESTDIR, fil), "w") as sfile:
+                sfile.write("pass\n")
+        self.oldpath = os.getcwdu()
+        os.chdir(self.BASETESTDIR)
+
+    def tearDown(self):
+        os.chdir(self.oldpath)
+        shutil.rmtree(self.BASETESTDIR)
+
+    def test_1(self):
+        """Test magic_run_completer, should match two alternatives
+        """
+        event = MockEvent(u"%run a")
+        mockself = None
+        match = set(magic_run_completer(mockself, event))
+        self.assertEqual(match, set([u"a.py", u"aaø.py"]))
+
+    def test_2(self):
+        """Test magic_run_completer, should match one alternative
+        """
+        event = MockEvent(u"%run aa")
+        mockself = None
+        match = set(magic_run_completer(mockself, event))
+        self.assertEqual(match, set([u"aaø.py"]))
+
+    def test_3(self):
+        """Test magic_run_completer with unterminated " """
+        event = MockEvent(u'%run "a')
+        mockself = None
+        match = set(magic_run_completer(mockself, event))
+        self.assertEqual(match, set([u"a.py", u"aaø.py"]))
+
diff --git a/IPython/core/tests/test_magic.py b/IPython/core/tests/test_magic.py
index 8086371..7877825 100644
--- a/IPython/core/tests/test_magic.py
+++ b/IPython/core/tests/test_magic.py
@@ -344,3 +344,12 @@ def test_psearch():
     with tt.AssertPrints("dict.fromkeys"):
         _ip.run_cell("dict.fr*?")
 
+def test_timeit_shlex():
+    """test shlex issues with timeit (#1109)"""
+    _ip.ex("def f(*a,**kw): pass")
+    _ip.magic('timeit -n1 "this is a bug".count(" ")')
+    _ip.magic('timeit -r1 -n1 f(" ", 1)')
+    _ip.magic('timeit -r1 -n1 f(" ", 1, " ", 2, " ")')
+    _ip.magic('timeit -r1 -n1 ("a " + "b")')
+    _ip.magic('timeit -r1 -n1 f("a " + "b")')
+    _ip.magic('timeit -r1 -n1 f("a " + "b ")')
diff --git a/IPython/utils/_process_common.py b/IPython/utils/_process_common.py
index f77cf59..e352155 100644
--- a/IPython/utils/_process_common.py
+++ b/IPython/utils/_process_common.py
@@ -146,12 +146,18 @@ def getoutputerror(cmd):
     return py3compat.bytes_to_str(out), py3compat.bytes_to_str(err)
 
 
-def arg_split(s, posix=False):
+def arg_split(s, posix=False, strict=True):
     """Split a command line's arguments in a shell-like manner.
 
     This is a modified version of the standard library's shlex.split()
     function, but with a default of posix=False for splitting, so that quotes
-    in inputs are respected."""
+    in inputs are respected.
+
+    If strict=False, a ValueError raised by shlex during tokenizing does not
+    propagate; instead the unparsed remainder becomes the last element of the
+    returned list.  This is needed because arg_split is sometimes used to
+    parse things other than command-line args.
+    """
 
     # Unfortunately, python's shlex module is buggy with unicode input:
     # http://bugs.python.org/issue1170
@@ -163,7 +169,25 @@ def arg_split(s, posix=False):
         s = s.encode('utf-8')
     lex = shlex.shlex(s, posix=posix)
     lex.whitespace_split = True
-    tokens = list(lex)
+    # Extract tokens one at a time so that, when strict is False, input such
+    # as an unclosed quote does not raise.  This is important, because we
+    # sometimes pass Python source through this (e.g. %timeit f(" ")),
+    # and it shouldn't raise an exception.
+    # It may be a bad idea to parse things that are not command-line args
+    # through this function, but we do, so let's be safe about it.
+    tokens = []
+    while True:
+        try:
+            tokens.append(lex.next())
+        except StopIteration:
+            break
+        except ValueError:
+            if strict:
+                raise
+            # couldn't parse, get remaining blob as last token
+            tokens.append(lex.token)
+            break
+    
     if is_unicode:
         # Convert the tokens back to unicode.
         tokens = [x.decode('utf-8') for x in tokens]