From d1997d966a2e58e0bb781e4aed1cf74a06bc25ad 2011-11-30 21:11:17
From: Thomas Kluyver
Date: 2011-11-30 21:11:17
Subject: [PATCH] Merge branch 'win32-shlex'

---

diff --git a/IPython/utils/_process_common.py b/IPython/utils/_process_common.py
index 0843e89..f77cf59 100644
--- a/IPython/utils/_process_common.py
+++ b/IPython/utils/_process_common.py
@@ -15,6 +15,7 @@ of subprocess utilities, and it contains tools that are common to all of them.
 # Imports
 #-----------------------------------------------------------------------------
 import subprocess
+import shlex
 import sys
 
 from IPython.utils import py3compat
@@ -143,3 +144,27 @@ def getoutputerror(cmd):
         return '', ''
     out, err = out_err
     return py3compat.bytes_to_str(out), py3compat.bytes_to_str(err)
+
+
+def arg_split(s, posix=False):
+    """Split a command line's arguments in a shell-like manner.
+
+    This is a modified version of the standard library's shlex.split()
+    function, but with a default of posix=False for splitting, so that quotes
+    in inputs are respected."""
+
+    # Unfortunately, python's shlex module is buggy with unicode input:
+    # http://bugs.python.org/issue1170
+    # At least encoding the input when it's unicode seems to help, but there
+    # may be more problems lurking. Apparently this is fixed in python3.
+    is_unicode = False
+    if (not py3compat.PY3) and isinstance(s, unicode):
+        is_unicode = True
+        s = s.encode('utf-8')
+    lex = shlex.shlex(s, posix=posix)
+    lex.whitespace_split = True
+    tokens = list(lex)
+    if is_unicode:
+        # Convert the tokens back to unicode.
+        tokens = [x.decode('utf-8') for x in tokens]
+    return tokens
diff --git a/IPython/utils/_process_posix.py b/IPython/utils/_process_posix.py
index 9d17a00..6227f35 100644
--- a/IPython/utils/_process_posix.py
+++ b/IPython/utils/_process_posix.py
@@ -23,7 +23,7 @@ from IPython.external import pexpect
 
 # Our own
 from .autoattr import auto_attr
-from ._process_common import getoutput
+from ._process_common import getoutput, arg_split
 from IPython.utils import text
 from IPython.utils import py3compat
 
@@ -192,3 +192,6 @@ class ProcessHandler(object):
 # programs think they are talking to a tty and produce highly formatted output
 # (ls is a good example) that makes them hard.
 system = ProcessHandler().system
+
+
+
diff --git a/IPython/utils/_process_win32.py b/IPython/utils/_process_win32.py
index d26df21..9de9edf 100644
--- a/IPython/utils/_process_win32.py
+++ b/IPython/utils/_process_win32.py
@@ -18,11 +18,15 @@ from __future__ import print_function
 # stdlib
 import os
 import sys
+import ctypes
 
+from ctypes import c_int, POINTER
+from ctypes.wintypes import LPCWSTR, HLOCAL
 from subprocess import STDOUT
 
 # our own imports
 from ._process_common import read_no_interrupt, process_handler
+from . import py3compat
 from . import text
 
 #-----------------------------------------------------------------------------
@@ -146,3 +150,29 @@ def getoutput(cmd):
     if out is None:
         out = ''
     return out
+
+try:
+    CommandLineToArgvW = ctypes.windll.shell32.CommandLineToArgvW
+    CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
+    CommandLineToArgvW.restype = POINTER(LPCWSTR)
+    LocalFree = ctypes.windll.kernel32.LocalFree
+    LocalFree.restype = HLOCAL
+    LocalFree.argtypes = [HLOCAL]
+
+    def arg_split(commandline, posix=False):
+        """Split a command line's arguments in a shell-like manner.
+
+        This is a special version for Windows that uses a ctypes call to
+        CommandLineToArgvW to do the argv splitting. The posix parameter is ignored.
+        """
+        #CommandLineToArgvW returns path to executable if called with empty string.
+        if commandline.strip() == "":
+            return []
+        argvn = c_int()
+        argv = CommandLineToArgvW(py3compat.cast_unicode(commandline.lstrip()), ctypes.byref(argvn))
+        # Copy the strings out before LocalFree releases the buffer that owns them.
+        result = [argv[i] for i in range(argvn.value)]
+        LocalFree(argv)
+        return result
+except AttributeError:
+    from ._process_common import arg_split
diff --git a/IPython/utils/process.py b/IPython/utils/process.py
index 3e18640..2c2c669 100644
--- a/IPython/utils/process.py
+++ b/IPython/utils/process.py
@@ -22,9 +22,10 @@ import shlex
 
 # Our own
 if sys.platform == 'win32':
-    from ._process_win32 import _find_cmd, system, getoutput, AvoidUNCPath
+    from ._process_win32 import _find_cmd, system, getoutput, AvoidUNCPath, arg_split
 else:
-    from ._process_posix import _find_cmd, system, getoutput
+    from ._process_posix import _find_cmd, system, getoutput, arg_split
+
 from ._process_common import getoutputerror
 from IPython.utils import py3compat
 
@@ -103,31 +104,6 @@ def pycmd2argv(cmd):
     else:
         return [sys.executable, cmd]
 
-
-def arg_split(s, posix=False):
-    """Split a command line's arguments in a shell-like manner.
-
-    This is a modified version of the standard library's shlex.split()
-    function, but with a default of posix=False for splitting, so that quotes
-    in inputs are respected."""
-
-    # Unfortunately, python's shlex module is buggy with unicode input:
-    # http://bugs.python.org/issue1170
-    # At least encoding the input when it's unicode seems to help, but there
-    # may be more problems lurking. Apparently this is fixed in python3.
-    is_unicode = False
-    if (not py3compat.PY3) and isinstance(s, unicode):
-        is_unicode = True
-        s = s.encode('utf-8')
-    lex = shlex.shlex(s, posix=posix)
-    lex.whitespace_split = True
-    tokens = list(lex)
-    if is_unicode:
-        # Convert the tokens back to unicode.
-        tokens = [x.decode('utf-8') for x in tokens]
-    return tokens
-
-
 def abbrev_cwd():
     """ Return abbreviated version of cwd, e.g. d:mydir """
     cwd = os.getcwdu().replace('\\','/')
diff --git a/IPython/utils/tests/test_process.py b/IPython/utils/tests/test_process.py
index 21af0ef..f946fcf 100644
--- a/IPython/utils/tests/test_process.py
+++ b/IPython/utils/tests/test_process.py
@@ -62,16 +62,32 @@ def test_find_cmd_fail():
     nt.assert_raises(FindCmdError,find_cmd,'asdfasdf')
 
 
+@dec.skip_win32
 def test_arg_split():
     """Ensure that argument lines are correctly split like in a shell."""
     tests = [['hi', ['hi']],
              [u'hi', [u'hi']],
              ['hello there', ['hello', 'there']],
-             [u'h\N{LATIN SMALL LETTER A WITH CARON}llo', [u'h\N{LATIN SMALL LETTER A WITH CARON}llo']],
+             # \u01ce == \N{LATIN SMALL LETTER A WITH CARON}
+             # Do not use \N because the tests crash with syntax error in
+             # some cases, for example windows python2.6.
+             [u'h\u01cello', [u'h\u01cello']],
              ['something "with quotes"', ['something', '"with quotes"']],
              ]
     for argstr, argv in tests:
         nt.assert_equal(arg_split(argstr), argv)
+
+@dec.skip_if_not_win32
+def test_arg_split_win32():
+    """Ensure that argument lines are correctly split like in a shell."""
+    tests = [['hi', ['hi']],
+             [u'hi', [u'hi']],
+             ['hello there', ['hello', 'there']],
+             [u'h\u01cello', [u'h\u01cello']],
+             ['something "with quotes"', ['something', 'with quotes']],
+             ]
+    for argstr, argv in tests:
+        nt.assert_equal(arg_split(argstr), argv)
 
 
 class SubProcessTestCase(TestCase, tt.TempFileMixin):
@@ -100,6 +116,10 @@ class SubProcessTestCase(TestCase, tt.TempFileMixin):
     def test_getoutput_quoted(self):
         out = getoutput('python -c "print (1)"')
         self.assertEquals(out.strip(), '1')
+
+    #Invalid quoting on windows
+    @dec.skip_win32
+    def test_getoutput_quoted2(self):
         out = getoutput("python -c 'print (1)'")
         self.assertEquals(out.strip(), '1')
         out = getoutput("python -c 'print (\"1\")'")