diff --git a/IPython/utils/process.py b/IPython/utils/process.py index 3e18640..597b4de 100644 --- a/IPython/utils/process.py +++ b/IPython/utils/process.py @@ -22,7 +22,16 @@ import shlex # Our own if sys.platform == 'win32': + import ctypes + from ctypes.wintypes import LPCWSTR, HLOCAL + from ctypes import c_int, POINTER from ._process_win32 import _find_cmd, system, getoutput, AvoidUNCPath + CommandLineToArgvW = ctypes.windll.shell32.CommandLineToArgvW + CommandLineToArgvW.arg_types = [LPCWSTR, POINTER(c_int)] + CommandLineToArgvW.res_types = [POINTER(LPCWSTR)] + LocalFree = ctypes.windll.kernel32.LocalFree + LocalFree.res_type = HLOCAL + LocalFree.arg_types = [HLOCAL] else: from ._process_posix import _find_cmd, system, getoutput @@ -103,29 +112,42 @@ def pycmd2argv(cmd): else: return [sys.executable, cmd] - -def arg_split(s, posix=False): - """Split a command line's arguments in a shell-like manner. - - This is a modified version of the standard library's shlex.split() - function, but with a default of posix=False for splitting, so that quotes - in inputs are respected.""" - - # Unfortunately, python's shlex module is buggy with unicode input: - # http://bugs.python.org/issue1170 - # At least encoding the input when it's unicode seems to help, but there - # may be more problems lurking. Apparently this is fixed in python3. - is_unicode = False - if (not py3compat.PY3) and isinstance(s, unicode): - is_unicode = True - s = s.encode('utf-8') - lex = shlex.shlex(s, posix=posix) - lex.whitespace_split = True - tokens = list(lex) - if is_unicode: - # Convert the tokens back to unicode. - tokens = [x.decode('utf-8') for x in tokens] - return tokens +if sys.platform == 'win32': + def arg_split(commandline, posix=False): + """Split a command line's arguments in a shell-like manner. + + This is a special version for windows that use a ctypes call to CommandLineToArgvW + to do the argv splitting. The posix paramter is ignored. + """ + argvn = c_int() + result_pointer = CommandLineToArgvW(py3compat.str_to_unicode(commandline.lstrip()), ctypes.byref(argvn)) + result_array_type = LPCWSTR * argvn.value + result = [arg for arg in result_array_type.from_address(result_pointer)] + retval = LocalFree(result_pointer) + return result +else: + def arg_split(s, posix=False): + """Split a command line's arguments in a shell-like manner. + + This is a modified version of the standard library's shlex.split() + function, but with a default of posix=False for splitting, so that quotes + in inputs are respected.""" + + # Unfortunately, python's shlex module is buggy with unicode input: + # http://bugs.python.org/issue1170 + # At least encoding the input when it's unicode seems to help, but there + # may be more problems lurking. Apparently this is fixed in python3. + is_unicode = False + if (not py3compat.PY3) and isinstance(s, unicode): + is_unicode = True + s = s.encode('utf-8') + lex = shlex.shlex(s, posix=posix) + lex.whitespace_split = True + tokens = list(lex) + if is_unicode: + # Convert the tokens back to unicode. + tokens = [x.decode('utf-8') for x in tokens] + return tokens def abbrev_cwd(): diff --git a/IPython/utils/tests/test_process.py b/IPython/utils/tests/test_process.py index 21af0ef..4e72399 100644 --- a/IPython/utils/tests/test_process.py +++ b/IPython/utils/tests/test_process.py @@ -62,12 +62,25 @@ def test_find_cmd_fail(): nt.assert_raises(FindCmdError,find_cmd,'asdfasdf') +@dec.skip_win32 def test_arg_split(): """Ensure that argument lines are correctly split like in a shell.""" tests = [['hi', ['hi']], [u'hi', [u'hi']], ['hello there', ['hello', 'there']], - [u'h\N{LATIN SMALL LETTER A WITH CARON}llo', [u'h\N{LATIN SMALL LETTER A WITH CARON}llo']], +# [u'h\N{LATIN SMALL LETTER A WITH CARON}llo', [u'h\N{LATIN SMALL LETTER A WITH CARON}llo']], + ['something "with quotes"', ['something', '"with quotes"']], + ] + for argstr, argv in tests: + nt.assert_equal(arg_split(argstr), argv) + +@dec.skip_win32 +def test_arg_split_win32(): + """Ensure that argument lines are correctly split like in a shell.""" + tests = [['hi', ['hi']], + [u'hi', [u'hi']], + ['hello there', ['hello', 'there']], + # [u'h\N{LATIN SMALL LETTER A WITH CARON}llo', [u'h\N{LATIN SMALL LETTER A WITH CARON}llo']], ['something "with quotes"', ['something', '"with quotes"']], ] for argstr, argv in tests: