def arg_split(s, posix=False):
    """Split a command line's arguments in a shell-like manner.

    This is a modified version of the standard library's shlex.split()
    function, but with a default of posix=False for splitting, so that quotes
    in inputs are respected.

    Parameters
    ----------
    s : str or unicode
        The command line to split.
    posix : bool, optional
        Forwarded to ``shlex.shlex``.  With the default (False) quote
        characters are kept as part of the token they delimit; with True they
        are stripped.

    Returns
    -------
    list
        The tokens, split on whitespace only.  On Python 2 the tokens are
        unicode when the input was unicode.

    Raises
    ------
    ValueError
        Raised by shlex when the input cannot be tokenized, e.g. a quote is
        opened but never closed.
    """
    # Unfortunately, python's shlex module is buggy with unicode input on
    # Python 2: http://bugs.python.org/issue1170
    # Encoding the input to utf-8 bytes and decoding the tokens afterwards
    # works around it.  The name ``unicode`` only exists on Python 2; the
    # version test short-circuits before it is evaluated on Python 3.
    is_unicode = sys.version_info[0] < 3 and isinstance(s, unicode)  # noqa: F821
    if is_unicode:
        s = s.encode('utf-8')

    lex = shlex.shlex(s, posix=posix)
    lex.whitespace_split = True
    # shlex treats '#' as a comment starter by default, which silently drops
    # everything from the first '#' onwards.  Arguments are not source code,
    # so no character may start a comment here.
    lex.commenters = ''
    tokens = list(lex)

    if is_unicode:
        # Convert the tokens back to unicode.
        tokens = [x.decode('utf-8') for x in tokens]
    return tokens
# Prototypes for the two Win32 calls used by arg_split().  ctypes only reads
# the attributes named ``argtypes`` and ``restype``; any other attribute name
# is stored but ignored, leaving the return value to be treated as a C int,
# which truncates pointers on 64-bit Windows.
CommandLineToArgvW = ctypes.windll.shell32.CommandLineToArgvW
CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
CommandLineToArgvW.restype = POINTER(LPCWSTR)
LocalFree = ctypes.windll.kernel32.LocalFree
LocalFree.argtypes = [HLOCAL]
LocalFree.restype = HLOCAL


def arg_split(commandline, posix=False):
    """Split a command line's arguments in a shell-like manner.

    This is a special version for windows that uses a ctypes call to
    CommandLineToArgvW to do the argv splitting.

    Parameters
    ----------
    commandline : str or unicode
        The command line to split.  Leading whitespace is stripped before the
        string is handed to CommandLineToArgvW.
    posix : bool, optional
        Ignored; accepted only so the signature matches the posix version.

    Returns
    -------
    list
        The arguments as unicode strings; an empty list for an empty or
        whitespace-only command line.

    Raises
    ------
    WindowsError
        If CommandLineToArgvW fails and returns a NULL pointer.
    """
    # CommandLineToArgvW returns the path to the executable if called with an
    # empty string, so that case is answered here instead.
    if commandline.strip() == "":
        return []

    argvn = c_int()
    result_pointer = CommandLineToArgvW(
        py3compat.cast_unicode(commandline.lstrip()), ctypes.byref(argvn))
    if not result_pointer:
        raise ctypes.WinError()
    try:
        # Indexing the LPCWSTR pointer copies each argument into a Python
        # string, so the list stays valid after the buffer is released.
        return [result_pointer[i] for i in range(argvn.value)]
    finally:
        # The argv buffer is allocated by Windows and must be released with
        # LocalFree, even if building the list fails.
        LocalFree(result_pointer)
system, getoutput, arg_split + from ._process_common import getoutputerror from IPython.utils import py3compat @@ -112,44 +104,6 @@ def pycmd2argv(cmd): else: return [sys.executable, cmd] -if sys.platform == 'win32': - def arg_split(commandline, posix=False): - """Split a command line's arguments in a shell-like manner. - - This is a special version for windows that use a ctypes call to CommandLineToArgvW - to do the argv splitting. The posix paramter is ignored. - """ - argvn = c_int() - result_pointer = CommandLineToArgvW(py3compat.str_to_unicode(commandline.lstrip()), ctypes.byref(argvn)) - result_array_type = LPCWSTR * argvn.value - result = [arg for arg in result_array_type.from_address(result_pointer)] - retval = LocalFree(result_pointer) - return result -else: - def arg_split(s, posix=False): - """Split a command line's arguments in a shell-like manner. - - This is a modified version of the standard library's shlex.split() - function, but with a default of posix=False for splitting, so that quotes - in inputs are respected.""" - - # Unfortunately, python's shlex module is buggy with unicode input: - # http://bugs.python.org/issue1170 - # At least encoding the input when it's unicode seems to help, but there - # may be more problems lurking. Apparently this is fixed in python3. - is_unicode = False - if (not py3compat.PY3) and isinstance(s, unicode): - is_unicode = True - s = s.encode('utf-8') - lex = shlex.shlex(s, posix=posix) - lex.whitespace_split = True - tokens = list(lex) - if is_unicode: - # Convert the tokens back to unicode. - tokens = [x.decode('utf-8') for x in tokens] - return tokens - - def abbrev_cwd(): """ Return abbreviated version of cwd, e.g. 
d:mydir """ cwd = os.getcwdu().replace('\\','/') diff --git a/IPython/utils/tests/test_process.py b/IPython/utils/tests/test_process.py index 4994a3e..f946fcf 100644 --- a/IPython/utils/tests/test_process.py +++ b/IPython/utils/tests/test_process.py @@ -68,7 +68,10 @@ def test_arg_split(): tests = [['hi', ['hi']], [u'hi', [u'hi']], ['hello there', ['hello', 'there']], -# [u'h\N{LATIN SMALL LETTER A WITH CARON}llo', [u'h\N{LATIN SMALL LETTER A WITH CARON}llo']], + # \u01ce == \N{LATIN SMALL LETTER A WITH CARON} + # Do not use \N because the tests crash with syntax error in + # some cases, for example windows python2.6. + [u'h\u01cello', [u'h\u01cello']], ['something "with quotes"', ['something', '"with quotes"']], ] for argstr, argv in tests: @@ -80,7 +83,7 @@ def test_arg_split_win32(): tests = [['hi', ['hi']], [u'hi', [u'hi']], ['hello there', ['hello', 'there']], - # [u'h\N{LATIN SMALL LETTER A WITH CARON}llo', [u'h\N{LATIN SMALL LETTER A WITH CARON}llo']], + [u'h\u01cello', [u'h\u01cello']], ['something "with quotes"', ['something', 'with quotes']], ] for argstr, argv in tests: