##// END OF EJS Templates
BUG: When given unicode inputs, arg_split should return unicode outputs. Always use UTF-8 to encode the string instead of relying on sys.stdin.encoding, which may not be able to represent the full range of Unicode characters. Moreover, when given unicode strings, arg_split is probably not receiving its input from a terminal, so the terminal's encoding is not the relevant one anyway.
Robert Kern -
Show More
@@ -114,11 +114,17 b' def arg_split(s, posix=False):'
114 # http://bugs.python.org/issue1170
114 # http://bugs.python.org/issue1170
115 # At least encoding the input when it's unicode seems to help, but there
115 # At least encoding the input when it's unicode seems to help, but there
116 # may be more problems lurking. Apparently this is fixed in python3.
116 # may be more problems lurking. Apparently this is fixed in python3.
117 is_unicode = False
117 if isinstance(s, unicode):
118 if isinstance(s, unicode):
118 s = s.encode(sys.stdin.encoding)
119 is_unicode = True
120 s = s.encode('utf-8')
119 lex = shlex.shlex(s, posix=posix)
121 lex = shlex.shlex(s, posix=posix)
120 lex.whitespace_split = True
122 lex.whitespace_split = True
121 return list(lex)
123 tokens = list(lex)
124 if is_unicode:
125 # Convert the tokens back to unicode.
126 tokens = [x.decode('utf-8') for x in tokens]
127 return tokens
122
128
123
129
124 def abbrev_cwd():
130 def abbrev_cwd():
@@ -66,6 +66,9 b' def test_arg_split():'
66 """Ensure that argument lines are correctly split like in a shell."""
66 """Ensure that argument lines are correctly split like in a shell."""
67 tests = [['hi', ['hi']],
67 tests = [['hi', ['hi']],
68 [u'hi', [u'hi']],
68 [u'hi', [u'hi']],
69 ['hello there', ['hello', 'there']],
70 [u'h\N{LATIN SMALL LETTER A WITH CARON}llo', [u'h\N{LATIN SMALL LETTER A WITH CARON}llo']],
71 ['something "with quotes"', ['something', '"with quotes"']],
69 ]
72 ]
70 for argstr, argv in tests:
73 for argstr, argv in tests:
71 nt.assert_equal(arg_split(argstr), argv)
74 nt.assert_equal(arg_split(argstr), argv)
General Comments 0
You need to be logged in to leave comments. Login now