##// END OF EJS Templates
Unicode fix for case when stdin is a pipe (like during test suite)
Fernando Perez -
Show More
@@ -1,88 +1,91 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 Simple utility for splitting user input.
5 5
6 6 Authors:
7 7
8 8 * Brian Granger
9 9 * Fernando Perez
10 10 """
11 11
12 12 #-----------------------------------------------------------------------------
13 13 # Copyright (C) 2008-2009 The IPython Development Team
14 14 #
15 15 # Distributed under the terms of the BSD License. The full license is in
16 16 # the file COPYING, distributed as part of this software.
17 17 #-----------------------------------------------------------------------------
18 18
19 19 #-----------------------------------------------------------------------------
20 20 # Imports
21 21 #-----------------------------------------------------------------------------
22 22
23 23 import re
24 24 import sys
25 25
26 26 #-----------------------------------------------------------------------------
27 27 # Main function
28 28 #-----------------------------------------------------------------------------
29 29
30 30
31 31 # RegExp for splitting line contents into pre-char//first word-method//rest.
32 32 # For clarity, each group is on one line.
33 33
34 34 # WARNING: update the regexp if the escapes in interactiveshell are changed, as they
35 35 # are hardwired in.
36 36
37 37 # Although it's not solely driven by the regex, note that:
38 38 # ,;/% only trigger if they are the first character on the line
39 39 # ! and !! trigger if they are first char(s) *or* follow an indent
40 40 # ? triggers as first or last char.
41 41
42 42 # The three parts of the regex are:
43 43 # 1) pre: pre_char *or* initial whitespace
44 44 # 2) ifun: first word/method (mix of \w and '.')
45 45 # 3) the_rest: rest of line (separated from ifun by space if non-empty)
# Compiled splitter: the three fragments below are concatenated, in order,
# into a single pattern with exactly three capture groups.
line_split = re.compile(''.join((
    r'^([,;/%?]|!!?|\s*)',  # group 1 (pre): escape char(s) or leading whitespace
    r'\s*([\w\.]+)',        # group 2 (ifun): first word, made of \w and '.'
    r'(\s+.*$|$)',          # group 3 (the_rest): whitespace + remainder, or nothing
)))
49 49
50 50 # r'[\w\.]+'
51 51 # r'\s*=\s*%.*'
52 52
def split_user_input(line, pattern=None):
    """Split user input into pre-char/whitespace, function part and rest.

    Parameters
    ----------
    line : str or unicode
        The input line.  Byte strings are decoded with the encoding reported
        by ``sys.stdin``; when stdin reports no encoding (e.g. it is a pipe,
        as during the test suite) or has no ``encoding`` attribute at all,
        UTF-8 is used instead.
    pattern : compiled regular expression, optional
        Pattern whose match yields three groups ``(pre, ifun, the_rest)``.
        Defaults to the module-level ``line_split``.

    Returns
    -------
    tuple
        ``(pre, ifun, the_rest)`` as text.  ``ifun`` is stripped and is
        guaranteed to be pure ASCII (it is emptied, and its content moved to
        the front of ``the_rest``, when it is not); ``the_rest`` has its
        leading whitespace removed.

    Notes
    -----
    This currently handles lines with '=' in them in a very inconsistent
    manner.
    """
    # The rest of this routine deals only with unicode text.  ``bytes`` is an
    # alias of ``str`` on Python 2, so this check covers byte strings on both
    # major versions (and, unlike ``type(line) == str``, subclasses too).
    if isinstance(line, bytes):
        # stdin may be a pipe (encoding is None) or a replacement object that
        # has no ``encoding`` attribute; fall back to UTF-8 in either case.
        codec = getattr(sys.stdin, 'encoding', None) or 'utf-8'
        line = line.decode(codec)

    if pattern is None:
        pattern = line_split
    match = pattern.match(line)
    if match:
        pre, ifun, the_rest = match.groups()
    else:
        # The pattern did not apply: split on the first run of whitespace.
        try:
            ifun, the_rest = line.split(None, 1)
        except ValueError:
            # Fewer than two whitespace-separated parts (or an empty line).
            ifun, the_rest = line, u''
        # Both groups may match the empty string, so this always matches.
        pre = re.match(r'^(\s*)(.*)', line).groups()[0]

    # ifun has to be a valid python identifier, so it had better encode into
    # ascii.  It stays a text string so that we consistently return text; if
    # it is not pure ascii it is pushed back onto the front of the_rest.
    try:
        ifun.encode('ascii')
    except UnicodeEncodeError:
        the_rest = ifun + u' ' + the_rest
        ifun = u''

    return pre, ifun.strip(), the_rest.lstrip()
General Comments 0
You need to be logged in to leave comments. Login now