upstream/ipython Commit - r3039:3f194000

Unicode fix for case when stdin is a pipe (like during test suite)

Fernando Perez -

r3039:3f194000

parent child

IPython/core/splitinput.py

0 +4 -1

              #!/usr/bin/env python
              # encoding: utf-8
              """
              Simple utility for splitting user input.
              Authors:
              * Brian Granger
              * Fernando Perez
              """
              #-----------------------------------------------------------------------------
              #  Copyright (C) 2008-2009  The IPython Development Team
              #
              #  Distributed under the terms of the BSD License.  The full license is in
              #  the file COPYING, distributed as part of this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
              import re
              import sys
              #-----------------------------------------------------------------------------
              # Main function
              #-----------------------------------------------------------------------------
              # RegExp for splitting line contents into pre-char//first word-method//rest.
              # For clarity, each group in on one line.
              # WARNING: update the regexp if the escapes in interactiveshell are changed, as they
              # are hardwired in.
              # Although it's not solely driven by the regex, note that:
              # ,;/% only trigger if they are the first character on the line
              # ! and !! trigger if they are first char(s) *or* follow an indent
              # ? triggers as first or last char.
              # The three parts of the regex are:
              #  1) pre:     pre_char *or* initial whitespace
              #  2) ifun:    first word/method (mix of \w and '.')
              #  3) the_rest: rest of line (separated from ifun by space if non-empty)
              line_split = re.compile(r'^([,;/%?]|!!?|\s*)'
                                      r'\s*([\w\.]+)'
                                      r'(\s+.*$|$)')
              # r'[\w\.]+'
              # r'\s*=\s*%.*'
              def split_user_input(line, pattern=None):
                  """Split user input into pre-char/whitespace, function part and rest.
                  This is currently handles lines with '=' in them in a very inconsistent
                  manner.
                  """
                  # We need to ensure that the rest of this routine deals only with unicode
                  if type(line)==str:
-                     line = line.decode(sys.stdin.encoding)
+                     codec = sys.stdin.encoding
+                     if codec is None:
+                         codec = 'utf-8'
+                     line = line.decode(codec)
                  if pattern is None:
                      pattern = line_split
                  match = pattern.match(line)
                  if not match:
                      # print "match failed for line '%s'" % line
                      try:
                          ifun, the_rest = line.split(None,1)
                      except ValueError:
                          # print "split failed for line '%s'" % line
                          ifun, the_rest = line, u''
                      pre = re.match('^(\s*)(.*)',line).groups()[0]
                  else:
                      pre,ifun,the_rest = match.groups()
                  # ifun has to be a valid python identifier, so it better encode into
                  # ascii.  We do still make it a unicode string so that we consistently
                  # return unicode, but it will be one that is guaranteed to be pure ascii
                  try:
                      ifun = unicode(ifun.encode('ascii'))
                  except UnicodeEncodeError:
                      the_rest = ifun + u' ' + the_rest
                      ifun = u''
                  #print 'line:<%s>' % line # dbg
                  #print 'pre <%s> ifun <%s> rest <%s>' % (pre,ifun.strip(),the_rest) # dbg
                  return pre, ifun.strip(), the_rest.lstrip()

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages