|
|
#!/usr/bin/env python
|
|
|
# encoding: utf-8
|
|
|
"""
|
|
|
Simple utility for splitting user input.
|
|
|
|
|
|
Authors:
|
|
|
|
|
|
* Brian Granger
|
|
|
* Fernando Perez
|
|
|
"""
|
|
|
|
|
|
#-----------------------------------------------------------------------------
|
|
|
# Copyright (C) 2008-2009 The IPython Development Team
|
|
|
#
|
|
|
# Distributed under the terms of the BSD License. The full license is in
|
|
|
# the file COPYING, distributed as part of this software.
|
|
|
#-----------------------------------------------------------------------------
|
|
|
|
|
|
#-----------------------------------------------------------------------------
|
|
|
# Imports
|
|
|
#-----------------------------------------------------------------------------
|
|
|
|
|
|
import re
|
|
|
import sys
|
|
|
|
|
|
#-----------------------------------------------------------------------------
|
|
|
# Main function
|
|
|
#-----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
# RegExp for splitting line contents into pre-char//first word-method//rest.
|
|
|
# For clarity, each group in on one line.
|
|
|
|
|
|
# WARNING: update the regexp if the escapes in interactiveshell are changed, as they
|
|
|
# are hardwired in.
|
|
|
|
|
|
# Although it's not solely driven by the regex, note that:
|
|
|
# ,;/% only trigger if they are the first character on the line
|
|
|
# ! and !! trigger if they are first char(s) *or* follow an indent
|
|
|
# ? triggers as first or last char.
|
|
|
|
|
|
# The three parts of the regex are:
|
|
|
# 1) pre: pre_char *or* initial whitespace
|
|
|
# 2) ifun: first word/method (mix of \w and '.')
|
|
|
# 3) the_rest: rest of line (separated from ifun by space if non-empty)
|
|
|
line_split = re.compile(r'^([,;/%?]|!!?|\s*)'
|
|
|
r'\s*([\w\.]+)'
|
|
|
r'(\s+.*$|$)')
|
|
|
|
|
|
# r'[\w\.]+'
|
|
|
# r'\s*=\s*%.*'
|
|
|
|
|
|
def split_user_input(line, pattern=None):
|
|
|
"""Split user input into pre-char/whitespace, function part and rest.
|
|
|
|
|
|
This is currently handles lines with '=' in them in a very inconsistent
|
|
|
manner.
|
|
|
"""
|
|
|
# We need to ensure that the rest of this routine deals only with unicode
|
|
|
if type(line)==str:
|
|
|
codec = sys.stdin.encoding
|
|
|
if codec is None:
|
|
|
codec = 'utf-8'
|
|
|
line = line.decode(codec)
|
|
|
|
|
|
if pattern is None:
|
|
|
pattern = line_split
|
|
|
match = pattern.match(line)
|
|
|
if not match:
|
|
|
# print "match failed for line '%s'" % line
|
|
|
try:
|
|
|
ifun, the_rest = line.split(None,1)
|
|
|
except ValueError:
|
|
|
# print "split failed for line '%s'" % line
|
|
|
ifun, the_rest = line, u''
|
|
|
pre = re.match('^(\s*)(.*)',line).groups()[0]
|
|
|
else:
|
|
|
pre,ifun,the_rest = match.groups()
|
|
|
|
|
|
# ifun has to be a valid python identifier, so it better encode into
|
|
|
# ascii. We do still make it a unicode string so that we consistently
|
|
|
# return unicode, but it will be one that is guaranteed to be pure ascii
|
|
|
try:
|
|
|
ifun = unicode(ifun.encode('ascii'))
|
|
|
except UnicodeEncodeError:
|
|
|
the_rest = ifun + u' ' + the_rest
|
|
|
ifun = u''
|
|
|
|
|
|
#print 'line:<%s>' % line # dbg
|
|
|
#print 'pre <%s> ifun <%s> rest <%s>' % (pre,ifun.strip(),the_rest) # dbg
|
|
|
return pre, ifun.strip(), the_rest.lstrip()
|
|
|
|