splitinput.py
90 lines
| 3.0 KiB
| text/x-python
|
PythonLexer
Brian Granger
|
r2244 | # encoding: utf-8 | ||
""" | ||||
Simple utility for splitting user input. | ||||
Authors: | ||||
* Brian Granger | ||||
* Fernando Perez | ||||
""" | ||||
#----------------------------------------------------------------------------- | ||||
# Copyright (C) 2008-2009 The IPython Development Team | ||||
# | ||||
# Distributed under the terms of the BSD License. The full license is in | ||||
# the file COPYING, distributed as part of this software. | ||||
#----------------------------------------------------------------------------- | ||||
#----------------------------------------------------------------------------- | ||||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
import re | ||||
Fernando Perez
|
r3038 | import sys | ||
Brian Granger
|
r2244 | |||
#----------------------------------------------------------------------------- | ||||
# Main function | ||||
#----------------------------------------------------------------------------- | ||||
# RegExp for splitting line contents into pre-char//first word-method//rest. | ||||
# For clarity, each group is on one line.
Brian Granger
|
r2760 | # WARNING: update the regexp if the escapes in interactiveshell are changed, as they | ||
Brian Granger
|
r2244 | # are hardwired in. | ||
# Although it's not solely driven by the regex, note that: | ||||
# ,;/% only trigger if they are the first character on the line | ||||
# ! and !! trigger if they are first char(s) *or* follow an indent | ||||
# ? triggers as first or last char. | ||||
# The three parts of the regex are: | ||||
# 1) pre: pre_char *or* initial whitespace | ||||
# 2) ifun: first word/method (mix of \w and '.') | ||||
# 3) the_rest: rest of line (separated from ifun by space if non-empty) | ||||
# Default pattern used to split an input line into three groups:
# (pre, ifun, the_rest).  Compiled once at import time.
line_split = re.compile(
    r'^([,;/%?]|!!?|\s*)'   # 1) pre: a single escape char, '!' or '!!', or leading whitespace
    r'\s*([\w\.]+)'         # 2) ifun: first word/method, made of word characters and dots
    r'(\s+.*$|$)'           # 3) the_rest: whitespace followed by anything, or end of line
)
Brian Granger
|
r2256 | # r'[\w\.]+' | ||
# r'\s*=\s*%.*' | ||||
Brian Granger
|
r2244 | |||
def split_user_input(line, pattern=None):
    """Split user input into pre-char/whitespace, function part and rest.

    Lines with '=' in them are currently handled in a very inconsistent
    manner.

    Parameters
    ----------
    line : bytes or unicode text
        The raw input line.  Byte strings are decoded with the encoding
        reported by ``sys.stdin`` (falling back to utf-8) so that the rest of
        the routine only deals with unicode text.
    pattern : compiled regular expression, optional
        Pattern with three groups (pre, ifun, the_rest).  Defaults to the
        module-level ``line_split``.

    Returns
    -------
    (pre, ifun, the_rest) : tuple of unicode text
        ``pre`` is the escape character(s) or leading whitespace, ``ifun`` is
        the first word with surrounding whitespace stripped, and ``the_rest``
        is the remainder with leading whitespace stripped.  If ``ifun`` cannot
        be encoded as ASCII it is returned empty and its text is prepended to
        ``the_rest``.
    """
    # Make sure everything below operates on unicode text only.  ``bytes`` is
    # an alias of ``str`` on Python 2, so this matches the historical check
    # there, while text strings on Python 3 are left untouched.
    if isinstance(line, bytes):
        # sys.stdin may be None or a replacement object with no usable
        # ``encoding``; fall back to utf-8 in every such case.
        codec = getattr(sys.stdin, 'encoding', None) or 'utf-8'
        line = line.decode(codec)

    if pattern is None:
        pattern = line_split

    match = pattern.match(line)
    if match:
        pre, ifun, the_rest = match.groups()
    else:
        # The pattern did not apply: split on the first run of whitespace.
        try:
            ifun, the_rest = line.split(None, 1)
        except ValueError:
            # Fewer than two whitespace-separated fields.
            ifun, the_rest = line, u''
        pre = re.match(r'^(\s*)(.*)', line).groups()[0]

    # ifun has to be a valid python identifier, so it had better encode into
    # ascii.  The round trip keeps it as unicode text (so the return type is
    # consistent) that is guaranteed to be pure ascii.
    try:
        ifun = ifun.encode('ascii').decode('ascii')
    except UnicodeEncodeError:
        the_rest = ifun + u' ' + the_rest
        ifun = u''

    return pre, ifun.strip(), the_rest.lstrip()