Now include patched copies of tokenize for Python 2 and 3.
Thomas Kluyver
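For orientation, the rest of this changeset consumes just three names from the new module (see the input-transformer diff at the end). A minimal sketch of that surface, assuming the files land under IPython.utils as the import hunks below suggest; tokens_for() is an illustrative helper, not part of the commit:

from io import StringIO
from IPython.utils.tokenize2 import generate_tokens, untokenize, TokenError

def tokens_for(source):
    # Tokenize a (unicode) source string; raises TokenError if it is incomplete.
    return list(generate_tokens(StringIO(source).readline))

print(untokenize(tokens_for(u"a = [1, 2]\n")))   # round-trips the source text

try:
    tokens_for(u"a = [1, 2\n")                   # unclosed bracket
except TokenError:
    print("incomplete input")    # how IPython detects multi-line statements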
@@ -0,0 +1,438 b''
1 """Patched version of standard library tokenize, to deal with various bugs.
2
3 Patches
4
5 - Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
6 manually applied.
7 - Newlines in comments and blank lines should be either NL or NEWLINE, depending
8 on whether they are in a multi-line statement. Filed as Python issue #17061.
9
10 -------------------------------------------------------------------------------
11 Tokenization help for Python programs.
12
13 generate_tokens(readline) is a generator that breaks a stream of
14 text into Python tokens. It accepts a readline-like method which is called
15 repeatedly to get the next line of input (or "" for EOF). It generates
16 5-tuples with these members:
17
18 the token type (see token.py)
19 the token (a string)
20 the starting (row, column) indices of the token (a 2-tuple of ints)
21 the ending (row, column) indices of the token (a 2-tuple of ints)
22 the original line (string)
23
24 It is designed to match the working of the Python tokenizer exactly, except
25 that it produces COMMENT tokens for comments and gives type OP for all
26 operators
27
28 Older entry points
29 tokenize_loop(readline, tokeneater)
30 tokenize(readline, tokeneater=printtoken)
31 are the same, except instead of generating tokens, tokeneater is a callback
32 function to which the 5 fields described above are passed as 5 arguments,
33 each time a new token is found."""
34
35 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
36 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
37 'Skip Montanaro, Raymond Hettinger')
38
39 import string, re
40 from token import *
41
42 import token
43 __all__ = [x for x in dir(token) if not x.startswith("_")]
44 __all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
45 del x
46 del token
47
48 __all__ += ["TokenError"]
49
50 COMMENT = N_TOKENS
51 tok_name[COMMENT] = 'COMMENT'
52 NL = N_TOKENS + 1
53 tok_name[NL] = 'NL'
54 N_TOKENS += 2
55
56 def group(*choices): return '(' + '|'.join(choices) + ')'
57 def any(*choices): return group(*choices) + '*'
58 def maybe(*choices): return group(*choices) + '?'
59
60 Whitespace = r'[ \f\t]*'
61 Comment = r'#[^\r\n]*'
62 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
63 Name = r'[a-zA-Z_]\w*'
64
65 Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
66 Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
67 Binnumber = r'0[bB][01]+[lL]?'
68 Decnumber = r'[1-9]\d*[lL]?'
69 Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
70 Exponent = r'[eE][-+]?\d+'
71 Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
72 Expfloat = r'\d+' + Exponent
73 Floatnumber = group(Pointfloat, Expfloat)
74 Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
75 Number = group(Imagnumber, Floatnumber, Intnumber)
76
77 # Tail end of ' string.
78 Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
79 # Tail end of " string.
80 Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
81 # Tail end of ''' string.
82 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
83 # Tail end of """ string.
84 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
85 Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
86 # Single-line ' or " string.
87 String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
88 r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
89
90 # Because of leftmost-then-longest match semantics, be sure to put the
91 # longest operators first (e.g., if = came before ==, == would get
92 # recognized as two instances of =).
93 Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
94 r"//=?",
95 r"[+\-*/%&|^=<>]=?",
96 r"~")
97
98 Bracket = '[][(){}]'
99 Special = group(r'\r?\n', r'[:;.,`@]')
100 Funny = group(Operator, Bracket, Special)
101
102 PlainToken = group(Number, Funny, String, Name)
103 Token = Ignore + PlainToken
104
105 # First (or only) line of ' or " string.
106 ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
107 group("'", r'\\\r?\n'),
108 r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
109 group('"', r'\\\r?\n'))
110 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
111 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
112
113 tokenprog, pseudoprog, single3prog, double3prog = map(
114 re.compile, (Token, PseudoToken, Single3, Double3))
115 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
116 "'''": single3prog, '"""': double3prog,
117 "r'''": single3prog, 'r"""': double3prog,
118 "u'''": single3prog, 'u"""': double3prog,
119 "ur'''": single3prog, 'ur"""': double3prog,
120 "R'''": single3prog, 'R"""': double3prog,
121 "U'''": single3prog, 'U"""': double3prog,
122 "uR'''": single3prog, 'uR"""': double3prog,
123 "Ur'''": single3prog, 'Ur"""': double3prog,
124 "UR'''": single3prog, 'UR"""': double3prog,
125 "b'''": single3prog, 'b"""': double3prog,
126 "br'''": single3prog, 'br"""': double3prog,
127 "B'''": single3prog, 'B"""': double3prog,
128 "bR'''": single3prog, 'bR"""': double3prog,
129 "Br'''": single3prog, 'Br"""': double3prog,
130 "BR'''": single3prog, 'BR"""': double3prog,
131 'r': None, 'R': None, 'u': None, 'U': None,
132 'b': None, 'B': None}
133
134 triple_quoted = {}
135 for t in ("'''", '"""',
136 "r'''", 'r"""', "R'''", 'R"""',
137 "u'''", 'u"""', "U'''", 'U"""',
138 "ur'''", 'ur"""', "Ur'''", 'Ur"""',
139 "uR'''", 'uR"""', "UR'''", 'UR"""',
140 "b'''", 'b"""', "B'''", 'B"""',
141 "br'''", 'br"""', "Br'''", 'Br"""',
142 "bR'''", 'bR"""', "BR'''", 'BR"""'):
143 triple_quoted[t] = t
144 single_quoted = {}
145 for t in ("'", '"',
146 "r'", 'r"', "R'", 'R"',
147 "u'", 'u"', "U'", 'U"',
148 "ur'", 'ur"', "Ur'", 'Ur"',
149 "uR'", 'uR"', "UR'", 'UR"',
150 "b'", 'b"', "B'", 'B"',
151 "br'", 'br"', "Br'", 'Br"',
152 "bR'", 'bR"', "BR'", 'BR"' ):
153 single_quoted[t] = t
154
155 tabsize = 8
156
157 class TokenError(Exception): pass
158
159 class StopTokenizing(Exception): pass
160
161 def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
162 srow, scol = srow_scol
163 erow, ecol = erow_ecol
164 print "%d,%d-%d,%d:\t%s\t%s" % \
165 (srow, scol, erow, ecol, tok_name[type], repr(token))
166
167 def tokenize(readline, tokeneater=printtoken):
168 """
169 The tokenize() function accepts two parameters: one representing the
170 input stream, and one providing an output mechanism for tokenize().
171
172 The first parameter, readline, must be a callable object which provides
173 the same interface as the readline() method of built-in file objects.
174 Each call to the function should return one line of input as a string.
175
176 The second parameter, tokeneater, must also be a callable object. It is
177 called once for each token, with five arguments, corresponding to the
178 tuples generated by generate_tokens().
179 """
180 try:
181 tokenize_loop(readline, tokeneater)
182 except StopTokenizing:
183 pass
184
185 # backwards compatible interface
186 def tokenize_loop(readline, tokeneater):
187 for token_info in generate_tokens(readline):
188 tokeneater(*token_info)
189
190 class Untokenizer:
191
192 def __init__(self):
193 self.tokens = []
194 self.prev_row = 1
195 self.prev_col = 0
196
197 def add_whitespace(self, tok_type, start):
198 row, col = start
199 assert row >= self.prev_row
200 col_offset = col - self.prev_col
201 if col_offset > 0:
202 self.tokens.append(" " * col_offset)
203 elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
204 # Line was backslash-continued
205 self.tokens.append(" ")
206
207 def untokenize(self, tokens):
208 iterable = iter(tokens)
209 for t in iterable:
210 if len(t) == 2:
211 self.compat(t, iterable)
212 break
213 tok_type, token, start, end = t[:4]
214 self.add_whitespace(tok_type, start)
215 self.tokens.append(token)
216 self.prev_row, self.prev_col = end
217 if tok_type in (NEWLINE, NL):
218 self.prev_row += 1
219 self.prev_col = 0
220 return "".join(self.tokens)
221
222 def compat(self, token, iterable):
223 # This import is here to avoid problems when the itertools
224 # module is not built yet and tokenize is imported.
225 from itertools import chain
226 startline = False
227 prevstring = False
228 indents = []
229 toks_append = self.tokens.append
230 for tok in chain([token], iterable):
231 toknum, tokval = tok[:2]
232
233 if toknum in (NAME, NUMBER):
234 tokval += ' '
235
236 # Insert a space between two consecutive strings
237 if toknum == STRING:
238 if prevstring:
239 tokval = ' ' + tokval
240 prevstring = True
241 else:
242 prevstring = False
243
244 if toknum == INDENT:
245 indents.append(tokval)
246 continue
247 elif toknum == DEDENT:
248 indents.pop()
249 continue
250 elif toknum in (NEWLINE, NL):
251 startline = True
252 elif startline and indents:
253 toks_append(indents[-1])
254 startline = False
255 toks_append(tokval)
256
257 def untokenize(iterable):
258 """Transform tokens back into Python source code.
259
260 Each element returned by the iterable must be a token sequence
261 with at least two elements, a token number and token value. If
262 only two tokens are passed, the resulting output is poor.
263
264 Round-trip invariant for full input:
265 Untokenized source will match input source exactly
266
267 Round-trip invariant for limited input:
268 # Output text will tokenize back to the input
269 t1 = [tok[:2] for tok in generate_tokens(f.readline)]
270 newcode = untokenize(t1)
271 readline = iter(newcode.splitlines(1)).next
272 t2 = [tok[:2] for tok in generate_tokens(readline)]
273 assert t1 == t2
274 """
275 ut = Untokenizer()
276 return ut.untokenize(iterable)
277
278 def generate_tokens(readline):
279 """
280 The generate_tokens() generator requires one argument, readline, which
281 must be a callable object which provides the same interface as the
282 readline() method of built-in file objects. Each call to the function
283 should return one line of input as a string. Alternately, readline
284 can be a callable function terminating with StopIteration:
285 readline = open(myfile).next # Example of alternate readline
286
287 The generator produces 5-tuples with these members: the token type; the
288 token string; a 2-tuple (srow, scol) of ints specifying the row and
289 column where the token begins in the source; a 2-tuple (erow, ecol) of
290 ints specifying the row and column where the token ends in the source;
291 and the line on which the token was found. The line passed is the
292 logical line; continuation lines are included.
293 """
294 lnum = parenlev = continued = 0
295 namechars, numchars = string.ascii_letters + '_', '0123456789'
296 contstr, needcont = '', 0
297 contline = None
298 indents = [0]
299
300 while 1: # loop over lines in stream
301 try:
302 line = readline()
303 except StopIteration:
304 line = ''
305 lnum += 1
306 pos, max = 0, len(line)
307
308 if contstr: # continued string
309 if not line:
310 raise TokenError, ("EOF in multi-line string", strstart)
311 endmatch = endprog.match(line)
312 if endmatch:
313 pos = end = endmatch.end(0)
314 yield (STRING, contstr + line[:end],
315 strstart, (lnum, end), contline + line)
316 contstr, needcont = '', 0
317 contline = None
318 elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
319 yield (ERRORTOKEN, contstr + line,
320 strstart, (lnum, len(line)), contline)
321 contstr = ''
322 contline = None
323 continue
324 else:
325 contstr = contstr + line
326 contline = contline + line
327 continue
328
329 elif parenlev == 0 and not continued: # new statement
330 if not line: break
331 column = 0
332 while pos < max: # measure leading whitespace
333 if line[pos] == ' ':
334 column += 1
335 elif line[pos] == '\t':
336 column = (column//tabsize + 1)*tabsize
337 elif line[pos] == '\f':
338 column = 0
339 else:
340 break
341 pos += 1
342 if pos == max:
343 break
344
345 if line[pos] in '#\r\n': # skip comments or blank lines
346 if line[pos] == '#':
347 comment_token = line[pos:].rstrip('\r\n')
348 nl_pos = pos + len(comment_token)
349 yield (COMMENT, comment_token,
350 (lnum, pos), (lnum, pos + len(comment_token)), line)
351 yield (NEWLINE, line[nl_pos:],
352 (lnum, nl_pos), (lnum, len(line)), line)
353 else:
354 yield (NEWLINE, line[pos:],
355 (lnum, pos), (lnum, len(line)), line)
356 continue
357
358 if column > indents[-1]: # count indents or dedents
359 indents.append(column)
360 yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
361 while column < indents[-1]:
362 if column not in indents:
363 raise IndentationError(
364 "unindent does not match any outer indentation level",
365 ("<tokenize>", lnum, pos, line))
366 indents = indents[:-1]
367 yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
368
369 else: # continued statement
370 if not line:
371 raise TokenError, ("EOF in multi-line statement", (lnum, 0))
372 continued = 0
373
374 while pos < max:
375 pseudomatch = pseudoprog.match(line, pos)
376 if pseudomatch: # scan for tokens
377 start, end = pseudomatch.span(1)
378 spos, epos, pos = (lnum, start), (lnum, end), end
379 token, initial = line[start:end], line[start]
380
381 if initial in numchars or \
382 (initial == '.' and token != '.'): # ordinary number
383 yield (NUMBER, token, spos, epos, line)
384 elif initial in '\r\n':
385 yield (NL if parenlev > 0 else NEWLINE,
386 token, spos, epos, line)
387 elif initial == '#':
388 assert not token.endswith("\n")
389 yield (COMMENT, token, spos, epos, line)
390 elif token in triple_quoted:
391 endprog = endprogs[token]
392 endmatch = endprog.match(line, pos)
393 if endmatch: # all on one line
394 pos = endmatch.end(0)
395 token = line[start:pos]
396 yield (STRING, token, spos, (lnum, pos), line)
397 else:
398 strstart = (lnum, start) # multiple lines
399 contstr = line[start:]
400 contline = line
401 break
402 elif initial in single_quoted or \
403 token[:2] in single_quoted or \
404 token[:3] in single_quoted:
405 if token[-1] == '\n': # continued string
406 strstart = (lnum, start)
407 endprog = (endprogs[initial] or endprogs[token[1]] or
408 endprogs[token[2]])
409 contstr, needcont = line[start:], 1
410 contline = line
411 break
412 else: # ordinary string
413 yield (STRING, token, spos, epos, line)
414 elif initial in namechars: # ordinary name
415 yield (NAME, token, spos, epos, line)
416 elif initial == '\\': # continued stmt
417 continued = 1
418 else:
419 if initial in '([{':
420 parenlev += 1
421 elif initial in ')]}':
422 parenlev -= 1
423 yield (OP, token, spos, epos, line)
424 else:
425 yield (ERRORTOKEN, line[pos],
426 (lnum, pos), (lnum, pos+1), line)
427 pos += 1
428
429 for indent in indents[1:]: # pop remaining indent levels
430 yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
431 yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
432
433 if __name__ == '__main__': # testing
434 import sys
435 if len(sys.argv) > 1:
436 tokenize(open(sys.argv[1]).readline)
437 else:
438 tokenize(sys.stdin.readline)
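A quick way to see the issue #17061 patch above in action: at statement level, a comment-only or blank line now ends in a NEWLINE token rather than the stock library's NL. A minimal sketch (Python 2, to match the module above; assumes it is importable as _tokenize_py2):

from io import StringIO
from _tokenize_py2 import generate_tokens, tok_name

src = u"# just a comment\nx = 1\n"
for tok in generate_tokens(StringIO(src).readline):
    print("%-8s %r" % (tok_name[tok[0]], tok[1]))
# COMMENT  u'# just a comment'
# NEWLINE  u'\n'     <- stock tokenize emits NL here
# NAME     u'x'
# ...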
@@ -0,0 +1,574 b''
1 """Patched version of standard library tokenize, to deal with various bugs.
2
3 Based on Python 3.2 code.
4
5 Patches:
6
7 - Gareth Rees' patch for Python issue #12691 (untokenizing)
8 - Except we don't encode the output of untokenize
9 - Python 2 compatible syntax, so that it can be byte-compiled at installation
10 - Newlines in comments and blank lines should be either NL or NEWLINE, depending
11 on whether they are in a multi-line statement. Filed as Python issue #17061.
12 - Export generate_tokens & TokenError
13
14 ------------------------------------------------------------------------------
15 Tokenization help for Python programs.
16
17 tokenize(readline) is a generator that breaks a stream of bytes into
18 Python tokens. It decodes the bytes according to PEP-0263 for
19 determining source file encoding.
20
21 It accepts a readline-like method which is called repeatedly to get the
22 next line of input (or b"" for EOF). It generates 5-tuples with these
23 members:
24
25 the token type (see token.py)
26 the token (a string)
27 the starting (row, column) indices of the token (a 2-tuple of ints)
28 the ending (row, column) indices of the token (a 2-tuple of ints)
29 the original line (string)
30
31 It is designed to match the working of the Python tokenizer exactly, except
32 that it produces COMMENT tokens for comments and gives type OP for all
33 operators. Additionally, all token lists start with an ENCODING token
34 which tells you which encoding was used to decode the bytes stream.
35 """
36 from __future__ import absolute_import
37
38 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
39 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
40 'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
41 'Michael Foord')
42 import builtins
43 import re
44 import sys
45 from token import *
46 from codecs import lookup, BOM_UTF8
47 import collections
48 from io import TextIOWrapper
49 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
50
51 import token
52 __all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
53 "NL", "untokenize", "ENCODING", "TokenInfo"]
54 del token
55
56 __all__ += ["generate_tokens", "TokenError"]
57
58 COMMENT = N_TOKENS
59 tok_name[COMMENT] = 'COMMENT'
60 NL = N_TOKENS + 1
61 tok_name[NL] = 'NL'
62 ENCODING = N_TOKENS + 2
63 tok_name[ENCODING] = 'ENCODING'
64 N_TOKENS += 3
65
66 class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
67 def __repr__(self):
68 annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
69 return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
70 self._replace(type=annotated_type))
71
72 def group(*choices): return '(' + '|'.join(choices) + ')'
73 def any(*choices): return group(*choices) + '*'
74 def maybe(*choices): return group(*choices) + '?'
75
76 # Note: we use unicode matching for names ("\w") but ascii matching for
77 # number literals.
78 Whitespace = r'[ \f\t]*'
79 Comment = r'#[^\r\n]*'
80 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
81 Name = r'\w+'
82
83 Hexnumber = r'0[xX][0-9a-fA-F]+'
84 Binnumber = r'0[bB][01]+'
85 Octnumber = r'0[oO][0-7]+'
86 Decnumber = r'(?:0+|[1-9][0-9]*)'
87 Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
88 Exponent = r'[eE][-+]?[0-9]+'
89 Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
90 Expfloat = r'[0-9]+' + Exponent
91 Floatnumber = group(Pointfloat, Expfloat)
92 Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
93 Number = group(Imagnumber, Floatnumber, Intnumber)
94
95 # Tail end of ' string.
96 Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
97 # Tail end of " string.
98 Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
99 # Tail end of ''' string.
100 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
101 # Tail end of """ string.
102 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
103 Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
104 # Single-line ' or " string.
105 String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
106 r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
107
108 # Because of leftmost-then-longest match semantics, be sure to put the
109 # longest operators first (e.g., if = came before ==, == would get
110 # recognized as two instances of =).
111 Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
112 r"//=?", r"->",
113 r"[+\-*/%&|^=<>]=?",
114 r"~")
115
116 Bracket = '[][(){}]'
117 Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
118 Funny = group(Operator, Bracket, Special)
119
120 PlainToken = group(Number, Funny, String, Name)
121 Token = Ignore + PlainToken
122
123 # First (or only) line of ' or " string.
124 ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
125 group("'", r'\\\r?\n'),
126 r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
127 group('"', r'\\\r?\n'))
128 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
129 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
130
131 def _compile(expr):
132 return re.compile(expr, re.UNICODE)
133
134 tokenprog, pseudoprog, single3prog, double3prog = map(
135 _compile, (Token, PseudoToken, Single3, Double3))
136 endprogs = {"'": _compile(Single), '"': _compile(Double),
137 "'''": single3prog, '"""': double3prog,
138 "r'''": single3prog, 'r"""': double3prog,
139 "b'''": single3prog, 'b"""': double3prog,
140 "br'''": single3prog, 'br"""': double3prog,
141 "R'''": single3prog, 'R"""': double3prog,
142 "B'''": single3prog, 'B"""': double3prog,
143 "bR'''": single3prog, 'bR"""': double3prog,
144 "Br'''": single3prog, 'Br"""': double3prog,
145 "BR'''": single3prog, 'BR"""': double3prog,
146 'r': None, 'R': None, 'b': None, 'B': None}
147
148 triple_quoted = {}
149 for t in ("'''", '"""',
150 "r'''", 'r"""', "R'''", 'R"""',
151 "b'''", 'b"""', "B'''", 'B"""',
152 "br'''", 'br"""', "Br'''", 'Br"""',
153 "bR'''", 'bR"""', "BR'''", 'BR"""'):
154 triple_quoted[t] = t
155 single_quoted = {}
156 for t in ("'", '"',
157 "r'", 'r"', "R'", 'R"',
158 "b'", 'b"', "B'", 'B"',
159 "br'", 'br"', "Br'", 'Br"',
160 "bR'", 'bR"', "BR'", 'BR"' ):
161 single_quoted[t] = t
162
163 del _compile
164
165 tabsize = 8
166
167 class TokenError(Exception): pass
168
169 class StopTokenizing(Exception): pass
170
171
172 class Untokenizer:
173
174 def __init__(self):
175 self.tokens = []
176 self.prev_row = 1
177 self.prev_col = 0
178 self.encoding = 'utf-8'
179
180 def add_whitespace(self, tok_type, start):
181 row, col = start
182 assert row >= self.prev_row
183 col_offset = col - self.prev_col
184 if col_offset > 0:
185 self.tokens.append(" " * col_offset)
186 elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
187 # Line was backslash-continued.
188 self.tokens.append(" ")
189
190 def untokenize(self, tokens):
191 iterable = iter(tokens)
192 for t in iterable:
193 if len(t) == 2:
194 self.compat(t, iterable)
195 break
196 tok_type, token, start, end = t[:4]
197 if tok_type == ENCODING:
198 self.encoding = token
199 continue
200 self.add_whitespace(tok_type, start)
201 self.tokens.append(token)
202 self.prev_row, self.prev_col = end
203 if tok_type in (NEWLINE, NL):
204 self.prev_row += 1
205 self.prev_col = 0
206 return "".join(self.tokens)
207
208 def compat(self, token, iterable):
209 # This import is here to avoid problems when the itertools
210 # module is not built yet and tokenize is imported.
211 from itertools import chain
212 startline = False
213 prevstring = False
214 indents = []
215 toks_append = self.tokens.append
216
217 for tok in chain([token], iterable):
218 toknum, tokval = tok[:2]
219 if toknum == ENCODING:
220 self.encoding = tokval
221 continue
222
223 if toknum in (NAME, NUMBER):
224 tokval += ' '
225
226 # Insert a space between two consecutive strings
227 if toknum == STRING:
228 if prevstring:
229 tokval = ' ' + tokval
230 prevstring = True
231 else:
232 prevstring = False
233
234 if toknum == INDENT:
235 indents.append(tokval)
236 continue
237 elif toknum == DEDENT:
238 indents.pop()
239 continue
240 elif toknum in (NEWLINE, NL):
241 startline = True
242 elif startline and indents:
243 toks_append(indents[-1])
244 startline = False
245 toks_append(tokval)
246
247
248 def untokenize(tokens):
249 """
250 Convert ``tokens`` (an iterable) back into Python source code, returned
251 as a string. (Unlike the standard library version, this patched copy
252 does not encode its output; any ENCODING token is read but skipped.)
253
254 The result is guaranteed to tokenize back to match the input so that
255 the conversion is lossless and round-trips are assured. The
256 guarantee applies only to the token type and token string as the
257 spacing between tokens (column positions) may change.
258
259 :func:`untokenize` has two modes. If the input tokens are sequences
260 of length 2 (``type``, ``string``) then spaces are added as necessary to
261 preserve the round-trip property.
262
263 If the input tokens are sequences of length 4 or more (``type``,
264 ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
265 spaces are added so that each token appears in the result at the
266 position indicated by ``start`` and ``end``, if possible.
267 """
268 return Untokenizer().untokenize(tokens)
269
270
271 def _get_normal_name(orig_enc):
272 """Imitates get_normal_name in tokenizer.c."""
273 # Only care about the first 12 characters.
274 enc = orig_enc[:12].lower().replace("_", "-")
275 if enc == "utf-8" or enc.startswith("utf-8-"):
276 return "utf-8"
277 if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
278 enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
279 return "iso-8859-1"
280 return orig_enc
281
282 def detect_encoding(readline):
283 """
284 The detect_encoding() function is used to detect the encoding that should
285 be used to decode a Python source file. It requires one argument, readline,
286 in the same way as the tokenize() generator.
287
288 It will call readline a maximum of twice, and return the encoding used
289 (as a string) and a list of any lines (left as bytes) it has read in.
290
291 It detects the encoding from the presence of a utf-8 bom or an encoding
292 cookie as specified in pep-0263. If both a bom and a cookie are present,
293 but disagree, a SyntaxError will be raised. If the encoding cookie is an
294 invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
295 'utf-8-sig' is returned.
296
297 If no encoding is specified, then the default of 'utf-8' will be returned.
298 """
299 bom_found = False
300 encoding = None
301 default = 'utf-8'
302 def read_or_stop():
303 try:
304 return readline()
305 except StopIteration:
306 return b''
307
308 def find_cookie(line):
309 try:
310 # Decode as UTF-8. Either the line is an encoding declaration,
311 # in which case it should be pure ASCII, or it must be UTF-8
312 # per default encoding.
313 line_string = line.decode('utf-8')
314 except UnicodeDecodeError:
315 raise SyntaxError("invalid or missing encoding declaration")
316
317 matches = cookie_re.findall(line_string)
318 if not matches:
319 return None
320 encoding = _get_normal_name(matches[0])
321 try:
322 codec = lookup(encoding)
323 except LookupError:
324 # This behaviour mimics the Python interpreter
325 raise SyntaxError("unknown encoding: " + encoding)
326
327 if bom_found:
328 if encoding != 'utf-8':
329 # This behaviour mimics the Python interpreter
330 raise SyntaxError('encoding problem: utf-8')
331 encoding += '-sig'
332 return encoding
333
334 first = read_or_stop()
335 if first.startswith(BOM_UTF8):
336 bom_found = True
337 first = first[3:]
338 default = 'utf-8-sig'
339 if not first:
340 return default, []
341
342 encoding = find_cookie(first)
343 if encoding:
344 return encoding, [first]
345
346 second = read_or_stop()
347 if not second:
348 return default, [first]
349
350 encoding = find_cookie(second)
351 if encoding:
352 return encoding, [first, second]
353
354 return default, [first, second]
355
356
357 def open(filename):
358 """Open a file in read only mode using the encoding detected by
359 detect_encoding().
360 """
361 buffer = builtins.open(filename, 'rb')
362 encoding, lines = detect_encoding(buffer.readline)
363 buffer.seek(0)
364 text = TextIOWrapper(buffer, encoding, line_buffering=True)
365 text.mode = 'r'
366 return text
367
368
369 def tokenize(readline):
370 """
371 The tokenize() generator requires one argument, readline, which
372 must be a callable object which provides the same interface as the
373 readline() method of built-in file objects. Each call to the function
374 should return one line of input as bytes. Alternately, readline
375 can be a callable function terminating with StopIteration:
376 readline = open(myfile, 'rb').__next__ # Example of alternate readline
377
378 The generator produces 5-tuples with these members: the token type; the
379 token string; a 2-tuple (srow, scol) of ints specifying the row and
380 column where the token begins in the source; a 2-tuple (erow, ecol) of
381 ints specifying the row and column where the token ends in the source;
382 and the line on which the token was found. The line passed is the
383 logical line; continuation lines are included.
384
385 The first token sequence will always be an ENCODING token
386 which tells you which encoding was used to decode the bytes stream.
387 """
388 # This import is here to avoid problems when the itertools module is not
389 # built yet and tokenize is imported.
390 from itertools import chain, repeat
391 encoding, consumed = detect_encoding(readline)
392 rl_gen = iter(readline, b"")
393 empty = repeat(b"")
394 return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
395
396
397 def _tokenize(readline, encoding):
398 lnum = parenlev = continued = 0
399 numchars = '0123456789'
400 contstr, needcont = '', 0
401 contline = None
402 indents = [0]
403
404 if encoding is not None:
405 if encoding == "utf-8-sig":
406 # BOM will already have been stripped.
407 encoding = "utf-8"
408 yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
409 while True: # loop over lines in stream
410 try:
411 line = readline()
412 except StopIteration:
413 line = b''
414
415 if encoding is not None:
416 line = line.decode(encoding)
417 lnum += 1
418 pos, max = 0, len(line)
419
420 if contstr: # continued string
421 if not line:
422 raise TokenError("EOF in multi-line string", strstart)
423 endmatch = endprog.match(line)
424 if endmatch:
425 pos = end = endmatch.end(0)
426 yield TokenInfo(STRING, contstr + line[:end],
427 strstart, (lnum, end), contline + line)
428 contstr, needcont = '', 0
429 contline = None
430 elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
431 yield TokenInfo(ERRORTOKEN, contstr + line,
432 strstart, (lnum, len(line)), contline)
433 contstr = ''
434 contline = None
435 continue
436 else:
437 contstr = contstr + line
438 contline = contline + line
439 continue
440
441 elif parenlev == 0 and not continued: # new statement
442 if not line: break
443 column = 0
444 while pos < max: # measure leading whitespace
445 if line[pos] == ' ':
446 column += 1
447 elif line[pos] == '\t':
448 column = (column//tabsize + 1)*tabsize
449 elif line[pos] == '\f':
450 column = 0
451 else:
452 break
453 pos += 1
454 if pos == max:
455 break
456
457 if line[pos] in '#\r\n': # skip comments or blank lines
458 if line[pos] == '#':
459 comment_token = line[pos:].rstrip('\r\n')
460 nl_pos = pos + len(comment_token)
461 yield TokenInfo(COMMENT, comment_token,
462 (lnum, pos), (lnum, pos + len(comment_token)), line)
463 yield TokenInfo(NEWLINE, line[nl_pos:],
464 (lnum, nl_pos), (lnum, len(line)), line)
465 else:
466 yield TokenInfo(NEWLINE, line[pos:],
467 (lnum, pos), (lnum, len(line)), line)
468 continue
469
470 if column > indents[-1]: # count indents or dedents
471 indents.append(column)
472 yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
473 while column < indents[-1]:
474 if column not in indents:
475 raise IndentationError(
476 "unindent does not match any outer indentation level",
477 ("<tokenize>", lnum, pos, line))
478 indents = indents[:-1]
479 yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
480
481 else: # continued statement
482 if not line:
483 raise TokenError("EOF in multi-line statement", (lnum, 0))
484 continued = 0
485
486 while pos < max:
487 pseudomatch = pseudoprog.match(line, pos)
488 if pseudomatch: # scan for tokens
489 start, end = pseudomatch.span(1)
490 spos, epos, pos = (lnum, start), (lnum, end), end
491 token, initial = line[start:end], line[start]
492
493 if (initial in numchars or # ordinary number
494 (initial == '.' and token != '.' and token != '...')):
495 yield TokenInfo(NUMBER, token, spos, epos, line)
496 elif initial in '\r\n':
497 yield TokenInfo(NL if parenlev > 0 else NEWLINE,
498 token, spos, epos, line)
499 elif initial == '#':
500 assert not token.endswith("\n")
501 yield TokenInfo(COMMENT, token, spos, epos, line)
502 elif token in triple_quoted:
503 endprog = endprogs[token]
504 endmatch = endprog.match(line, pos)
505 if endmatch: # all on one line
506 pos = endmatch.end(0)
507 token = line[start:pos]
508 yield TokenInfo(STRING, token, spos, (lnum, pos), line)
509 else:
510 strstart = (lnum, start) # multiple lines
511 contstr = line[start:]
512 contline = line
513 break
514 elif initial in single_quoted or \
515 token[:2] in single_quoted or \
516 token[:3] in single_quoted:
517 if token[-1] == '\n': # continued string
518 strstart = (lnum, start)
519 endprog = (endprogs[initial] or endprogs[token[1]] or
520 endprogs[token[2]])
521 contstr, needcont = line[start:], 1
522 contline = line
523 break
524 else: # ordinary string
525 yield TokenInfo(STRING, token, spos, epos, line)
526 elif initial.isidentifier(): # ordinary name
527 yield TokenInfo(NAME, token, spos, epos, line)
528 elif initial == '\\': # continued stmt
529 continued = 1
530 else:
531 if initial in '([{':
532 parenlev += 1
533 elif initial in ')]}':
534 parenlev -= 1
535 yield TokenInfo(OP, token, spos, epos, line)
536 else:
537 yield TokenInfo(ERRORTOKEN, line[pos],
538 (lnum, pos), (lnum, pos+1), line)
539 pos += 1
540
541 for indent in indents[1:]: # pop remaining indent levels
542 yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
543 yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
544
545
546 # An undocumented, backwards compatible, API for all the places in the standard
547 # library that expect to be able to use tokenize with strings
548 def generate_tokens(readline):
549 return _tokenize(readline, None)
550
551 if __name__ == "__main__":
552 # Quick sanity check
553 s = b'''def parseline(self, line):
554 """Parse the line into a command name and a string containing
555 the arguments. Returns a tuple containing (command, args, line).
556 'command' and 'args' may be None if the line couldn't be parsed.
557 """
558 line = line.strip()
559 if not line:
560 return None, None, line
561 elif line[0] == '?':
562 line = 'help ' + line[1:]
563 elif line[0] == '!':
564 if hasattr(self, 'do_shell'):
565 line = 'shell ' + line[1:]
566 else:
567 return None, None, line
568 i, n = 0, len(line)
569 while i < n and line[i] in self.identchars: i = i+1
570 cmd, arg = line[:i], line[i:].strip()
571 return cmd, arg, line
572 '''
573 for tok in tokenize(iter(s.splitlines()).__next__):
574 print(tok)
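To see the ENCODING token and the PEP 263 handling described in the docstrings above, a minimal sketch (Python 3; assumes the module above is importable as _tokenize_py3):

import io
from _tokenize_py3 import tokenize, tok_name

src = b"# coding: latin-1\nname = 'caf\xe9'\n"
for tok in tokenize(io.BytesIO(src).readline):
    print("%-8s %r" % (tok_name[tok.type], tok.string))
# ENCODING 'iso-8859-1'
# COMMENT  '# coding: latin-1'
# NEWLINE  '\n'
# NAME     'name'
# ...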
@@ -0,0 +1,9 b''
1 """Load our patched versions of tokenize.
2 """
3
4 import sys
5
6 if sys.version_info[0] >= 3:
7 from ._tokenize_py3 import *
8 else:
9 from ._tokenize_py2 import *
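The two untokenize() modes documented in the files above, in one minimal sketch (the IPython.utils.tokenize2 path is the one the rest of this changeset uses):

from io import StringIO
from IPython.utils.tokenize2 import generate_tokens, untokenize

src = u"if x:\n    y = (1 +\n         2)\n"
full = list(generate_tokens(StringIO(src).readline))

# Full 5-tuples: spacing is rebuilt from the start/end positions,
# so the source round-trips exactly.
assert untokenize(full) == src

# (type, string) pairs only: spacing is approximate, but re-tokenizing
# the output gives back the same (type, string) sequence.
pairs = [tok[:2] for tok in full]
rebuilt = untokenize(pairs)
assert [t[:2] for t in generate_tokens(StringIO(rebuilt).readline)] == pairs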
@@ -1,441 +1,434 b''
1 import abc
1 import abc
2 import functools
2 import functools
3 import re
3 import re
4 from StringIO import StringIO
4 from StringIO import StringIO
5 import tokenize
6
7 try:
8 generate_tokens = tokenize.generate_tokens
9 except AttributeError:
10 # Python 3. Note that we use the undocumented _tokenize because it expects
11 # strings, not bytes. See also Python issue #9969.
12 generate_tokens = tokenize._tokenize
13
5
14 from IPython.core.splitinput import split_user_input, LineInfo
6 from IPython.core.splitinput import split_user_input, LineInfo
15 from IPython.utils.untokenize import untokenize
7 from IPython.utils import tokenize2
8 from IPython.utils.tokenize2 import generate_tokens, untokenize, TokenError
16
9
17 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
18 # Globals
11 # Globals
19 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
20
13
21 # The escape sequences that define the syntax transformations IPython will
14 # The escape sequences that define the syntax transformations IPython will
22 # apply to user input. These can NOT be just changed here: many regular
15 # apply to user input. These can NOT be just changed here: many regular
23 # expressions and other parts of the code may use their hardcoded values, and
16 # expressions and other parts of the code may use their hardcoded values, and
24 # for all intents and purposes they constitute the 'IPython syntax', so they
17 # for all intents and purposes they constitute the 'IPython syntax', so they
25 # should be considered fixed.
18 # should be considered fixed.
26
19
27 ESC_SHELL = '!' # Send line to underlying system shell
20 ESC_SHELL = '!' # Send line to underlying system shell
28 ESC_SH_CAP = '!!' # Send line to system shell and capture output
21 ESC_SH_CAP = '!!' # Send line to system shell and capture output
29 ESC_HELP = '?' # Find information about object
22 ESC_HELP = '?' # Find information about object
30 ESC_HELP2 = '??' # Find extra-detailed information about object
23 ESC_HELP2 = '??' # Find extra-detailed information about object
31 ESC_MAGIC = '%' # Call magic function
24 ESC_MAGIC = '%' # Call magic function
32 ESC_MAGIC2 = '%%' # Call cell-magic function
25 ESC_MAGIC2 = '%%' # Call cell-magic function
33 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
26 ESC_QUOTE = ',' # Split args on whitespace, quote each as string and call
34 ESC_QUOTE2 = ';' # Quote all args as a single string, call
27 ESC_QUOTE2 = ';' # Quote all args as a single string, call
35 ESC_PAREN = '/' # Call first argument with rest of line as arguments
28 ESC_PAREN = '/' # Call first argument with rest of line as arguments
36
29
37 ESC_SEQUENCES = [ESC_SHELL, ESC_SH_CAP, ESC_HELP ,\
30 ESC_SEQUENCES = [ESC_SHELL, ESC_SH_CAP, ESC_HELP ,\
38 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,\
31 ESC_HELP2, ESC_MAGIC, ESC_MAGIC2,\
39 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN ]
32 ESC_QUOTE, ESC_QUOTE2, ESC_PAREN ]
40
33
41
34
42 class InputTransformer(object):
35 class InputTransformer(object):
43 """Abstract base class for line-based input transformers."""
36 """Abstract base class for line-based input transformers."""
44 __metaclass__ = abc.ABCMeta
37 __metaclass__ = abc.ABCMeta
45
38
46 @abc.abstractmethod
39 @abc.abstractmethod
47 def push(self, line):
40 def push(self, line):
48 """Send a line of input to the transformer, returning the transformed
41 """Send a line of input to the transformer, returning the transformed
49 input or None if the transformer is waiting for more input.
42 input or None if the transformer is waiting for more input.
50
43
51 Must be overridden by subclasses.
44 Must be overridden by subclasses.
52 """
45 """
53 pass
46 pass
54
47
55 @abc.abstractmethod
48 @abc.abstractmethod
56 def reset(self):
49 def reset(self):
57 """Return, transformed any lines that the transformer has accumulated,
50 """Return, transformed any lines that the transformer has accumulated,
58 and reset its internal state.
51 and reset its internal state.
59
52
60 Must be overridden by subclasses.
53 Must be overridden by subclasses.
61 """
54 """
62 pass
55 pass
63
56
64 @classmethod
57 @classmethod
65 def wrap(cls, func):
58 def wrap(cls, func):
66 """Can be used by subclasses as a decorator, to return a factory that
59 """Can be used by subclasses as a decorator, to return a factory that
67 will allow instantiation with the decorated object.
60 will allow instantiation with the decorated object.
68 """
61 """
69 @functools.wraps(func)
62 @functools.wraps(func)
70 def transformer_factory():
63 def transformer_factory():
71 return cls(func)
64 return cls(func)
72
65
73 return transformer_factory
66 return transformer_factory
74
67
75 class StatelessInputTransformer(InputTransformer):
68 class StatelessInputTransformer(InputTransformer):
76 """Wrapper for a stateless input transformer implemented as a function."""
69 """Wrapper for a stateless input transformer implemented as a function."""
77 def __init__(self, func):
70 def __init__(self, func):
78 self.func = func
71 self.func = func
79
72
80 def __repr__(self):
73 def __repr__(self):
81 return "StatelessInputTransformer(func={!r})".format(self.func)
74 return "StatelessInputTransformer(func={!r})".format(self.func)
82
75
83 def push(self, line):
76 def push(self, line):
84 """Send a line of input to the transformer, returning the
77 """Send a line of input to the transformer, returning the
85 transformed input."""
78 transformed input."""
86 return self.func(line)
79 return self.func(line)
87
80
88 def reset(self):
81 def reset(self):
89 """No-op - exists for compatibility."""
82 """No-op - exists for compatibility."""
90 pass
83 pass
91
84
92 class CoroutineInputTransformer(InputTransformer):
85 class CoroutineInputTransformer(InputTransformer):
93 """Wrapper for an input transformer implemented as a coroutine."""
86 """Wrapper for an input transformer implemented as a coroutine."""
94 def __init__(self, coro):
87 def __init__(self, coro):
95 # Prime it
88 # Prime it
96 self.coro = coro()
89 self.coro = coro()
97 next(self.coro)
90 next(self.coro)
98
91
99 def __repr__(self):
92 def __repr__(self):
100 return "CoroutineInputTransformer(coro={!r})".format(self.coro)
93 return "CoroutineInputTransformer(coro={!r})".format(self.coro)
101
94
102 def push(self, line):
95 def push(self, line):
103 """Send a line of input to the transformer, returning the
96 """Send a line of input to the transformer, returning the
104 transformed input or None if the transformer is waiting for more
97 transformed input or None if the transformer is waiting for more
105 input.
98 input.
106 """
99 """
107 return self.coro.send(line)
100 return self.coro.send(line)
108
101
109 def reset(self):
102 def reset(self):
110 """Return, transformed any lines that the transformer has
103 """Return, transformed any lines that the transformer has
111 accumulated, and reset its internal state.
104 accumulated, and reset its internal state.
112 """
105 """
113 return self.coro.send(None)
106 return self.coro.send(None)
114
107
115 class TokenInputTransformer(InputTransformer):
108 class TokenInputTransformer(InputTransformer):
116 """Wrapper for a token-based input transformer.
109 """Wrapper for a token-based input transformer.
117
110
118 func should accept a list of tokens (5-tuples, see tokenize docs), and
111 func should accept a list of tokens (5-tuples, see tokenize docs), and
119 return an iterable which can be passed to tokenize.untokenize().
112 return an iterable which can be passed to tokenize.untokenize().
120 """
113 """
121 def __init__(self, func):
114 def __init__(self, func):
122 self.func = func
115 self.func = func
123 self.current_line = ""
116 self.current_line = ""
124 self.line_used = False
117 self.line_used = False
125 self.reset_tokenizer()
118 self.reset_tokenizer()
126
119
127 def reset_tokenizer(self):
120 def reset_tokenizer(self):
128 self.tokenizer = generate_tokens(self.get_line)
121 self.tokenizer = generate_tokens(self.get_line)
129
122
130 def get_line(self):
123 def get_line(self):
131 if self.line_used:
124 if self.line_used:
132 raise tokenize.TokenError
125 raise TokenError
133 self.line_used = True
126 self.line_used = True
134 return self.current_line
127 return self.current_line
135
128
136 def push(self, line):
129 def push(self, line):
137 self.current_line += line + "\n"
130 self.current_line += line + "\n"
138 if self.current_line.isspace():
131 if self.current_line.isspace():
139 return self.reset()
132 return self.reset()
140
133
141 self.line_used = False
134 self.line_used = False
142 tokens = []
135 tokens = []
143 stop_at_NL = False
136 stop_at_NL = False
144 try:
137 try:
145 for intok in self.tokenizer:
138 for intok in self.tokenizer:
146 tokens.append(intok)
139 tokens.append(intok)
147 t = intok[0]
140 t = intok[0]
148 if t == tokenize.NEWLINE or (stop_at_NL and t == tokenize.NL):
141 if t == tokenize2.NEWLINE or (stop_at_NL and t == tokenize2.NL):
149 # Stop before we try to pull a line we don't have yet
142 # Stop before we try to pull a line we don't have yet
150 break
143 break
151 elif t in (tokenize.COMMENT, tokenize.ERRORTOKEN):
144 elif t == tokenize2.ERRORTOKEN:
152 stop_at_NL = True
145 stop_at_NL = True
153 except tokenize.TokenError:
146 except TokenError:
154 # Multi-line statement - stop and try again with the next line
147 # Multi-line statement - stop and try again with the next line
155 self.reset_tokenizer()
148 self.reset_tokenizer()
156 return None
149 return None
157
150
158 return self.output(tokens)
151 return self.output(tokens)
159
152
160 def output(self, tokens):
153 def output(self, tokens):
161 self.current_line = ""
154 self.current_line = ""
162 self.reset_tokenizer()
155 self.reset_tokenizer()
163 return untokenize(self.func(tokens)).rstrip('\n')
156 return untokenize(self.func(tokens)).rstrip('\n')
164
157
165 def reset(self):
158 def reset(self):
166 l = self.current_line
159 l = self.current_line
167 self.current_line = ""
160 self.current_line = ""
168 self.reset_tokenizer()
161 self.reset_tokenizer()
169 if l:
162 if l:
170 return l.rstrip('\n')
163 return l.rstrip('\n')
171
164
172 class assemble_python_lines(TokenInputTransformer):
165 class assemble_python_lines(TokenInputTransformer):
173 def __init__(self):
166 def __init__(self):
174 super(assemble_python_lines, self).__init__(None)
167 super(assemble_python_lines, self).__init__(None)
175
168
176 def output(self, tokens):
169 def output(self, tokens):
177 return self.reset()
170 return self.reset()
178
171
179 @CoroutineInputTransformer.wrap
172 @CoroutineInputTransformer.wrap
180 def assemble_logical_lines():
173 def assemble_logical_lines():
181 """Join lines following explicit line continuations (\)"""
174 """Join lines following explicit line continuations (\)"""
182 line = ''
175 line = ''
183 while True:
176 while True:
184 line = (yield line)
177 line = (yield line)
185 if not line or line.isspace():
178 if not line or line.isspace():
186 continue
179 continue
187
180
188 parts = []
181 parts = []
189 while line is not None:
182 while line is not None:
190 parts.append(line.rstrip('\\'))
183 parts.append(line.rstrip('\\'))
191 if not line.endswith('\\'):
184 if not line.endswith('\\'):
192 break
185 break
193 line = (yield None)
186 line = (yield None)
194
187
195 # Output
188 # Output
196 line = ' '.join(parts)
189 line = ' '.join(parts)
197
190
198 # Utilities
191 # Utilities
199 def _make_help_call(target, esc, lspace, next_input=None):
192 def _make_help_call(target, esc, lspace, next_input=None):
200 """Prepares a pinfo(2)/psearch call from a target name and the escape
193 """Prepares a pinfo(2)/psearch call from a target name and the escape
201 (i.e. ? or ??)"""
194 (i.e. ? or ??)"""
202 method = 'pinfo2' if esc == '??' \
195 method = 'pinfo2' if esc == '??' \
203 else 'psearch' if '*' in target \
196 else 'psearch' if '*' in target \
204 else 'pinfo'
197 else 'pinfo'
205 arg = " ".join([method, target])
198 arg = " ".join([method, target])
206 if next_input is None:
199 if next_input is None:
207 return '%sget_ipython().magic(%r)' % (lspace, arg)
200 return '%sget_ipython().magic(%r)' % (lspace, arg)
208 else:
201 else:
209 return '%sget_ipython().set_next_input(%r);get_ipython().magic(%r)' % \
202 return '%sget_ipython().set_next_input(%r);get_ipython().magic(%r)' % \
210 (lspace, next_input, arg)
203 (lspace, next_input, arg)
211
204
212 # These define the transformations for the different escape characters.
205 # These define the transformations for the different escape characters.
213 def _tr_system(line_info):
206 def _tr_system(line_info):
214 "Translate lines escaped with: !"
207 "Translate lines escaped with: !"
215 cmd = line_info.line.lstrip().lstrip(ESC_SHELL)
208 cmd = line_info.line.lstrip().lstrip(ESC_SHELL)
216 return '%sget_ipython().system(%r)' % (line_info.pre, cmd)
209 return '%sget_ipython().system(%r)' % (line_info.pre, cmd)
217
210
218 def _tr_system2(line_info):
211 def _tr_system2(line_info):
219 "Translate lines escaped with: !!"
212 "Translate lines escaped with: !!"
220 cmd = line_info.line.lstrip()[2:]
213 cmd = line_info.line.lstrip()[2:]
221 return '%sget_ipython().getoutput(%r)' % (line_info.pre, cmd)
214 return '%sget_ipython().getoutput(%r)' % (line_info.pre, cmd)
222
215
223 def _tr_help(line_info):
216 def _tr_help(line_info):
224 "Translate lines escaped with: ?/??"
217 "Translate lines escaped with: ?/??"
225 # A naked help line should just fire the intro help screen
218 # A naked help line should just fire the intro help screen
226 if not line_info.line[1:]:
219 if not line_info.line[1:]:
227 return 'get_ipython().show_usage()'
220 return 'get_ipython().show_usage()'
228
221
229 return _make_help_call(line_info.ifun, line_info.esc, line_info.pre)
222 return _make_help_call(line_info.ifun, line_info.esc, line_info.pre)
230
223
231 def _tr_magic(line_info):
224 def _tr_magic(line_info):
232 "Translate lines escaped with: %"
225 "Translate lines escaped with: %"
233 tpl = '%sget_ipython().magic(%r)'
226 tpl = '%sget_ipython().magic(%r)'
234 cmd = ' '.join([line_info.ifun, line_info.the_rest]).strip()
227 cmd = ' '.join([line_info.ifun, line_info.the_rest]).strip()
235 return tpl % (line_info.pre, cmd)
228 return tpl % (line_info.pre, cmd)
236
229
237 def _tr_quote(line_info):
230 def _tr_quote(line_info):
238 "Translate lines escaped with: ,"
231 "Translate lines escaped with: ,"
239 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
232 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
240 '", "'.join(line_info.the_rest.split()) )
233 '", "'.join(line_info.the_rest.split()) )
241
234
242 def _tr_quote2(line_info):
235 def _tr_quote2(line_info):
243 "Translate lines escaped with: ;"
236 "Translate lines escaped with: ;"
244 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
237 return '%s%s("%s")' % (line_info.pre, line_info.ifun,
245 line_info.the_rest)
238 line_info.the_rest)
246
239
247 def _tr_paren(line_info):
240 def _tr_paren(line_info):
248 "Translate lines escaped with: /"
241 "Translate lines escaped with: /"
249 return '%s%s(%s)' % (line_info.pre, line_info.ifun,
242 return '%s%s(%s)' % (line_info.pre, line_info.ifun,
250 ", ".join(line_info.the_rest.split()))
243 ", ".join(line_info.the_rest.split()))
251
244
252 tr = { ESC_SHELL : _tr_system,
245 tr = { ESC_SHELL : _tr_system,
253 ESC_SH_CAP : _tr_system2,
246 ESC_SH_CAP : _tr_system2,
254 ESC_HELP : _tr_help,
247 ESC_HELP : _tr_help,
255 ESC_HELP2 : _tr_help,
248 ESC_HELP2 : _tr_help,
256 ESC_MAGIC : _tr_magic,
249 ESC_MAGIC : _tr_magic,
257 ESC_QUOTE : _tr_quote,
250 ESC_QUOTE : _tr_quote,
258 ESC_QUOTE2 : _tr_quote2,
251 ESC_QUOTE2 : _tr_quote2,
259 ESC_PAREN : _tr_paren }
252 ESC_PAREN : _tr_paren }
260
253
261 @StatelessInputTransformer.wrap
254 @StatelessInputTransformer.wrap
262 def escaped_commands(line):
255 def escaped_commands(line):
263 """Transform escaped commands - %magic, !system, ?help + various autocalls.
256 """Transform escaped commands - %magic, !system, ?help + various autocalls.
264 """
257 """
265 if not line or line.isspace():
258 if not line or line.isspace():
266 return line
259 return line
267 lineinf = LineInfo(line)
260 lineinf = LineInfo(line)
268 if lineinf.esc not in tr:
261 if lineinf.esc not in tr:
269 return line
262 return line
270
263
271 return tr[lineinf.esc](lineinf)
264 return tr[lineinf.esc](lineinf)
272
265
273 _initial_space_re = re.compile(r'\s*')
266 _initial_space_re = re.compile(r'\s*')
274
267
275 _help_end_re = re.compile(r"""(%{0,2}
268 _help_end_re = re.compile(r"""(%{0,2}
276 [a-zA-Z_*][\w*]* # Variable name
269 [a-zA-Z_*][\w*]* # Variable name
277 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
270 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
278 )
271 )
279 (\?\??)$ # ? or ??""",
272 (\?\??)$ # ? or ??""",
280 re.VERBOSE)
273 re.VERBOSE)
281
274
282 def has_comment(src):
275 def has_comment(src):
283 """Indicate whether an input line has (i.e. ends in, or is) a comment.
276 """Indicate whether an input line has (i.e. ends in, or is) a comment.
284
277
285 This uses tokenize, so it can distinguish comments from # inside strings.
278 This uses tokenize, so it can distinguish comments from # inside strings.
286
279
287 Parameters
280 Parameters
288 ----------
281 ----------
289 src : string
282 src : string
290 A single line input string.
283 A single line input string.
291
284
292 Returns
285 Returns
293 -------
286 -------
294 comment : bool
287 comment : bool
295 True if source has a comment.
288 True if source has a comment.
296 """
289 """
297 readline = StringIO(src).readline
290 readline = StringIO(src).readline
298 toktypes = set()
291 toktypes = set()
299 try:
292 try:
300 for t in tokenize.generate_tokens(readline):
293 for t in generate_tokens(readline):
301 toktypes.add(t[0])
294 toktypes.add(t[0])
302 except tokenize.TokenError:
295 except TokenError:
303 pass
296 pass
304 return(tokenize.COMMENT in toktypes)
297 return(tokenize2.COMMENT in toktypes)
305
298
306
299
307 @StatelessInputTransformer.wrap
300 @StatelessInputTransformer.wrap
308 def help_end(line):
301 def help_end(line):
309 """Translate lines with ?/?? at the end"""
302 """Translate lines with ?/?? at the end"""
310 m = _help_end_re.search(line)
303 m = _help_end_re.search(line)
311 if m is None or has_comment(line):
304 if m is None or has_comment(line):
312 return line
305 return line
313 target = m.group(1)
306 target = m.group(1)
314 esc = m.group(3)
307 esc = m.group(3)
315 lspace = _initial_space_re.match(line).group(0)
308 lspace = _initial_space_re.match(line).group(0)
316
309
317 # If we're mid-command, put it back on the next prompt for the user.
310 # If we're mid-command, put it back on the next prompt for the user.
318 next_input = line.rstrip('?') if line.strip() != m.group(0) else None
311 next_input = line.rstrip('?') if line.strip() != m.group(0) else None
319
312
320 return _make_help_call(target, esc, lspace, next_input)
313 return _make_help_call(target, esc, lspace, next_input)
321
314
322
315
323 @CoroutineInputTransformer.wrap
316 @CoroutineInputTransformer.wrap
324 def cellmagic():
317 def cellmagic():
325 """Captures & transforms cell magics.
318 """Captures & transforms cell magics.
326
319
327 After a cell magic is started, this stores up any lines it gets until it is
320 After a cell magic is started, this stores up any lines it gets until it is
328 reset (sent None).
321 reset (sent None).
329 """
322 """
330 tpl = 'get_ipython().run_cell_magic(%r, %r, %r)'
323 tpl = 'get_ipython().run_cell_magic(%r, %r, %r)'
331 cellmagic_help_re = re.compile('%%\w+\?')
324 cellmagic_help_re = re.compile('%%\w+\?')
332 line = ''
325 line = ''
333 while True:
326 while True:
334 line = (yield line)
327 line = (yield line)
335 if (not line) or (not line.startswith(ESC_MAGIC2)):
328 if (not line) or (not line.startswith(ESC_MAGIC2)):
336 continue
329 continue
337
330
338 if cellmagic_help_re.match(line):
331 if cellmagic_help_re.match(line):
339 # This case will be handled by help_end
332 # This case will be handled by help_end
340 continue
333 continue
341
334
342 first = line
335 first = line
343 body = []
336 body = []
344 line = (yield None)
337 line = (yield None)
345 while (line is not None) and (line.strip() != ''):
338 while (line is not None) and (line.strip() != ''):
346 body.append(line)
339 body.append(line)
347 line = (yield None)
340 line = (yield None)
348
341
349 # Output
342 # Output
350 magic_name, _, first = first.partition(' ')
343 magic_name, _, first = first.partition(' ')
351 magic_name = magic_name.lstrip(ESC_MAGIC2)
344 magic_name = magic_name.lstrip(ESC_MAGIC2)
352 line = tpl % (magic_name, first, u'\n'.join(body))
345 line = tpl % (magic_name, first, u'\n'.join(body))
353
346
354
347
355 def _strip_prompts(prompt1_re, prompt2_re):
348 def _strip_prompts(prompt1_re, prompt2_re):
356 """Remove matching input prompts from a block of input."""
349 """Remove matching input prompts from a block of input."""
357 line = ''
350 line = ''
358 while True:
351 while True:
359 line = (yield line)
352 line = (yield line)
360
353
361 if line is None:
354 if line is None:
362 continue
355 continue
363
356
364 m = prompt1_re.match(line)
357 m = prompt1_re.match(line)
365 if m:
358 if m:
366 while m:
359 while m:
367 line = (yield line[len(m.group(0)):])
360 line = (yield line[len(m.group(0)):])
368 if line is None:
361 if line is None:
369 break
362 break
370 m = prompt2_re.match(line)
363 m = prompt2_re.match(line)
371 else:
364 else:
372 # Prompts not in input - wait for reset
365 # Prompts not in input - wait for reset
373 while line is not None:
366 while line is not None:
374 line = (yield line)
367 line = (yield line)
375
368
376 @CoroutineInputTransformer.wrap
369 @CoroutineInputTransformer.wrap
377 def classic_prompt():
370 def classic_prompt():
378 """Strip the >>>/... prompts of the Python interactive shell."""
371 """Strip the >>>/... prompts of the Python interactive shell."""
379 prompt1_re = re.compile(r'^(>>> )')
372 prompt1_re = re.compile(r'^(>>> )')
380 prompt2_re = re.compile(r'^(>>> |^\.\.\. )')
373 prompt2_re = re.compile(r'^(>>> |^\.\.\. )')
381 return _strip_prompts(prompt1_re, prompt2_re)
374 return _strip_prompts(prompt1_re, prompt2_re)
382
375
383 @CoroutineInputTransformer.wrap
376 @CoroutineInputTransformer.wrap
384 def ipy_prompt():
377 def ipy_prompt():
385 """Strip IPython's In [1]:/...: prompts."""
378 """Strip IPython's In [1]:/...: prompts."""
386 prompt1_re = re.compile(r'^In \[\d+\]: ')
379 prompt1_re = re.compile(r'^In \[\d+\]: ')
387 prompt2_re = re.compile(r'^(In \[\d+\]: |^\ \ \ \.\.\.+: )')
380 prompt2_re = re.compile(r'^(In \[\d+\]: |^\ \ \ \.\.\.+: )')
388 return _strip_prompts(prompt1_re, prompt2_re)
381 return _strip_prompts(prompt1_re, prompt2_re)
389
382
390
383
391 @CoroutineInputTransformer.wrap
384 @CoroutineInputTransformer.wrap
392 def leading_indent():
385 def leading_indent():
393 """Remove leading indentation.
386 """Remove leading indentation.
394
387
395 If the first line starts with a spaces or tabs, the same whitespace will be
388 If the first line starts with a spaces or tabs, the same whitespace will be
396 removed from each following line until it is reset.
389 removed from each following line until it is reset.
397 """
390 """
398 space_re = re.compile(r'^[ \t]+')
391 space_re = re.compile(r'^[ \t]+')
399 line = ''
392 line = ''
400 while True:
393 while True:
401 line = (yield line)
394 line = (yield line)
402
395
403 if line is None:
396 if line is None:
404 continue
397 continue
405
398
406 m = space_re.match(line)
399 m = space_re.match(line)
407 if m:
400 if m:
408 space = m.group(0)
401 space = m.group(0)
409 while line is not None:
402 while line is not None:
410 if line.startswith(space):
403 if line.startswith(space):
411 line = line[len(space):]
404 line = line[len(space):]
412 line = (yield line)
405 line = (yield line)
413 else:
406 else:
414 # No leading spaces - wait for reset
407 # No leading spaces - wait for reset
415 while line is not None:
408 while line is not None:
416 line = (yield line)
409 line = (yield line)
417
410
418
411
419 assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
412 assign_system_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
420 r'\s*=\s*!\s*(?P<cmd>.*)')
413 r'\s*=\s*!\s*(?P<cmd>.*)')
421 assign_system_template = '%s = get_ipython().getoutput(%r)'
414 assign_system_template = '%s = get_ipython().getoutput(%r)'
422 @StatelessInputTransformer.wrap
415 @StatelessInputTransformer.wrap
423 def assign_from_system(line):
416 def assign_from_system(line):
424 """Transform assignment from system commands (e.g. files = !ls)"""
417 """Transform assignment from system commands (e.g. files = !ls)"""
425 m = assign_system_re.match(line)
418 m = assign_system_re.match(line)
426 if m is None:
419 if m is None:
427 return line
420 return line
428
421
429 return assign_system_template % m.group('lhs', 'cmd')
422 return assign_system_template % m.group('lhs', 'cmd')
430
423
431 assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
424 assign_magic_re = re.compile(r'(?P<lhs>(\s*)([\w\.]+)((\s*,\s*[\w\.]+)*))'
432 r'\s*=\s*%\s*(?P<cmd>.*)')
425 r'\s*=\s*%\s*(?P<cmd>.*)')
433 assign_magic_template = '%s = get_ipython().magic(%r)'
426 assign_magic_template = '%s = get_ipython().magic(%r)'
434 @StatelessInputTransformer.wrap
427 @StatelessInputTransformer.wrap
435 def assign_from_magic(line):
428 def assign_from_magic(line):
436 """Transform assignment from magic commands (e.g. a = %who_ls)"""
429 """Transform assignment from magic commands (e.g. a = %who_ls)"""
437 m = assign_magic_re.match(line)
430 m = assign_magic_re.match(line)
438 if m is None:
431 if m is None:
439 return line
432 return line
440
433
441 return assign_magic_template % m.group('lhs', 'cmd')
434 return assign_magic_template % m.group('lhs', 'cmd')
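As the TokenInputTransformer docstring above says, the wrapped function receives a list of token 5-tuples and returns tokens suitable for untokenize(). A minimal sketch (Python 2, matching the imports in this file; double_ints is an illustrative transformer, not part of the commit, and the file is assumed to be importable as IPython.core.inputtransformer):

from IPython.core.inputtransformer import TokenInputTransformer
from IPython.utils.tokenize2 import NUMBER

@TokenInputTransformer.wrap
def double_ints(tokens):
    # Double every integer literal, keeping the original positions
    # (same-width replacements keep the spacing intact).
    for tok in tokens:
        if tok[0] == NUMBER and tok[1].isdigit():
            yield (NUMBER, str(int(tok[1]) * 2)) + tuple(tok[2:])
        else:
            yield tok

t = double_ints()            # .wrap turns the function into a factory
print(t.push("x = 2 + 3"))   # -> x = 4 + 6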