upstream/ipython Commit - r22959:7851f8a3

remove sys_version for Python 3...

Paul Ivanov -

r22959:7851f8a3

parent child

IPython/core/debugger.py

0 0 -1

                                   ('Globals', self.curframe.f_globals)]
                     self.shell.find_line_magic('psource')(arg, namespaces=namespaces)
-                if sys.version_info > (3, ):
                 def do_where(self, arg):
                     """w(here)
                     Print a stack trace, with the most recent frame at the bottom.

IPython/core/interactiveshell.py

0 0 -3

                         raise self.error_in_exec
                 def __repr__(self):
-                    if sys.version_info > (3,):
                     name = self.__class__.__qualname__
-                    else:
-                        name = self.__class__.__name__
                     return '<%s object at %x, execution_count=%s error_before_exec=%s error_in_exec=%s result=%s>' %\
                             (name, id(self), self.execution_count, self.error_before_exec, self.error_in_exec, repr(self.result))

IPython/core/tests/test_completer.py

0 0 -7

-            @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
             def test_back_latex_completion():
                 ip = get_ipython()
                 nt.assert_equal(len(matches), 1)
                 nt.assert_equal(matches[0], '\\beta')
-            @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
             def test_back_unicode_completion():
                 ip = get_ipython()
                 nt.assert_equal(matches[0], '\\ROMAN NUMERAL FIVE')
-            @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
             def test_forward_unicode_completion():
                 ip = get_ipython()
                 nt.assert_equal(len(matches), 1)
                 nt.assert_equal(matches[0], 'Ⅴ')
-            @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
             @dec.knownfailureif(sys.platform == 'win32', 'Fails if there is a C:\\j... path')
             def test_no_ascii_back_completion():
                 ip = get_ipython()
-            @dec.onlyif(sys.version_info[0] >= 3, 'This test only applies in Py>=3')
             def test_dict_key_completion_bytes():
                 """Test handling of bytes in dict key completion"""
                 ip = get_ipython()
                     nt.assert_not_in("abd", matches)
-            @dec.onlyif(sys.version_info[0] < 3, 'This test only applies in Py<3')
             def test_dict_key_completion_unicode_py2():
                 """Test handling of unicode in dict key completion"""
                 ip = get_ipython()
                     nt.assert_in(u"d[u'a\u05d0b']", matches)
-            @dec.onlyif(sys.version_info[0] >= 3, 'This test only applies in Py>=3')
             def test_dict_key_completion_unicode_py3():
                 """Test handling of unicode in dict key completion"""
                 ip = get_ipython()

IPython/core/tests/test_interactiveshell.py

0 0 -32

@@ -338,19 +338,6 b' class InteractiveShellTestCase(unittest.TestCase):'
338	finally:	338	finally:
339	trap.hook = save_hook	339	trap.hook = save_hook
340		340
341	@skipif(sys.version_info[0] >= 3, "softspace removed in py3")
342	def test_print_softspace(self):
343	"""Verify that softspace is handled correctly when executing multiple
344	statements.
345
346	In [1]: print 1; print 2
347	1
348	2
349
350	In [2]: print 1,; print 2
351	1 2
352	"""
353
354	def test_ofind_line_magic(self):	341	def test_ofind_line_magic(self):
355	from IPython.core.magic import register_line_magic	342	from IPython.core.magic import register_line_magic
356		343
@@ -466,22 +453,6 b' class InteractiveShellTestCase(unittest.TestCase):'
466	# Reset the custom exception hook	453	# Reset the custom exception hook
467	ip.set_custom_exc((), None)	454	ip.set_custom_exc((), None)
468		455
469	@skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
470	def test_future_environment(self):
471	"Can we run code with & without the shell's __future__ imports?"
472	ip.run_cell("from __future__ import division")
473	ip.run_cell("a = 1/2", shell_futures=True)
474	self.assertEqual(ip.user_ns['a'], 0.5)
475	ip.run_cell("b = 1/2", shell_futures=False)
476	self.assertEqual(ip.user_ns['b'], 0)
477
478	ip.compile.reset_compiler_flags()
479	# This shouldn't leak to the shell's compiler
480	ip.run_cell("from __future__ import division \nc=1/2", shell_futures=False)
481	self.assertEqual(ip.user_ns['c'], 0.5)
482	ip.run_cell("d = 1/2", shell_futures=True)
483	self.assertEqual(ip.user_ns['d'], 0)
484
485	def test_mktempfile(self):	456	def test_mktempfile(self):
486	filename = ip.mktempfile()	457	filename = ip.mktempfile()
487	# Check that we can open the file again on Windows	458	# Check that we can open the file again on Windows
@@ -509,9 +480,6 b' class InteractiveShellTestCase(unittest.TestCase):'
509	raise DerivedInterrupt("foo")	480	raise DerivedInterrupt("foo")
510	except KeyboardInterrupt:	481	except KeyboardInterrupt:
511	msg = ip.get_exception_only()	482	msg = ip.get_exception_only()
512	if sys.version_info[0] <= 2:
513	self.assertEqual(msg, 'DerivedInterrupt: foo\n')
514	else:
515	self.assertEqual(msg, 'IPython.core.tests.test_interactiveshell.DerivedInterrupt: foo\n')	483	self.assertEqual(msg, 'IPython.core.tests.test_interactiveshell.DerivedInterrupt: foo\n')
516		484
517	def test_inspect_text(self):	485	def test_inspect_text(self):

IPython/core/tests/test_magic.py

0 0 -23

@@ -377,18 +377,6 b' def test_time3():'
377	"run = 0\n"	377	"run = 0\n"
378	"run += 1")	378	"run += 1")
379		379
380	@dec.skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
381	def test_time_futures():
382	"Test %time with __future__ environments"
383	ip = get_ipython()
384	ip.autocall = 0
385	ip.run_cell("from __future__ import division")
386	with tt.AssertPrints('0.25'):
387	ip.run_line_magic('time', 'print(1/4)')
388	ip.compile.reset_compiler_flags()
389	with tt.AssertNotPrints('0.25'):
390	ip.run_line_magic('time', 'print(1/4)')
391
392	def test_doctest_mode():	380	def test_doctest_mode():
393	"Toggle doctest_mode twice, it should be a no-op and run without error"	381	"Toggle doctest_mode twice, it should be a no-op and run without error"
394	_ip.magic('doctest_mode')	382	_ip.magic('doctest_mode')
@@ -573,17 +561,6 b' def test_timeit_return_quiet():'
573	res = _ip.run_line_magic('timeit', '-n1 -r1 -q -o 1')	561	res = _ip.run_line_magic('timeit', '-n1 -r1 -q -o 1')
574	assert (res is not None)	562	assert (res is not None)
575		563
576	@dec.skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
577	def test_timeit_futures():
578	"Test %timeit with __future__ environments"
579	ip = get_ipython()
580	ip.run_cell("from __future__ import division")
581	with tt.AssertPrints('0.25'):
582	ip.run_line_magic('timeit', '-n1 -r1 print(1/4)')
583	ip.compile.reset_compiler_flags()
584	with tt.AssertNotPrints('0.25'):
585	ip.run_line_magic('timeit', '-n1 -r1 print(1/4)')
586
587	@dec.skipif(execution.profile is None)	564	@dec.skipif(execution.profile is None)
588	def test_prun_special_syntax():	565	def test_prun_special_syntax():
589	"Test %%prun with IPython special syntax"	566	"Test %%prun with IPython special syntax"

IPython/core/tests/test_magic_terminal.py

0 +1 -8

                 finally:
                     sys.stdin = stdin_save
-            PY31 = sys.version_info[:2] == (3,1)
             def test_cpaste():
                 """Test cpaste magic"""
                                   ],
                          'fail': ["1 + runf()",
+                                  "++ runf()",
                          ]}
-                # I don't know why this is failing specifically on Python 3.1. I've
-                # checked it manually interactively, but we don't care enough about 3.1
-                # to spend time fiddling with the tests, so we just skip it.
-                if not PY31:
-                    tests['fail'].append("++ runf()")
                 ip.user_ns['runf'] = runf
                 for code in tests['pass']:

IPython/core/tests/test_oinspect.py

0 0 -1

                 nt.assert_equal(i['type_name'], 'type')
                 expted_class = str(type(type))  # <class 'type'> (Python 3) or <type 'type'>
                 nt.assert_equal(i['base_class'], expted_class)
-                if sys.version_info > (3,):
                 nt.assert_regex(i['string_form'], "<class 'IPython.core.tests.test_oinspect.Call'( at 0x[0-9a-f]{1,9})?>")
                 fname = __file__
                 if fname.endswith(".pyc"):

IPython/core/tests/test_ultratb.py

0 0 -1

             #----------------------------------------------------------------------------
             # module testing (minimal)
-            if sys.version_info > (3,):
             def test_handlers():
                 def spam(c, d_e):
                     (d, e) = d_e

IPython/testing/iptest.py

0 0 -1

             # Enable printing all warnings raise by IPython's modules
             warnings.filterwarnings('ignore', message='.*Matplotlib is building the font cache.*', category=UserWarning, module='.*')
-            if sys.version_info > (3,0):
             warnings.filterwarnings('error', message='.*', category=ResourceWarning, module='.*')
             warnings.filterwarnings('error', message=".*{'config': True}.*", category=DeprecationWarning, module='IPy.*')
             warnings.filterwarnings('default', message='.*', category=Warning, module='IPy.*')

IPython/testing/skipdoctest.py

0 +3 -16

@@ -1,26 +1,13 b''
1	"""Decorators marks that a doctest should be skipped~~, for both python 2 and 3~~.	1	"""Decorators marks that a doctest should be skipped.
2		2
3	The IPython.testing.decorators module triggers various extra imports, including	3	The IPython.testing.decorators module triggers various extra imports, including
4	numpy and sympy if they're present. Since this decorator is used in core parts	4	numpy and sympy if they're present. Since this decorator is used in core parts
5	of IPython, it's in a separate module so that running IPython doesn't trigger	5	of IPython, it's in a separate module so that running IPython doesn't trigger
6	those imports."""	6	those imports."""
7		7
8	#-----------------------------------------------------------------------------	8	# Copyright (C) IPython Development Team
9	# Copyright (C) 2009-2011 The IPython Development Team	9	# Distributed under the terms of the Modified BSD License.
10	#
11	# Distributed under the terms of the BSD License. The full license is in
12	# the file COPYING, distributed as part of this software.
13	#-----------------------------------------------------------------------------
14		10
15	#-----------------------------------------------------------------------------
16	# Imports
17	#-----------------------------------------------------------------------------
18
19	import sys
20
21	#-----------------------------------------------------------------------------
22	# Decorators
23	#-----------------------------------------------------------------------------
24		11
25	def skip_doctest(f):	12	def skip_doctest(f):
26	"""Decorator - mark a function or method for skipping its doctest.	13	"""Decorator - mark a function or method for skipping its doctest.

IPython/utils/tokenize2.py

0 +590 -4

This diff has been collapsed as it changes many lines, (594 lines changed) Show them Hide them
	@@ -1,9 +1,595 b''
	1	"""Load our patched versions of tokenize.	1	"""Patched version of standard library tokenize, to deal with various bugs.
			2
			3	Based on Python 3.2 code.
			4
			5	Patches:
			6
			7	- Gareth Rees' patch for Python issue #12691 (untokenizing)
			8	- Except we don't encode the output of untokenize
			9	- Python 2 compatible syntax, so that it can be byte-compiled at installation
			10	- Newlines in comments and blank lines should be either NL or NEWLINE, depending
			11	on whether they are in a multi-line statement. Filed as Python issue #17061.
			12	- Export generate_tokens & TokenError
			13	- u and rb literals are allowed under Python 3.3 and above.
			14
			15	------------------------------------------------------------------------------
			16	Tokenization help for Python programs.
			17
			18	tokenize(readline) is a generator that breaks a stream of bytes into
			19	Python tokens. It decodes the bytes according to PEP-0263 for
			20	determining source file encoding.
			21
			22	It accepts a readline-like method which is called repeatedly to get the
			23	next line of input (or b"" for EOF). It generates 5-tuples with these
			24	members:
			25
			26	the token type (see token.py)
			27	the token (a string)
			28	the starting (row, column) indices of the token (a 2-tuple of ints)
			29	the ending (row, column) indices of the token (a 2-tuple of ints)
			30	the original line (string)
			31
			32	It is designed to match the working of the Python tokenizer exactly, except
			33	that it produces COMMENT tokens for comments and gives type OP for all
			34	operators. Additionally, all token lists start with an ENCODING token
			35	which tells you which encoding was used to decode the bytes stream.
	2	"""	36	"""
			37	from __future__ import absolute_import
	3		38
			39	__author__ = 'Ka-Ping Yee <ping@lfw.org>'
			40	__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
			41	'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
			42	'Michael Foord')
			43	import builtins
			44	import re
	4	import sys	45	import sys
			46	from token import *
			47	from codecs import lookup, BOM_UTF8
			48	import collections
			49	from io import TextIOWrapper
			50	cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
			51
			52	import token
			53	__all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
			54	"NL", "untokenize", "ENCODING", "TokenInfo"]
			55	del token
			56
			57	__all__ += ["generate_tokens", "TokenError"]
			58
			59	COMMENT = N_TOKENS
			60	tok_name[COMMENT] = 'COMMENT'
			61	NL = N_TOKENS + 1
			62	tok_name[NL] = 'NL'
			63	ENCODING = N_TOKENS + 2
			64	tok_name[ENCODING] = 'ENCODING'
			65	N_TOKENS += 3
			66
			67	class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
			68	def __repr__(self):
			69	annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
			70	return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
			71	self._replace(type=annotated_type))
			72
			73	def group(*choices): return '(' + '|'.join(choices) + ')'
			74	def any(*choices): return group(*choices) + '*'
			75	def maybe(*choices): return group(*choices) + '?'
			76
			77	# Note: we use unicode matching for names ("\w") but ascii matching for
			78	# number literals.
			79	Whitespace = r'[ \f\t]*'
			80	Comment = r'#[^\r\n]*'
			81	Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
			82	Name = r'\w+'
			83
			84	Hexnumber = r'0[xX][0-9a-fA-F]+'
			85	Binnumber = r'0[bB][01]+'
			86	Octnumber = r'0[oO][0-7]+'
			87	Decnumber = r'(?:0+|[1-9][0-9]*)'
			88	Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
			89	Exponent = r'[eE][-+]?[0-9]+'
			90	Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
			91	Expfloat = r'[0-9]+' + Exponent
			92	Floatnumber = group(Pointfloat, Expfloat)
			93	Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
			94	Number = group(Imagnumber, Floatnumber, Intnumber)
			95
			96	if sys.version_info.minor >= 3:
			97	StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
			98	else:
			99	StringPrefix = r'(?:[bB]?[rR]?)?'
			100
			101	# Tail end of ' string.
			102	Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
			103	# Tail end of " string.
			104	Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
			105	# Tail end of ''' string.
			106	Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
			107	# Tail end of """ string.
			108	Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
			109	Triple = group(StringPrefix + "'''", StringPrefix + '"""')
			110	# Single-line ' or " string.
			111	String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
			112	StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
			113
			114	# Because of leftmost-then-longest match semantics, be sure to put the
			115	# longest operators first (e.g., if = came before ==, == would get
			116	# recognized as two instances of =).
			117	Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
			118	r"//=?", r"->",
			119	r"[+\-*/%&|^=<>]=?",
			120	r"~")
			121
			122	Bracket = '[][(){}]'
			123	Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
			124	Funny = group(Operator, Bracket, Special)
			125
			126	PlainToken = group(Number, Funny, String, Name)
			127	Token = Ignore + PlainToken
			128
			129	# First (or only) line of ' or " string.
			130	ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
			131	group("'", r'\\\r?\n'),
			132	StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
			133	group('"', r'\\\r?\n'))
			134	PseudoExtras = group(r'\\\r?\n', Comment, Triple)
			135	PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
			136
			137	def _compile(expr):
			138	return re.compile(expr, re.UNICODE)
			139
			140	tokenprog, pseudoprog, single3prog, double3prog = map(
			141	_compile, (Token, PseudoToken, Single3, Double3))
			142	endprogs = {"'": _compile(Single), '"': _compile(Double),
			143	"'''": single3prog, '"""': double3prog,
			144	"r'''": single3prog, 'r"""': double3prog,
			145	"b'''": single3prog, 'b"""': double3prog,
			146	"R'''": single3prog, 'R"""': double3prog,
			147	"B'''": single3prog, 'B"""': double3prog,
			148	"br'''": single3prog, 'br"""': double3prog,
			149	"bR'''": single3prog, 'bR"""': double3prog,
			150	"Br'''": single3prog, 'Br"""': double3prog,
			151	"BR'''": single3prog, 'BR"""': double3prog,
			152	'r': None, 'R': None, 'b': None, 'B': None}
			153
			154	triple_quoted = {}
			155	for t in ("'''", '"""',
			156	"r'''", 'r"""', "R'''", 'R"""',
			157	"b'''", 'b"""', "B'''", 'B"""',
			158	"br'''", 'br"""', "Br'''", 'Br"""',
			159	"bR'''", 'bR"""', "BR'''", 'BR"""'):
			160	triple_quoted[t] = t
			161	single_quoted = {}
			162	for t in ("'", '"',
			163	"r'", 'r"', "R'", 'R"',
			164	"b'", 'b"', "B'", 'B"',
			165	"br'", 'br"', "Br'", 'Br"',
			166	"bR'", 'bR"', "BR'", 'BR"' ):
			167	single_quoted[t] = t
			168
			169	if sys.version_info.minor >= 3:
			170	# Python 3.3
			171	for _prefix in ['rb', 'rB', 'Rb', 'RB', 'u', 'U']:
			172	_t2 = _prefix+'"""'
			173	endprogs[_t2] = double3prog
			174	triple_quoted[_t2] = _t2
			175	_t1 = _prefix + "'''"
			176	endprogs[_t1] = single3prog
			177	triple_quoted[_t1] = _t1
			178	single_quoted[_prefix+'"'] = _prefix+'"'
			179	single_quoted[_prefix+"'"] = _prefix+"'"
			180	del _prefix, _t2, _t1
			181	endprogs['u'] = None
			182	endprogs['U'] = None
			183
			184	del _compile
			185
			186	tabsize = 8
			187
			188	class TokenError(Exception): pass
			189
			190	class StopTokenizing(Exception): pass
			191
			192
			193	class Untokenizer:
			194
			195	def __init__(self):
			196	self.tokens = []
			197	self.prev_row = 1
			198	self.prev_col = 0
			199	self.encoding = 'utf-8'
			200
			201	def add_whitespace(self, tok_type, start):
			202	row, col = start
			203	assert row >= self.prev_row
			204	col_offset = col - self.prev_col
			205	if col_offset > 0:
			206	self.tokens.append(" " * col_offset)
			207	elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
			208	# Line was backslash-continued.
			209	self.tokens.append(" ")
			210
			211	def untokenize(self, tokens):
			212	iterable = iter(tokens)
			213	for t in iterable:
			214	if len(t) == 2:
			215	self.compat(t, iterable)
			216	break
			217	tok_type, token, start, end = t[:4]
			218	if tok_type == ENCODING:
			219	self.encoding = token
			220	continue
			221	self.add_whitespace(tok_type, start)
			222	self.tokens.append(token)
			223	self.prev_row, self.prev_col = end
			224	if tok_type in (NEWLINE, NL):
			225	self.prev_row += 1
			226	self.prev_col = 0
			227	return "".join(self.tokens)
			228
			229	def compat(self, token, iterable):
			230	# This import is here to avoid problems when the itertools
			231	# module is not built yet and tokenize is imported.
			232	from itertools import chain
			233	startline = False
			234	prevstring = False
			235	indents = []
			236	toks_append = self.tokens.append
			237
			238	for tok in chain([token], iterable):
			239	toknum, tokval = tok[:2]
			240	if toknum == ENCODING:
			241	self.encoding = tokval
			242	continue
			243
			244	if toknum in (NAME, NUMBER):
			245	tokval += ' '
			246
			247	# Insert a space between two consecutive strings
			248	if toknum == STRING:
			249	if prevstring:
			250	tokval = ' ' + tokval
			251	prevstring = True
			252	else:
			253	prevstring = False
			254
			255	if toknum == INDENT:
			256	indents.append(tokval)
			257	continue
			258	elif toknum == DEDENT:
			259	indents.pop()
			260	continue
			261	elif toknum in (NEWLINE, NL):
			262	startline = True
			263	elif startline and indents:
			264	toks_append(indents[-1])
			265	startline = False
			266	toks_append(tokval)
			267
			268
			269	def untokenize(tokens):
			270	"""
			271	Convert ``tokens`` (an iterable) back into Python source code. Return
			272	a bytes object, encoded using the encoding specified by the last
			273	ENCODING token in ``tokens``, or UTF-8 if no ENCODING token is found.
			274
			275	The result is guaranteed to tokenize back to match the input so that
			276	the conversion is lossless and round-trips are assured. The
			277	guarantee applies only to the token type and token string as the
			278	spacing between tokens (column positions) may change.
			279
			280	:func:`untokenize` has two modes. If the input tokens are sequences
			281	of length 2 (``type``, ``string``) then spaces are added as necessary to
			282	preserve the round-trip property.
			283
			284	If the input tokens are sequences of length 4 or more (``type``,
			285	``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
			286	spaces are added so that each token appears in the result at the
			287	position indicated by ``start`` and ``end``, if possible.
			288	"""
			289	return Untokenizer().untokenize(tokens)
			290
			291
			292	def _get_normal_name(orig_enc):
			293	"""Imitates get_normal_name in tokenizer.c."""
			294	# Only care about the first 12 characters.
			295	enc = orig_enc[:12].lower().replace("_", "-")
			296	if enc == "utf-8" or enc.startswith("utf-8-"):
			297	return "utf-8"
			298	if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
			299	enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
			300	return "iso-8859-1"
			301	return orig_enc
			302
			303	def detect_encoding(readline):
			304	"""
			305	The detect_encoding() function is used to detect the encoding that should
			306	be used to decode a Python source file. It requires one argment, readline,
			307	in the same way as the tokenize() generator.
			308
			309	It will call readline a maximum of twice, and return the encoding used
			310	(as a string) and a list of any lines (left as bytes) it has read in.
			311
			312	It detects the encoding from the presence of a utf-8 bom or an encoding
			313	cookie as specified in pep-0263. If both a bom and a cookie are present,
			314	but disagree, a SyntaxError will be raised. If the encoding cookie is an
			315	invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
			316	'utf-8-sig' is returned.
			317
			318	If no encoding is specified, then the default of 'utf-8' will be returned.
			319	"""
			320	bom_found = False
			321	encoding = None
			322	default = 'utf-8'
			323	def read_or_stop():
			324	try:
			325	return readline()
			326	except StopIteration:
			327	return b''
			328
			329	def find_cookie(line):
			330	try:
			331	# Decode as UTF-8. Either the line is an encoding declaration,
			332	# in which case it should be pure ASCII, or it must be UTF-8
			333	# per default encoding.
			334	line_string = line.decode('utf-8')
			335	except UnicodeDecodeError:
			336	raise SyntaxError("invalid or missing encoding declaration")
			337
			338	matches = cookie_re.findall(line_string)
			339	if not matches:
			340	return None
			341	encoding = _get_normal_name(matches[0])
			342	try:
			343	codec = lookup(encoding)
			344	except LookupError:
			345	# This behaviour mimics the Python interpreter
			346	raise SyntaxError("unknown encoding: " + encoding)
			347
			348	if bom_found:
			349	if encoding != 'utf-8':
			350	# This behaviour mimics the Python interpreter
			351	raise SyntaxError('encoding problem: utf-8')
			352	encoding += '-sig'
			353	return encoding
			354
			355	first = read_or_stop()
			356	if first.startswith(BOM_UTF8):
			357	bom_found = True
			358	first = first[3:]
			359	default = 'utf-8-sig'
			360	if not first:
			361	return default, []
			362
			363	encoding = find_cookie(first)
			364	if encoding:
			365	return encoding, [first]
			366
			367	second = read_or_stop()
			368	if not second:
			369	return default, [first]
			370
			371	encoding = find_cookie(second)
			372	if encoding:
			373	return encoding, [first, second]
			374
			375	return default, [first, second]
			376
			377
			378	def open(filename):
			379	"""Open a file in read only mode using the encoding detected by
			380	detect_encoding().
			381	"""
			382	buffer = builtins.open(filename, 'rb')
			383	encoding, lines = detect_encoding(buffer.readline)
			384	buffer.seek(0)
			385	text = TextIOWrapper(buffer, encoding, line_buffering=True)
			386	text.mode = 'r'
			387	return text
			388
			389
			390	def tokenize(readline):
			391	"""
			392	The tokenize() generator requires one argment, readline, which
			393	must be a callable object which provides the same interface as the
			394	readline() method of built-in file objects. Each call to the function
			395	should return one line of input as bytes. Alternately, readline
			396	can be a callable function terminating with StopIteration:
			397	readline = open(myfile, 'rb').__next__ # Example of alternate readline
			398
			399	The generator produces 5-tuples with these members: the token type; the
			400	token string; a 2-tuple (srow, scol) of ints specifying the row and
			401	column where the token begins in the source; a 2-tuple (erow, ecol) of
			402	ints specifying the row and column where the token ends in the source;
			403	and the line on which the token was found. The line passed is the
			404	logical line; continuation lines are included.
	5		405
	6	if sys.version_info[0] >= 3:	406	The first token sequence will always be an ENCODING token
	7	from ._tokenize_py3 import *	407	which tells you which encoding was used to decode the bytes stream.
			408	"""
			409	# This import is here to avoid problems when the itertools module is not
			410	# built yet and tokenize is imported.
			411	from itertools import chain, repeat
			412	encoding, consumed = detect_encoding(readline)
			413	rl_gen = iter(readline, b"")
			414	empty = repeat(b"")
			415	return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
			416
			417
			418	def _tokenize(readline, encoding):
			419	lnum = parenlev = continued = 0
			420	numchars = '0123456789'
			421	contstr, needcont = '', 0
			422	contline = None
			423	indents = [0]
			424
			425	if encoding is not None:
			426	if encoding == "utf-8-sig":
			427	# BOM will already have been stripped.
			428	encoding = "utf-8"
			429	yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
			430	while True: # loop over lines in stream
			431	try:
			432	line = readline()
			433	except StopIteration:
			434	line = b''
			435
			436	if encoding is not None:
			437	line = line.decode(encoding)
			438	lnum += 1
			439	pos, max = 0, len(line)
			440
			441	if contstr: # continued string
			442	if not line:
			443	raise TokenError("EOF in multi-line string", strstart)
			444	endmatch = endprog.match(line)
			445	if endmatch:
			446	pos = end = endmatch.end(0)
			447	yield TokenInfo(STRING, contstr + line[:end],
			448	strstart, (lnum, end), contline + line)
			449	contstr, needcont = '', 0
			450	contline = None
			451	elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
			452	yield TokenInfo(ERRORTOKEN, contstr + line,
			453	strstart, (lnum, len(line)), contline)
			454	contstr = ''
			455	contline = None
			456	continue
			457	else:
			458	contstr = contstr + line
			459	contline = contline + line
			460	continue
			461
			462	elif parenlev == 0 and not continued: # new statement
			463	if not line: break
			464	column = 0
			465	while pos < max: # measure leading whitespace
			466	if line[pos] == ' ':
			467	column += 1
			468	elif line[pos] == '\t':
			469	column = (column//tabsize + 1)*tabsize
			470	elif line[pos] == '\f':
			471	column = 0
			472	else:
			473	break
			474	pos += 1
			475	if pos == max:
			476	break
			477
			478	if line[pos] in '#\r\n': # skip comments or blank lines
			479	if line[pos] == '#':
			480	comment_token = line[pos:].rstrip('\r\n')
			481	nl_pos = pos + len(comment_token)
			482	yield TokenInfo(COMMENT, comment_token,
			483	(lnum, pos), (lnum, pos + len(comment_token)), line)
			484	yield TokenInfo(NEWLINE, line[nl_pos:],
			485	(lnum, nl_pos), (lnum, len(line)), line)
			486	else:
			487	yield TokenInfo(NEWLINE, line[pos:],
			488	(lnum, pos), (lnum, len(line)), line)
			489	continue
			490
			491	if column > indents[-1]: # count indents or dedents
			492	indents.append(column)
			493	yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
			494	while column < indents[-1]:
			495	if column not in indents:
			496	raise IndentationError(
			497	"unindent does not match any outer indentation level",
			498	("<tokenize>", lnum, pos, line))
			499	indents = indents[:-1]
			500	yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
			501
			502	else: # continued statement
			503	if not line:
			504	raise TokenError("EOF in multi-line statement", (lnum, 0))
			505	continued = 0
			506
			507	while pos < max:
			508	pseudomatch = pseudoprog.match(line, pos)
			509	if pseudomatch: # scan for tokens
			510	start, end = pseudomatch.span(1)
			511	spos, epos, pos = (lnum, start), (lnum, end), end
			512	token, initial = line[start:end], line[start]
			513
			514	if (initial in numchars or # ordinary number
			515	(initial == '.' and token != '.' and token != '...')):
			516	yield TokenInfo(NUMBER, token, spos, epos, line)
			517	elif initial in '\r\n':
			518	yield TokenInfo(NL if parenlev > 0 else NEWLINE,
			519	token, spos, epos, line)
			520	elif initial == '#':
			521	assert not token.endswith("\n")
			522	yield TokenInfo(COMMENT, token, spos, epos, line)
			523	elif token in triple_quoted:
			524	endprog = endprogs[token]
			525	endmatch = endprog.match(line, pos)
			526	if endmatch: # all on one line
			527	pos = endmatch.end(0)
			528	token = line[start:pos]
			529	yield TokenInfo(STRING, token, spos, (lnum, pos), line)
			530	else:
			531	strstart = (lnum, start) # multiple lines
			532	contstr = line[start:]
			533	contline = line
			534	break
			535	elif initial in single_quoted or \
			536	token[:2] in single_quoted or \
			537	token[:3] in single_quoted:
			538	if token[-1] == '\n': # continued string
			539	strstart = (lnum, start)
			540	endprog = (endprogs[initial] or endprogs[token[1]] or
			541	endprogs[token[2]])
			542	contstr, needcont = line[start:], 1
			543	contline = line
			544	break
			545	else: # ordinary string
			546	yield TokenInfo(STRING, token, spos, epos, line)
			547	elif initial.isidentifier(): # ordinary name
			548	yield TokenInfo(NAME, token, spos, epos, line)
			549	elif initial == '\\': # continued stmt
			550	continued = 1
			551	else:
			552	if initial in '([{':
			553	parenlev += 1
			554	elif initial in ')]}':
			555	parenlev -= 1
			556	yield TokenInfo(OP, token, spos, epos, line)
			557	else:
			558	yield TokenInfo(ERRORTOKEN, line[pos],
			559	(lnum, pos), (lnum, pos+1), line)
			560	pos += 1
			561
			562	for indent in indents[1:]: # pop remaining indent levels
			563	yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
			564	yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
			565
			566
			567	# An undocumented, backwards compatible, API for all the places in the standard
			568	# library that expect to be able to use tokenize with strings
			569	def generate_tokens(readline):
			570	return _tokenize(readline, None)
			571
			572	if __name__ == "__main__":
			573	# Quick sanity check
			574	s = b'''def parseline(self, line):
			575	"""Parse the line into a command name and a string containing
			576	the arguments. Returns a tuple containing (command, args, line).
			577	'command' and 'args' may be None if the line couldn't be parsed.
			578	"""
			579	line = line.strip()
			580	if not line:
			581	return None, None, line
			582	elif line[0] == '?':
			583	line = 'help ' + line[1:]
			584	elif line[0] == '!':
			585	if hasattr(self, 'do_shell'):
			586	line = 'shell ' + line[1:]
	8	else:	587	else:
	9	from ._tokenize_py2 import *	588	return None, None, line
			589	i, n = 0, len(line)
			590	while i < n and line[i] in self.identchars: i = i+1
			591	cmd, arg = line[:i], line[i:].strip()
			592	return cmd, arg, line
			593	'''
			594	for tok in tokenize(iter(s.splitlines()).__next__):
			595	print(tok)

setup.py

0 0 -2

                 print(error, file=sys.stderr)
                 sys.exit(1)
-            PY3 = (sys.version_info[0] >= 3)
             # At least we're on the python version we need, move on.
             #-------------------------------------------------------------------------------

tools/github_stats.py

0 0 -3

                 print("DEPRECATE: backport_pr.py is deprecated and is is now recommended"
                       "to install `ghpro` from PyPI.", file=sys.stderr)
-                # deal with unicode
-                if sys.version_info < (3,):
-                    sys.stdout = codecs.getwriter('utf8')(sys.stdout)
                 # Whether to add reST urls for all issues in printout.
                 show_urls = True

IPython/utils/_tokenize_py2.py

0 removed 0 -439

	@@ -1,439 +0,0 b''
	1	"""Patched version of standard library tokenize, to deal with various bugs.
	2
	3	Patches
	4
	5	- Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
	6	manually applied.
	7	- Newlines in comments and blank lines should be either NL or NEWLINE, depending
	8	on whether they are in a multi-line statement. Filed as Python issue #17061.
	9
	10	-------------------------------------------------------------------------------
	11	Tokenization help for Python programs.
	12
	13	generate_tokens(readline) is a generator that breaks a stream of
	14	text into Python tokens. It accepts a readline-like method which is called
	15	repeatedly to get the next line of input (or "" for EOF). It generates
	16	5-tuples with these members:
	17
	18	the token type (see token.py)
	19	the token (a string)
	20	the starting (row, column) indices of the token (a 2-tuple of ints)
	21	the ending (row, column) indices of the token (a 2-tuple of ints)
	22	the original line (string)
	23
	24	It is designed to match the working of the Python tokenizer exactly, except
	25	that it produces COMMENT tokens for comments and gives type OP for all
	26	operators.
	27
	28	Older entry points
	29	tokenize_loop(readline, tokeneater)
	30	tokenize(readline, tokeneater=printtoken)
	31	are the same, except instead of generating tokens, tokeneater is a callback
	32	function to which the 5 fields described above are passed as 5 arguments,
	33	each time a new token is found."""
	34	from __future__ import print_function
	35
	36	__author__ = 'Ka-Ping Yee <ping@lfw.org>'
	37	__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
	38	'Skip Montanaro, Raymond Hettinger')
	39
	40	import string, re
	41	from token import *
	42
	43	import token
	44	__all__ = [x for x in dir(token) if not x.startswith("_")]
	45	__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
	46	del x
	47	del token
	48
	49	__all__ += ["TokenError"]
	50
	51	COMMENT = N_TOKENS
	52	tok_name[COMMENT] = 'COMMENT'
	53	NL = N_TOKENS + 1
	54	tok_name[NL] = 'NL'
	55	N_TOKENS += 2
	56
	57	def group(*choices): return '(' + '\|'.join(choices) + ')'
	58	def any(choices): return group(choices) + '*'
	59	def maybe(choices): return group(choices) + '?'
	60
	61	Whitespace = r'[ \f\t]*'
	62	Comment = r'#[^\r\n]*'
	63	Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
	64	Name = r'[a-zA-Z_]\w*'
	65
	66	Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
	67	Octnumber = r'(0[oO][0-7]+)\|(0[0-7]*)[lL]?'
	68	Binnumber = r'0[bB][01]+[lL]?'
	69	Decnumber = r'[1-9]\d*[lL]?'
	70	Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
	71	Exponent = r'[eE][-+]?\d+'
	72	Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
	73	Expfloat = r'\d+' + Exponent
	74	Floatnumber = group(Pointfloat, Expfloat)
	75	Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
	76	Number = group(Imagnumber, Floatnumber, Intnumber)
	77
	78	# Tail end of ' string.
	79	Single = r"[^'\\](?:\\.[^'\\])*'"
	80	# Tail end of " string.
	81	Double = r'[^"\\](?:\\.[^"\\])*"'
	82	# Tail end of ''' string.
	83	Single3 = r"[^'\\](?:(?:\\.\|'(?!''))[^'\\])*'''"
	84	# Tail end of """ string.
	85	Double3 = r'[^"\\](?:(?:\\.\|"(?!""))[^"\\])*"""'
	86	Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
	87	# Single-line ' or " string.
	88	String = group(r"[uUbB]?[rR]?'[^\n'\\](?:\\.[^\n'\\])*'",
	89	r'[uUbB]?[rR]?"[^\n"\\](?:\\.[^\n"\\])*"')
	90
	91	# Because of leftmost-then-longest match semantics, be sure to put the
	92	# longest operators first (e.g., if = came before ==, == would get
	93	# recognized as two instances of =).
	94	Operator = group(r"\\=?", r">>=?", r"<<=?", r"<>", r"!=",
	95	r"//=?",
	96	r"[+\-*/%&\|^=<>]=?",
	97	r"~")
	98
	99	Bracket = '[][(){}]'
	100	Special = group(r'\r?\n', r'[:;.,`@]')
	101	Funny = group(Operator, Bracket, Special)
	102
	103	PlainToken = group(Number, Funny, String, Name)
	104	Token = Ignore + PlainToken
	105
	106	# First (or only) line of ' or " string.
	107	ContStr = group(r"[uUbB]?[rR]?'[^\n'\\](?:\\.[^\n'\\])*" +
	108	group("'", r'\\\r?\n'),
	109	r'[uUbB]?[rR]?"[^\n"\\](?:\\.[^\n"\\])*' +
	110	group('"', r'\\\r?\n'))
	111	PseudoExtras = group(r'\\\r?\n', Comment, Triple)
	112	PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
	113
	114	tokenprog, pseudoprog, single3prog, double3prog = map(
	115	re.compile, (Token, PseudoToken, Single3, Double3))
	116	endprogs = {"'": re.compile(Single), '"': re.compile(Double),
	117	"'''": single3prog, '"""': double3prog,
	118	"r'''": single3prog, 'r"""': double3prog,
	119	"u'''": single3prog, 'u"""': double3prog,
	120	"ur'''": single3prog, 'ur"""': double3prog,
	121	"R'''": single3prog, 'R"""': double3prog,
	122	"U'''": single3prog, 'U"""': double3prog,
	123	"uR'''": single3prog, 'uR"""': double3prog,
	124	"Ur'''": single3prog, 'Ur"""': double3prog,
	125	"UR'''": single3prog, 'UR"""': double3prog,
	126	"b'''": single3prog, 'b"""': double3prog,
	127	"br'''": single3prog, 'br"""': double3prog,
	128	"B'''": single3prog, 'B"""': double3prog,
	129	"bR'''": single3prog, 'bR"""': double3prog,
	130	"Br'''": single3prog, 'Br"""': double3prog,
	131	"BR'''": single3prog, 'BR"""': double3prog,
	132	'r': None, 'R': None, 'u': None, 'U': None,
	133	'b': None, 'B': None}
	134
	135	triple_quoted = {}
	136	for t in ("'''", '"""',
	137	"r'''", 'r"""', "R'''", 'R"""',
	138	"u'''", 'u"""', "U'''", 'U"""',
	139	"ur'''", 'ur"""', "Ur'''", 'Ur"""',
	140	"uR'''", 'uR"""', "UR'''", 'UR"""',
	141	"b'''", 'b"""', "B'''", 'B"""',
	142	"br'''", 'br"""', "Br'''", 'Br"""',
	143	"bR'''", 'bR"""', "BR'''", 'BR"""'):
	144	triple_quoted[t] = t
	145	single_quoted = {}
	146	for t in ("'", '"',
	147	"r'", 'r"', "R'", 'R"',
	148	"u'", 'u"', "U'", 'U"',
	149	"ur'", 'ur"', "Ur'", 'Ur"',
	150	"uR'", 'uR"', "UR'", 'UR"',
	151	"b'", 'b"', "B'", 'B"',
	152	"br'", 'br"', "Br'", 'Br"',
	153	"bR'", 'bR"', "BR'", 'BR"' ):
	154	single_quoted[t] = t
	155
	156	tabsize = 8
	157
	158	class TokenError(Exception): pass
	159
	160	class StopTokenizing(Exception): pass
	161
	162	def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
	163	srow, scol = srow_scol
	164	erow, ecol = erow_ecol
	165	print("%d,%d-%d,%d:\t%s\t%s" % \
	166	(srow, scol, erow, ecol, tok_name[type], repr(token)))
	167
	168	def tokenize(readline, tokeneater=printtoken):
	169	"""
	170	The tokenize() function accepts two parameters: one representing the
	171	input stream, and one providing an output mechanism for tokenize().
	172
	173	The first parameter, readline, must be a callable object which provides
	174	the same interface as the readline() method of built-in file objects.
	175	Each call to the function should return one line of input as a string.
	176
	177	The second parameter, tokeneater, must also be a callable object. It is
	178	called once for each token, with five arguments, corresponding to the
	179	tuples generated by generate_tokens().
	180	"""
	181	try:
	182	tokenize_loop(readline, tokeneater)
	183	except StopTokenizing:
	184	pass
	185
	186	# backwards compatible interface
	187	def tokenize_loop(readline, tokeneater):
	188	for token_info in generate_tokens(readline):
	189	tokeneater(*token_info)
	190
	191	class Untokenizer:
	192
	193	def __init__(self):
	194	self.tokens = []
	195	self.prev_row = 1
	196	self.prev_col = 0
	197
	198	def add_whitespace(self, start):
	199	row, col = start
	200	assert row >= self.prev_row
	201	col_offset = col - self.prev_col
	202	if col_offset > 0:
	203	self.tokens.append(" " * col_offset)
	204	elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
	205	# Line was backslash-continued
	206	self.tokens.append(" ")
	207
	208	def untokenize(self, tokens):
	209	iterable = iter(tokens)
	210	for t in iterable:
	211	if len(t) == 2:
	212	self.compat(t, iterable)
	213	break
	214	tok_type, token, start, end = t[:4]
	215	self.add_whitespace(start)
	216	self.tokens.append(token)
	217	self.prev_row, self.prev_col = end
	218	if tok_type in (NEWLINE, NL):
	219	self.prev_row += 1
	220	self.prev_col = 0
	221	return "".join(self.tokens)
	222
	223	def compat(self, token, iterable):
	224	# This import is here to avoid problems when the itertools
	225	# module is not built yet and tokenize is imported.
	226	from itertools import chain
	227	startline = False
	228	prevstring = False
	229	indents = []
	230	toks_append = self.tokens.append
	231	for tok in chain([token], iterable):
	232	toknum, tokval = tok[:2]
	233
	234	if toknum in (NAME, NUMBER):
	235	tokval += ' '
	236
	237	# Insert a space between two consecutive strings
	238	if toknum == STRING:
	239	if prevstring:
	240	tokval = ' ' + tokval
	241	prevstring = True
	242	else:
	243	prevstring = False
	244
	245	if toknum == INDENT:
	246	indents.append(tokval)
	247	continue
	248	elif toknum == DEDENT:
	249	indents.pop()
	250	continue
	251	elif toknum in (NEWLINE, NL):
	252	startline = True
	253	elif startline and indents:
	254	toks_append(indents[-1])
	255	startline = False
	256	toks_append(tokval)
	257
	258	def untokenize(iterable):
	259	"""Transform tokens back into Python source code.
	260
	261	Each element returned by the iterable must be a token sequence
	262	with at least two elements, a token number and token value. If
	263	only two tokens are passed, the resulting output is poor.
	264
	265	Round-trip invariant for full input:
	266	Untokenized source will match input source exactly
	267
	268	Round-trip invariant for limited input:
	269	# Output text will tokenize back to the input
	270	t1 = [tok[:2] for tok in generate_tokens(f.readline)]
	271	newcode = untokenize(t1)
	272	readline = iter(newcode.splitlines(1)).next
	273	t2 = [tok[:2] for tok in generate_tokens(readline)]
	274	assert t1 == t2
	275	"""
	276	ut = Untokenizer()
	277	return ut.untokenize(iterable)
	278
	279	def generate_tokens(readline):
	280	"""
	281	The generate_tokens() generator requires one argument, readline, which
	282	must be a callable object which provides the same interface as the
	283	readline() method of built-in file objects. Each call to the function
	284	should return one line of input as a string. Alternately, readline
	285	can be a callable function terminating with StopIteration:
	286	readline = open(myfile).next # Example of alternate readline
	287
	288	The generator produces 5-tuples with these members: the token type; the
	289	token string; a 2-tuple (srow, scol) of ints specifying the row and
	290	column where the token begins in the source; a 2-tuple (erow, ecol) of
	291	ints specifying the row and column where the token ends in the source;
	292	and the line on which the token was found. The line passed is the
	293	logical line; continuation lines are included.
	294	"""
	295	lnum = parenlev = continued = 0
	296	namechars, numchars = string.ascii_letters + '_', '0123456789'
	297	contstr, needcont = '', 0
	298	contline = None
	299	indents = [0]
	300
	301	while 1: # loop over lines in stream
	302	try:
	303	line = readline()
	304	except StopIteration:
	305	line = ''
	306	lnum += 1
	307	pos, max = 0, len(line)
	308
	309	if contstr: # continued string
	310	if not line:
	311	raise TokenError("EOF in multi-line string", strstart)
	312	endmatch = endprog.match(line)
	313	if endmatch:
	314	pos = end = endmatch.end(0)
	315	yield (STRING, contstr + line[:end],
	316	strstart, (lnum, end), contline + line)
	317	contstr, needcont = '', 0
	318	contline = None
	319	elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
	320	yield (ERRORTOKEN, contstr + line,
	321	strstart, (lnum, len(line)), contline)
	322	contstr = ''
	323	contline = None
	324	continue
	325	else:
	326	contstr = contstr + line
	327	contline = contline + line
	328	continue
	329
	330	elif parenlev == 0 and not continued: # new statement
	331	if not line: break
	332	column = 0
	333	while pos < max: # measure leading whitespace
	334	if line[pos] == ' ':
	335	column += 1
	336	elif line[pos] == '\t':
	337	column = (column//tabsize + 1)*tabsize
	338	elif line[pos] == '\f':
	339	column = 0
	340	else:
	341	break
	342	pos += 1
	343	if pos == max:
	344	break
	345
	346	if line[pos] in '#\r\n': # skip comments or blank lines
	347	if line[pos] == '#':
	348	comment_token = line[pos:].rstrip('\r\n')
	349	nl_pos = pos + len(comment_token)
	350	yield (COMMENT, comment_token,
	351	(lnum, pos), (lnum, pos + len(comment_token)), line)
	352	yield (NEWLINE, line[nl_pos:],
	353	(lnum, nl_pos), (lnum, len(line)), line)
	354	else:
	355	yield (NEWLINE, line[pos:],
	356	(lnum, pos), (lnum, len(line)), line)
	357	continue
	358
	359	if column > indents[-1]: # count indents or dedents
	360	indents.append(column)
	361	yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
	362	while column < indents[-1]:
	363	if column not in indents:
	364	raise IndentationError(
	365	"unindent does not match any outer indentation level",
	366	("<tokenize>", lnum, pos, line))
	367	indents = indents[:-1]
	368	yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
	369
	370	else: # continued statement
	371	if not line:
	372	raise TokenError("EOF in multi-line statement", (lnum, 0))
	373	continued = 0
	374
	375	while pos < max:
	376	pseudomatch = pseudoprog.match(line, pos)
	377	if pseudomatch: # scan for tokens
	378	start, end = pseudomatch.span(1)
	379	spos, epos, pos = (lnum, start), (lnum, end), end
	380	token, initial = line[start:end], line[start]
	381
	382	if initial in numchars or \
	383	(initial == '.' and token != '.'): # ordinary number
	384	yield (NUMBER, token, spos, epos, line)
	385	elif initial in '\r\n':
	386	yield (NL if parenlev > 0 else NEWLINE,
	387	token, spos, epos, line)
	388	elif initial == '#':
	389	assert not token.endswith("\n")
	390	yield (COMMENT, token, spos, epos, line)
	391	elif token in triple_quoted:
	392	endprog = endprogs[token]
	393	endmatch = endprog.match(line, pos)
	394	if endmatch: # all on one line
	395	pos = endmatch.end(0)
	396	token = line[start:pos]
	397	yield (STRING, token, spos, (lnum, pos), line)
	398	else:
	399	strstart = (lnum, start) # multiple lines
	400	contstr = line[start:]
	401	contline = line
	402	break
	403	elif initial in single_quoted or \
	404	token[:2] in single_quoted or \
	405	token[:3] in single_quoted:
	406	if token[-1] == '\n': # continued string
	407	strstart = (lnum, start)
	408	endprog = (endprogs[initial] or endprogs[token[1]] or
	409	endprogs[token[2]])
	410	contstr, needcont = line[start:], 1
	411	contline = line
	412	break
	413	else: # ordinary string
	414	yield (STRING, token, spos, epos, line)
	415	elif initial in namechars: # ordinary name
	416	yield (NAME, token, spos, epos, line)
	417	elif initial == '\\': # continued stmt
	418	continued = 1
	419	else:
	420	if initial in '([{':
	421	parenlev += 1
	422	elif initial in ')]}':
	423	parenlev -= 1
	424	yield (OP, token, spos, epos, line)
	425	else:
	426	yield (ERRORTOKEN, line[pos],
	427	(lnum, pos), (lnum, pos+1), line)
	428	pos += 1
	429
	430	for indent in indents[1:]: # pop remaining indent levels
	431	yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
	432	yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
	433
	434	if __name__ == '__main__': # testing
	435	import sys
	436	if len(sys.argv) > 1:
	437	tokenize(open(sys.argv[1]).readline)
	438	else:
	439	tokenize(sys.stdin.readline)

IPython/utils/_tokenize_py3.py

0 removed 0 -595

This diff has been collapsed as it changes many lines (595 lines changed).
	@@ -1,595 +0,0 b''
	1	"""Patched version of standard library tokenize, to deal with various bugs.
	2
	3	Based on Python 3.2 code.
	4
	5	Patches:
	6
	7	- Gareth Rees' patch for Python issue #12691 (untokenizing)
	8	- Except we don't encode the output of untokenize
	9	- Python 2 compatible syntax, so that it can be byte-compiled at installation
	10	- Newlines in comments and blank lines should be either NL or NEWLINE, depending
	11	on whether they are in a multi-line statement. Filed as Python issue #17061.
	12	- Export generate_tokens & TokenError
	13	- u and rb literals are allowed under Python 3.3 and above.
	14
	15	------------------------------------------------------------------------------
	16	Tokenization help for Python programs.
	17
	18	tokenize(readline) is a generator that breaks a stream of bytes into
	19	Python tokens. It decodes the bytes according to PEP-0263 for
	20	determining source file encoding.
	21
	22	It accepts a readline-like method which is called repeatedly to get the
	23	next line of input (or b"" for EOF). It generates 5-tuples with these
	24	members:
	25
	26	the token type (see token.py)
	27	the token (a string)
	28	the starting (row, column) indices of the token (a 2-tuple of ints)
	29	the ending (row, column) indices of the token (a 2-tuple of ints)
	30	the original line (string)
	31
	32	It is designed to match the working of the Python tokenizer exactly, except
	33	that it produces COMMENT tokens for comments and gives type OP for all
	34	operators. Additionally, all token lists start with an ENCODING token
	35	which tells you which encoding was used to decode the bytes stream.
	36	"""
	37	from __future__ import absolute_import
	38
	39	__author__ = 'Ka-Ping Yee <ping@lfw.org>'
	40	__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
	41	'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
	42	'Michael Foord')
	43	import builtins
	44	import re
	45	import sys
	46	from token import *
	47	from codecs import lookup, BOM_UTF8
	48	import collections
	49	from io import TextIOWrapper
	50	cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
	51
	52	import token
	53	__all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
	54	"NL", "untokenize", "ENCODING", "TokenInfo"]
	55	del token
	56
	57	__all__ += ["generate_tokens", "TokenError"]
	58
	59	COMMENT = N_TOKENS
	60	tok_name[COMMENT] = 'COMMENT'
	61	NL = N_TOKENS + 1
	62	tok_name[NL] = 'NL'
	63	ENCODING = N_TOKENS + 2
	64	tok_name[ENCODING] = 'ENCODING'
	65	N_TOKENS += 3
	66
	67	class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
	68	def __repr__(self):
	69	annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
	70	return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
	71	self._replace(type=annotated_type))
	72
	73	def group(*choices): return '(' + '\|'.join(choices) + ')'
	74	def any(choices): return group(choices) + '*'
	75	def maybe(choices): return group(choices) + '?'
	76
	77	# Note: we use unicode matching for names ("\w") but ascii matching for
	78	# number literals.
	79	Whitespace = r'[ \f\t]*'
	80	Comment = r'#[^\r\n]*'
	81	Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
	82	Name = r'\w+'
	83
	84	Hexnumber = r'0[xX][0-9a-fA-F]+'
	85	Binnumber = r'0[bB][01]+'
	86	Octnumber = r'0[oO][0-7]+'
	87	Decnumber = r'(?:0+\|[1-9][0-9]*)'
	88	Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
	89	Exponent = r'[eE][-+]?[0-9]+'
	90	Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
	91	Expfloat = r'[0-9]+' + Exponent
	92	Floatnumber = group(Pointfloat, Expfloat)
	93	Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
	94	Number = group(Imagnumber, Floatnumber, Intnumber)
	95
	96	if sys.version_info.minor >= 3:
	97	StringPrefix = r'(?:[bB][rR]?\|[rR][bB]?\|[uU])?'
	98	else:
	99	StringPrefix = r'(?:[bB]?[rR]?)?'
	100
	101	# Tail end of ' string.
	102	Single = r"[^'\\](?:\\.[^'\\])*'"
	103	# Tail end of " string.
	104	Double = r'[^"\\](?:\\.[^"\\])*"'
	105	# Tail end of ''' string.
	106	Single3 = r"[^'\\](?:(?:\\.\|'(?!''))[^'\\])*'''"
	107	# Tail end of """ string.
	108	Double3 = r'[^"\\](?:(?:\\.\|"(?!""))[^"\\])*"""'
	109	Triple = group(StringPrefix + "'''", StringPrefix + '"""')
	110	# Single-line ' or " string.
	111	String = group(StringPrefix + r"'[^\n'\\](?:\\.[^\n'\\])*'",
	112	StringPrefix + r'"[^\n"\\](?:\\.[^\n"\\])*"')
	113
	114	# Because of leftmost-then-longest match semantics, be sure to put the
	115	# longest operators first (e.g., if = came before ==, == would get
	116	# recognized as two instances of =).
	117	Operator = group(r"\\=?", r">>=?", r"<<=?", r"!=",
	118	r"//=?", r"->",
	119	r"[+\-*/%&\|^=<>]=?",
	120	r"~")
	121
	122	Bracket = '[][(){}]'
	123	Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
	124	Funny = group(Operator, Bracket, Special)
	125
	126	PlainToken = group(Number, Funny, String, Name)
	127	Token = Ignore + PlainToken
	128
	129	# First (or only) line of ' or " string.
	130	ContStr = group(StringPrefix + r"'[^\n'\\](?:\\.[^\n'\\])*" +
	131	group("'", r'\\\r?\n'),
	132	StringPrefix + r'"[^\n"\\](?:\\.[^\n"\\])*' +
	133	group('"', r'\\\r?\n'))
	134	PseudoExtras = group(r'\\\r?\n', Comment, Triple)
	135	PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
	136
	137	def _compile(expr):
	138	return re.compile(expr, re.UNICODE)
	139
	140	tokenprog, pseudoprog, single3prog, double3prog = map(
	141	_compile, (Token, PseudoToken, Single3, Double3))
	142	endprogs = {"'": _compile(Single), '"': _compile(Double),
	143	"'''": single3prog, '"""': double3prog,
	144	"r'''": single3prog, 'r"""': double3prog,
	145	"b'''": single3prog, 'b"""': double3prog,
	146	"R'''": single3prog, 'R"""': double3prog,
	147	"B'''": single3prog, 'B"""': double3prog,
	148	"br'''": single3prog, 'br"""': double3prog,
	149	"bR'''": single3prog, 'bR"""': double3prog,
	150	"Br'''": single3prog, 'Br"""': double3prog,
	151	"BR'''": single3prog, 'BR"""': double3prog,
	152	'r': None, 'R': None, 'b': None, 'B': None}
	153
	154	triple_quoted = {}
	155	for t in ("'''", '"""',
	156	"r'''", 'r"""', "R'''", 'R"""',
	157	"b'''", 'b"""', "B'''", 'B"""',
	158	"br'''", 'br"""', "Br'''", 'Br"""',
	159	"bR'''", 'bR"""', "BR'''", 'BR"""'):
	160	triple_quoted[t] = t
	161	single_quoted = {}
	162	for t in ("'", '"',
	163	"r'", 'r"', "R'", 'R"',
	164	"b'", 'b"', "B'", 'B"',
	165	"br'", 'br"', "Br'", 'Br"',
	166	"bR'", 'bR"', "BR'", 'BR"' ):
	167	single_quoted[t] = t
	168
	169	if sys.version_info.minor >= 3:
	170	# Python 3.3
	171	for _prefix in ['rb', 'rB', 'Rb', 'RB', 'u', 'U']:
	172	_t2 = _prefix+'"""'
	173	endprogs[_t2] = double3prog
	174	triple_quoted[_t2] = _t2
	175	_t1 = _prefix + "'''"
	176	endprogs[_t1] = single3prog
	177	triple_quoted[_t1] = _t1
	178	single_quoted[_prefix+'"'] = _prefix+'"'
	179	single_quoted[_prefix+"'"] = _prefix+"'"
	180	del _prefix, _t2, _t1
	181	endprogs['u'] = None
	182	endprogs['U'] = None
	183
	184	del _compile
	185
	186	tabsize = 8
	187
	188	class TokenError(Exception): pass
	189
	190	class StopTokenizing(Exception): pass
	191
	192
	193	class Untokenizer:
	194
	195	def __init__(self):
	196	self.tokens = []
	197	self.prev_row = 1
	198	self.prev_col = 0
	199	self.encoding = 'utf-8'
	200
	201	def add_whitespace(self, tok_type, start):
	202	row, col = start
	203	assert row >= self.prev_row
	204	col_offset = col - self.prev_col
	205	if col_offset > 0:
	206	self.tokens.append(" " * col_offset)
	207	elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
	208	# Line was backslash-continued.
	209	self.tokens.append(" ")
	210
	211	def untokenize(self, tokens):
	212	iterable = iter(tokens)
	213	for t in iterable:
	214	if len(t) == 2:
	215	self.compat(t, iterable)
	216	break
	217	tok_type, token, start, end = t[:4]
	218	if tok_type == ENCODING:
	219	self.encoding = token
	220	continue
	221	self.add_whitespace(tok_type, start)
	222	self.tokens.append(token)
	223	self.prev_row, self.prev_col = end
	224	if tok_type in (NEWLINE, NL):
	225	self.prev_row += 1
	226	self.prev_col = 0
	227	return "".join(self.tokens)
	228
	229	def compat(self, token, iterable):
	230	# This import is here to avoid problems when the itertools
	231	# module is not built yet and tokenize is imported.
	232	from itertools import chain
	233	startline = False
	234	prevstring = False
	235	indents = []
	236	toks_append = self.tokens.append
	237
	238	for tok in chain([token], iterable):
	239	toknum, tokval = tok[:2]
	240	if toknum == ENCODING:
	241	self.encoding = tokval
	242	continue
	243
	244	if toknum in (NAME, NUMBER):
	245	tokval += ' '
	246
	247	# Insert a space between two consecutive strings
	248	if toknum == STRING:
	249	if prevstring:
	250	tokval = ' ' + tokval
	251	prevstring = True
	252	else:
	253	prevstring = False
	254
	255	if toknum == INDENT:
	256	indents.append(tokval)
	257	continue
	258	elif toknum == DEDENT:
	259	indents.pop()
	260	continue
	261	elif toknum in (NEWLINE, NL):
	262	startline = True
	263	elif startline and indents:
	264	toks_append(indents[-1])
	265	startline = False
	266	toks_append(tokval)
	267
	268
	269	def untokenize(tokens):
	270	"""
	271	Convert ``tokens`` (an iterable) back into Python source code. Return
	272	a str: unlike the standard library version, the result is not encoded, and
	273	any ENCODING token found in ``tokens`` is only recorded, not applied.
	274
	275	The result is guaranteed to tokenize back to match the input so that
	276	the conversion is lossless and round-trips are assured. The
	277	guarantee applies only to the token type and token string as the
	278	spacing between tokens (column positions) may change.
	279
	280	:func:`untokenize` has two modes. If the input tokens are sequences
	281	of length 2 (``type``, ``string``) then spaces are added as necessary to
	282	preserve the round-trip property.
	283
	284	If the input tokens are sequences of length 4 or more (``type``,
	285	``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
	286	spaces are added so that each token appears in the result at the
	287	position indicated by ``start`` and ``end``, if possible.
	288	"""
	289	return Untokenizer().untokenize(tokens)
	290
	291
	292	def _get_normal_name(orig_enc):
	293	"""Imitates get_normal_name in tokenizer.c."""
	294	# Only care about the first 12 characters.
	295	enc = orig_enc[:12].lower().replace("_", "-")
	296	if enc == "utf-8" or enc.startswith("utf-8-"):
	297	return "utf-8"
	298	if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
	299	enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
	300	return "iso-8859-1"
	301	return orig_enc
	302
	303	def detect_encoding(readline):
	304	"""
	305	The detect_encoding() function is used to detect the encoding that should
	306	be used to decode a Python source file. It requires one argument, readline,
	307	in the same way as the tokenize() generator.
	308
	309	It will call readline a maximum of twice, and return the encoding used
	310	(as a string) and a list of any lines (left as bytes) it has read in.
	311
	312	It detects the encoding from the presence of a utf-8 bom or an encoding
	313	cookie as specified in pep-0263. If both a bom and a cookie are present,
	314	but disagree, a SyntaxError will be raised. If the encoding cookie is an
	315	invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
	316	'utf-8-sig' is returned.
	317
	318	If no encoding is specified, then the default of 'utf-8' will be returned.
	319	"""
	320	bom_found = False
	321	encoding = None
	322	default = 'utf-8'
	323	def read_or_stop():
	324	try:
	325	return readline()
	326	except StopIteration:
	327	return b''
	328
	329	def find_cookie(line):
	330	try:
	331	# Decode as UTF-8. Either the line is an encoding declaration,
	332	# in which case it should be pure ASCII, or it must be UTF-8
	333	# per default encoding.
	334	line_string = line.decode('utf-8')
	335	except UnicodeDecodeError:
	336	raise SyntaxError("invalid or missing encoding declaration")
	337
	338	matches = cookie_re.findall(line_string)
	339	if not matches:
	340	return None
	341	encoding = _get_normal_name(matches[0])
	342	try:
	343	codec = lookup(encoding)
	344	except LookupError:
	345	# This behaviour mimics the Python interpreter
	346	raise SyntaxError("unknown encoding: " + encoding)
	347
	348	if bom_found:
	349	if encoding != 'utf-8':
	350	# This behaviour mimics the Python interpreter
	351	raise SyntaxError('encoding problem: utf-8')
	352	encoding += '-sig'
	353	return encoding
	354
	355	first = read_or_stop()
	356	if first.startswith(BOM_UTF8):
	357	bom_found = True
	358	first = first[3:]
	359	default = 'utf-8-sig'
	360	if not first:
	361	return default, []
	362
	363	encoding = find_cookie(first)
	364	if encoding:
	365	return encoding, [first]
	366
	367	second = read_or_stop()
	368	if not second:
	369	return default, [first]
	370
	371	encoding = find_cookie(second)
	372	if encoding:
	373	return encoding, [first, second]
	374
	375	return default, [first, second]
	376
	377
	378	def open(filename):
	379	"""Open a file in read only mode using the encoding detected by
	380	detect_encoding().
	381	"""
	382	buffer = builtins.open(filename, 'rb')
	383	encoding, lines = detect_encoding(buffer.readline)
	384	buffer.seek(0)
	385	text = TextIOWrapper(buffer, encoding, line_buffering=True)
	386	text.mode = 'r'
	387	return text
	388
	389
	390	def tokenize(readline):
	391	"""
	392	The tokenize() generator requires one argument, readline, which
	393	must be a callable object which provides the same interface as the
	394	readline() method of built-in file objects. Each call to the function
	395	should return one line of input as bytes. Alternately, readline
	396	can be a callable function terminating with StopIteration:
	397	readline = open(myfile, 'rb').__next__ # Example of alternate readline
	398
	399	The generator produces 5-tuples with these members: the token type; the
	400	token string; a 2-tuple (srow, scol) of ints specifying the row and
	401	column where the token begins in the source; a 2-tuple (erow, ecol) of
	402	ints specifying the row and column where the token ends in the source;
	403	and the line on which the token was found. The line passed is the
	404	logical line; continuation lines are included.
	405
	406	The first token sequence will always be an ENCODING token
	407	which tells you which encoding was used to decode the bytes stream.
	408	"""
	409	# This import is here to avoid problems when the itertools module is not
	410	# built yet and tokenize is imported.
	411	from itertools import chain, repeat
	412	encoding, consumed = detect_encoding(readline)
	413	rl_gen = iter(readline, b"")
	414	empty = repeat(b"")
	415	return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
	416
	417
def _tokenize(readline, encoding):
    """Generate TokenInfo tuples from the lines supplied by *readline*.

    *readline* is called with no arguments once per input line; a
    StopIteration raised by it is treated like an empty line, i.e. end of
    input.  When *encoding* is not None an ENCODING token is yielded first
    ("utf-8-sig" is reported as "utf-8") and every line is decoded with that
    encoding before being scanned; when it is None lines are scanned exactly
    as returned by *readline*.

    After the last line, one DEDENT token is yielded for each indentation
    level still open, followed by a final ENDMARKER token.

    Raises TokenError if the input ends inside a multi-line string or a
    multi-line statement, and IndentationError if a dedent does not match
    any enclosing indentation level.

    pseudoprog, endprogs, triple_quoted, single_quoted and tabsize are
    module-level names defined outside this function.
    """
    # lnum: current line number; parenlev: depth of open brackets;
    # continued: set to 1 when the previous line ended with a backslash.
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    # contstr: text collected so far for a string spanning several lines;
    # needcont: 1 when that string needs backslash-newline to continue.
    contstr, needcont = '', 0
    # contline: the source lines collected so far for a multi-line string.
    contline = None
    # Stack of indentation columns currently open; column 0 is always there.
    indents = [0]

    if encoding is not None:
        if encoding == "utf-8-sig":
            # BOM will already have been stripped.
            encoding = "utf-8"
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
    while True:             # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            # An exhausted readline is handled like an empty (falsy) line.
            line = b''

        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                # The string closes on this line; scanning resumes after it.
                pos = end = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                # The string needed a backslash continuation but this line
                # does not end with one: report what was collected as an error.
                yield TokenInfo(ERRORTOKEN, contstr + line,
                           strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                # Still inside the string: keep accumulating.
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    # A tab advances to the next multiple of tabsize.
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    # A form feed resets the measured column.
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                # Only whitespace and no line terminator: stop reading lines.
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield TokenInfo(COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield TokenInfo(NEWLINE, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield TokenInfo(NEWLINE, line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                # Such lines never change the indentation stack.
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                # Pop one level per DEDENT token emitted.
                indents = indents[:-1]
                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if (initial in numchars or                  # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                    yield TokenInfo(NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    # Inside brackets a line break is NL, otherwise NEWLINE.
                    yield TokenInfo(NL if parenlev > 0 else NEWLINE,
                           token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield TokenInfo(COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                    else:
                        # Remember where the string began and carry the rest
                        # of this line over to the next iteration.
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Use the first non-empty endprogs entry among the
                        # first three characters of the token.
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield TokenInfo(STRING, token, spos, epos, line)
                elif initial.isidentifier():               # ordinary name
                    yield TokenInfo(NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    # Everything else is an operator; track bracket depth.
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield TokenInfo(OP, token, spos, epos, line)
            else:
                # No token matches here: emit one character as an error
                # token and move on.
                yield TokenInfo(ERRORTOKEN, line[pos],
                           (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
	565
	566
	567	# An undocumented, backwards compatible, API for all the places in the standard
	568	# library that expect to be able to use tokenize with strings
	569	def generate_tokens(readline):
	570	return _tokenize(readline, None)
	571
	572	if __name__ == "__main__":
	573	# Quick sanity check
	574	s = b'''def parseline(self, line):
	575	"""Parse the line into a command name and a string containing
	576	the arguments. Returns a tuple containing (command, args, line).
	577	'command' and 'args' may be None if the line couldn't be parsed.
	578	"""
	579	line = line.strip()
	580	if not line:
	581	return None, None, line
	582	elif line[0] == '?':
	583	line = 'help ' + line[1:]
	584	elif line[0] == '!':
	585	if hasattr(self, 'do_shell'):
	586	line = 'shell ' + line[1:]
	587	else:
	588	return None, None, line
	589	i, n = 0, len(line)
	590	while i < n and line[i] in self.identchars: i = i+1
	591	cmd, arg = line[:i], line[i:].strip()
	592	return cmd, arg, line
	593	'''
	594	for tok in tokenize(iter(s.splitlines()).__next__):
	595	print(tok)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages