upstream/ipython Commit - r22959:7851f8a3

remove sys_version for Python 3...

Paul Ivanov -

r22959:7851f8a3

parent child

IPython/core/debugger.py

0 +14 -15

		@@ -604,22 +604,21 b' class Pdb(OldPdb, object):'
604	604	('Globals', self.curframe.f_globals)]
605	605	self.shell.find_line_magic('psource')(arg, namespaces=namespaces)
606	606
607		if sys.version_info > (3, ):
608		def do_where(self, arg):
609		"""w(here)
610		Print a stack trace, with the most recent frame at the bottom.
611		An arrow indicates the "current frame", which determines the
612		context of most commands. 'bt' is an alias for this command.
613
614		Take a number as argument as an (optional) number of context line to
615		print"""
616		if arg:
617		context = int(arg)
618		self.print_stack_trace(context)
619		else:
620		self.print_stack_trace()
	607	def do_where(self, arg):
	608	"""w(here)
	609	Print a stack trace, with the most recent frame at the bottom.
	610	An arrow indicates the "current frame", which determines the
	611	context of most commands. 'bt' is an alias for this command.
	612
	613	Take a number as argument as an (optional) number of context line to
	614	print"""
	615	if arg:
	616	context = int(arg)
	617	self.print_stack_trace(context)
	618	else:
	619	self.print_stack_trace()
621	620
622		do_w = do_where
	621	do_w = do_where
623	622
624	623
625	624	def set_trace(frame=None):

IPython/core/interactiveshell.py

0 +1 -4

                          raise self.error_in_exec
                  def __repr__(self):
-                     if sys.version_info > (3,):
-                         name = self.__class__.__qualname__
-                     else:
-                         name = self.__class__.__name__
+                     name = self.__class__.__qualname__
                      return '<%s object at %x, execution_count=%s error_before_exec=%s error_in_exec=%s result=%s>' %\
                              (name, id(self), self.execution_count, self.error_before_exec, self.error_in_exec, repr(self.result))

IPython/core/tests/test_completer.py

0 0 -7

-             @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
              def test_back_latex_completion():
                  ip = get_ipython()
                  nt.assert_equal(len(matches), 1)
                  nt.assert_equal(matches[0], '\\beta')
-             @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
              def test_back_unicode_completion():
                  ip = get_ipython()
                  nt.assert_equal(matches[0], '\\ROMAN NUMERAL FIVE')
-             @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
              def test_forward_unicode_completion():
                  ip = get_ipython()
                  nt.assert_equal(len(matches), 1)
                  nt.assert_equal(matches[0], 'Ⅴ')
-             @dec.onlyif(sys.version_info[0] >= 3, 'This test only apply on python3')
              @dec.knownfailureif(sys.platform == 'win32', 'Fails if there is a C:\\j... path')
              def test_no_ascii_back_completion():
                  ip = get_ipython()
-             @dec.onlyif(sys.version_info[0] >= 3, 'This test only applies in Py>=3')
              def test_dict_key_completion_bytes():
                  """Test handling of bytes in dict key completion"""
                  ip = get_ipython()
                      nt.assert_not_in("abd", matches)
-             @dec.onlyif(sys.version_info[0] < 3, 'This test only applies in Py<3')
              def test_dict_key_completion_unicode_py2():
                  """Test handling of unicode in dict key completion"""
                  ip = get_ipython()
                      nt.assert_in(u"d[u'a\u05d0b']", matches)
-             @dec.onlyif(sys.version_info[0] >= 3, 'This test only applies in Py>=3')
              def test_dict_key_completion_unicode_py3():
                  """Test handling of unicode in dict key completion"""
                  ip = get_ipython()

IPython/core/tests/test_interactiveshell.py

0 +1 -33

                      finally:
                          trap.hook = save_hook
-                 @skipif(sys.version_info[0] >= 3, "softspace removed in py3")
-                 def test_print_softspace(self):
-                     """Verify that softspace is handled correctly when executing multiple
-                     statements.
-                     In [1]: print 1; print 2
-                     In [2]: print 1,; print 2
-2
-                     """
                  def test_ofind_line_magic(self):
                      from IPython.core.magic import register_line_magic
                          # Reset the custom exception hook
                          ip.set_custom_exc((), None)
-                 @skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
-                 def test_future_environment(self):
-                     "Can we run code with & without the shell's __future__ imports?"
-                     ip.run_cell("from __future__ import division")
-                     ip.run_cell("a = 1/2", shell_futures=True)
-                     self.assertEqual(ip.user_ns['a'], 0.5)
-                     ip.run_cell("b = 1/2", shell_futures=False)
-                     self.assertEqual(ip.user_ns['b'], 0)
-                     ip.compile.reset_compiler_flags()
-                     # This shouldn't leak to the shell's compiler
-                     ip.run_cell("from __future__ import division \nc=1/2", shell_futures=False)
-                     self.assertEqual(ip.user_ns['c'], 0.5)
-                     ip.run_cell("d = 1/2", shell_futures=True)
-                     self.assertEqual(ip.user_ns['d'], 0)
                  def test_mktempfile(self):
                      filename = ip.mktempfile()
                      # Check that we can open the file again on Windows
                          raise DerivedInterrupt("foo")
                      except KeyboardInterrupt:
                          msg = ip.get_exception_only()
-                     if sys.version_info[0] <= 2:
-                         self.assertEqual(msg, 'DerivedInterrupt: foo\n')
-                     else:
-                         self.assertEqual(msg, 'IPython.core.tests.test_interactiveshell.DerivedInterrupt: foo\n')
+                     self.assertEqual(msg, 'IPython.core.tests.test_interactiveshell.DerivedInterrupt: foo\n')
                  def test_inspect_text(self):
                      ip.run_cell('a = 5')

IPython/core/tests/test_magic.py

0 0 -23

                                  "run = 0\n"
                                  "run += 1")
-             @dec.skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
-             def test_time_futures():
-                 "Test %time with __future__ environments"
-                 ip = get_ipython()
-                 ip.autocall = 0
-                 ip.run_cell("from __future__ import division")
-                 with tt.AssertPrints('0.25'):
-                     ip.run_line_magic('time', 'print(1/4)')
-                 ip.compile.reset_compiler_flags()
-                 with tt.AssertNotPrints('0.25'):
-                     ip.run_line_magic('time', 'print(1/4)')
              def test_doctest_mode():
                  "Toggle doctest_mode twice, it should be a no-op and run without error"
                  _ip.magic('doctest_mode')
                      res = _ip.run_line_magic('timeit', '-n1 -r1 -q -o 1')
                  assert (res is not None)
-             @dec.skipif(sys.version_info[0] >= 3, "no differences with __future__ in py3")
-             def test_timeit_futures():
-                 "Test %timeit with __future__ environments"
-                 ip = get_ipython()
-                 ip.run_cell("from __future__ import division")
-                 with tt.AssertPrints('0.25'):
-                     ip.run_line_magic('timeit', '-n1 -r1 print(1/4)')
-                 ip.compile.reset_compiler_flags()
-                 with tt.AssertNotPrints('0.25'):
-                     ip.run_line_magic('timeit', '-n1 -r1 print(1/4)')
              @dec.skipif(execution.profile is None)
              def test_prun_special_syntax():
                  "Test %%prun with IPython special syntax"

IPython/core/tests/test_magic_terminal.py

0 +1 -8

                  finally:
                      sys.stdin = stdin_save
-             PY31 = sys.version_info[:2] == (3,1)
              def test_cpaste():
                  """Test cpaste magic"""
                                    ],
                           'fail': ["1 + runf()",
+                                   "++ runf()",
                           ]}
-                 # I don't know why this is failing specifically on Python 3.1. I've
-                 # checked it manually interactively, but we don't care enough about 3.1
-                 # to spend time fiddling with the tests, so we just skip it.
-                 if not PY31:
-                     tests['fail'].append("++ runf()")
                  ip.user_ns['runf'] = runf

IPython/core/tests/test_oinspect.py

0 +1 -2

                  nt.assert_equal(i['type_name'], 'type')
                  expted_class = str(type(type))  # <class 'type'> (Python 3) or <type 'type'>
                  nt.assert_equal(i['base_class'], expted_class)
-                 if sys.version_info > (3,):
-                     nt.assert_regex(i['string_form'], "<class 'IPython.core.tests.test_oinspect.Call'( at 0x[0-9a-f]{1,9})?>")
+                 nt.assert_regex(i['string_form'], "<class 'IPython.core.tests.test_oinspect.Call'( at 0x[0-9a-f]{1,9})?>")
                  fname = __file__
                  if fname.endswith(".pyc"):
                      fname = fname[:-1]

IPython/core/tests/test_ultratb.py

0 +39 -40

		@@ -313,44 +313,43 b' def r3o2():'
313	313	#----------------------------------------------------------------------------
314	314
315	315	# module testing (minimal)
316		if sys.version_info > (3,):
317		def test_handlers():
318		def spam(c, d_e):
319		(d, e) = d_e
320		x = c + d
321		y = c * d
322		foo(x, y)
323
324		def foo(a, b, bar=1):
325		eggs(a, b + bar)
326
327		def eggs(f, g, z=globals()):
328		h = f + g
329		i = f - g
330		return h / i
331
332		buff = io.StringIO()
333
334		buff.write('')
335		buff.write('* Before *')
336		try:
337		buff.write(spam(1, (2, 3)))
338		except:
339		traceback.print_exc(file=buff)
340
341		handler = ColorTB(ostream=buff)
342		buff.write('* ColorTB *')
343		try:
344		buff.write(spam(1, (2, 3)))
345		except:
346		handler(*sys.exc_info())
347		buff.write('')
348
349		handler = VerboseTB(ostream=buff)
350		buff.write('* VerboseTB *')
351		try:
352		buff.write(spam(1, (2, 3)))
353		except:
354		handler(*sys.exc_info())
355		buff.write('')
	316	def test_handlers():
	317	def spam(c, d_e):
	318	(d, e) = d_e
	319	x = c + d
	320	y = c * d
	321	foo(x, y)
	322
	323	def foo(a, b, bar=1):
	324	eggs(a, b + bar)
	325
	326	def eggs(f, g, z=globals()):
	327	h = f + g
	328	i = f - g
	329	return h / i
	330
	331	buff = io.StringIO()
	332
	333	buff.write('')
	334	buff.write('* Before *')
	335	try:
	336	buff.write(spam(1, (2, 3)))
	337	except:
	338	traceback.print_exc(file=buff)
	339
	340	handler = ColorTB(ostream=buff)
	341	buff.write('* ColorTB *')
	342	try:
	343	buff.write(spam(1, (2, 3)))
	344	except:
	345	handler(*sys.exc_info())
	346	buff.write('')
	347
	348	handler = VerboseTB(ostream=buff)
	349	buff.write('* VerboseTB *')
	350	try:
	351	buff.write(spam(1, (2, 3)))
	352	except:
	353	handler(*sys.exc_info())
	354	buff.write('')
356	355

IPython/testing/iptest.py

0 +1 -2

              # Enable printing all warnings raise by IPython's modules
              warnings.filterwarnings('ignore', message='.*Matplotlib is building the font cache.*', category=UserWarning, module='.*')
-             if sys.version_info > (3,0):
-                 warnings.filterwarnings('error', message='.*', category=ResourceWarning, module='.*')
+             warnings.filterwarnings('error', message='.*', category=ResourceWarning, module='.*')
              warnings.filterwarnings('error', message=".*{'config': True}.*", category=DeprecationWarning, module='IPy.*')
              warnings.filterwarnings('default', message='.*', category=Warning, module='IPy.*')

IPython/testing/skipdoctest.py

0 +3 -16

-             """Decorators marks that a doctest should be skipped, for both python 2 and 3.
+             """Decorators marks that a doctest should be skipped.
              The IPython.testing.decorators module triggers various extra imports, including
              numpy and sympy if they're present. Since this decorator is used in core parts
              of IPython, it's in a separate module so that running IPython doesn't trigger
              those imports."""
-             #-----------------------------------------------------------------------------
-             #  Copyright (C) 2009-2011  The IPython Development Team
+             #
-             #  Distributed under the terms of the BSD License.  The full license is in
-             #  the file COPYING, distributed as part of this software.
-             #-----------------------------------------------------------------------------
+             # Copyright (C) IPython Development Team
+             # Distributed under the terms of the Modified BSD License.
-             #-----------------------------------------------------------------------------
-             # Imports
-             #-----------------------------------------------------------------------------
-             import sys
-             #-----------------------------------------------------------------------------
-             # Decorators
-             #-----------------------------------------------------------------------------
              def skip_doctest(f):
                  """Decorator - mark a function or method for skipping its doctest.

IPython/utils/tokenize2.py

0 +590 -4

This diff has been collapsed as it changes many lines, (594 lines changed) Show them Hide them
			@@ -1,9 +1,595 b''
	1		"""Load our patched versions of tokenize.
		1	"""Patched version of standard library tokenize, to deal with various bugs.
		2
		3	Based on Python 3.2 code.
		4
		5	Patches:
		6
		7	- Gareth Rees' patch for Python issue #12691 (untokenizing)
		8	- Except we don't encode the output of untokenize
		9	- Python 2 compatible syntax, so that it can be byte-compiled at installation
		10	- Newlines in comments and blank lines should be either NL or NEWLINE, depending
		11	on whether they are in a multi-line statement. Filed as Python issue #17061.
		12	- Export generate_tokens & TokenError
		13	- u and rb literals are allowed under Python 3.3 and above.
		14
		15	------------------------------------------------------------------------------
		16	Tokenization help for Python programs.
		17
		18	tokenize(readline) is a generator that breaks a stream of bytes into
		19	Python tokens. It decodes the bytes according to PEP-0263 for
		20	determining source file encoding.
		21
		22	It accepts a readline-like method which is called repeatedly to get the
		23	next line of input (or b"" for EOF). It generates 5-tuples with these
		24	members:
		25
		26	the token type (see token.py)
		27	the token (a string)
		28	the starting (row, column) indices of the token (a 2-tuple of ints)
		29	the ending (row, column) indices of the token (a 2-tuple of ints)
		30	the original line (string)
		31
		32	It is designed to match the working of the Python tokenizer exactly, except
		33	that it produces COMMENT tokens for comments and gives type OP for all
		34	operators. Additionally, all token lists start with an ENCODING token
		35	which tells you which encoding was used to decode the bytes stream.
	2	36	"""
		37	from __future__ import absolute_import
	3	38
		39	__author__ = 'Ka-Ping Yee <ping@lfw.org>'
		40	__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
		41	'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
		42	'Michael Foord')
		43	import builtins
		44	import re
	4	45	import sys
		46	from token import *
		47	from codecs import lookup, BOM_UTF8
		48	import collections
		49	from io import TextIOWrapper
		50	cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
		51
		52	import token
		53	__all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
		54	"NL", "untokenize", "ENCODING", "TokenInfo"]
		55	del token
		56
		57	__all__ += ["generate_tokens", "TokenError"]
	5	58
	6		if sys.version_info[0] >= 3:
	7		from ._tokenize_py3 import *
		59	COMMENT = N_TOKENS
		60	tok_name[COMMENT] = 'COMMENT'
		61	NL = N_TOKENS + 1
		62	tok_name[NL] = 'NL'
		63	ENCODING = N_TOKENS + 2
		64	tok_name[ENCODING] = 'ENCODING'
		65	N_TOKENS += 3
		66
		67	class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
		68	def __repr__(self):
		69	annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
		70	return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
		71	self._replace(type=annotated_type))
		72
		73	def group(*choices): return '(' + '|'.join(choices) + ')'
		74	def any(choices): return group(choices) + '*'
		75	def maybe(choices): return group(choices) + '?'
		76
		77	# Note: we use unicode matching for names ("\w") but ascii matching for
		78	# number literals.
		79	Whitespace = r'[ \f\t]*'
		80	Comment = r'#[^\r\n]*'
		81	Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
		82	Name = r'\w+'
		83
		84	Hexnumber = r'0[xX][0-9a-fA-F]+'
		85	Binnumber = r'0[bB][01]+'
		86	Octnumber = r'0[oO][0-7]+'
		87	Decnumber = r'(?:0+|[1-9][0-9]*)'
		88	Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
		89	Exponent = r'[eE][-+]?[0-9]+'
		90	Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
		91	Expfloat = r'[0-9]+' + Exponent
		92	Floatnumber = group(Pointfloat, Expfloat)
		93	Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
		94	Number = group(Imagnumber, Floatnumber, Intnumber)
		95
		96	if sys.version_info.minor >= 3:
		97	StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
	8	98	else:
	9		from ._tokenize_py2 import *
		99	StringPrefix = r'(?:[bB]?[rR]?)?'
		100
		101	# Tail end of ' string.
		102	Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
		103	# Tail end of " string.
		104	Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
		105	# Tail end of ''' string.
		106	Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
		107	# Tail end of """ string.
		108	Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
		109	Triple = group(StringPrefix + "'''", StringPrefix + '"""')
		110	# Single-line ' or " string.
		111	String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
		112	StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
		113
		114	# Because of leftmost-then-longest match semantics, be sure to put the
		115	# longest operators first (e.g., if = came before ==, == would get
		116	# recognized as two instances of =).
		117	Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
		118	r"//=?", r"->",
		119	r"[+\-*/%&|^=<>]=?",
		120	r"~")
		121
		122	Bracket = '[][(){}]'
		123	Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
		124	Funny = group(Operator, Bracket, Special)
		125
		126	PlainToken = group(Number, Funny, String, Name)
		127	Token = Ignore + PlainToken
		128
		129	# First (or only) line of ' or " string.
		130	ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
		131	group("'", r'\\\r?\n'),
		132	StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
		133	group('"', r'\\\r?\n'))
		134	PseudoExtras = group(r'\\\r?\n', Comment, Triple)
		135	PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
		136
		137	def _compile(expr):
		138	return re.compile(expr, re.UNICODE)
		139
		140	tokenprog, pseudoprog, single3prog, double3prog = map(
		141	_compile, (Token, PseudoToken, Single3, Double3))
		142	endprogs = {"'": _compile(Single), '"': _compile(Double),
		143	"'''": single3prog, '"""': double3prog,
		144	"r'''": single3prog, 'r"""': double3prog,
		145	"b'''": single3prog, 'b"""': double3prog,
		146	"R'''": single3prog, 'R"""': double3prog,
		147	"B'''": single3prog, 'B"""': double3prog,
		148	"br'''": single3prog, 'br"""': double3prog,
		149	"bR'''": single3prog, 'bR"""': double3prog,
		150	"Br'''": single3prog, 'Br"""': double3prog,
		151	"BR'''": single3prog, 'BR"""': double3prog,
		152	'r': None, 'R': None, 'b': None, 'B': None}
		153
		154	triple_quoted = {}
		155	for t in ("'''", '"""',
		156	"r'''", 'r"""', "R'''", 'R"""',
		157	"b'''", 'b"""', "B'''", 'B"""',
		158	"br'''", 'br"""', "Br'''", 'Br"""',
		159	"bR'''", 'bR"""', "BR'''", 'BR"""'):
		160	triple_quoted[t] = t
		161	single_quoted = {}
		162	for t in ("'", '"',
		163	"r'", 'r"', "R'", 'R"',
		164	"b'", 'b"', "B'", 'B"',
		165	"br'", 'br"', "Br'", 'Br"',
		166	"bR'", 'bR"', "BR'", 'BR"' ):
		167	single_quoted[t] = t
		168
		169	if sys.version_info.minor >= 3:
		170	# Python 3.3
		171	for _prefix in ['rb', 'rB', 'Rb', 'RB', 'u', 'U']:
		172	_t2 = _prefix+'"""'
		173	endprogs[_t2] = double3prog
		174	triple_quoted[_t2] = _t2
		175	_t1 = _prefix + "'''"
		176	endprogs[_t1] = single3prog
		177	triple_quoted[_t1] = _t1
		178	single_quoted[_prefix+'"'] = _prefix+'"'
		179	single_quoted[_prefix+"'"] = _prefix+"'"
		180	del _prefix, _t2, _t1
		181	endprogs['u'] = None
		182	endprogs['U'] = None
		183
		184	del _compile
		185
		186	tabsize = 8
		187
		188	class TokenError(Exception): pass
		189
		190	class StopTokenizing(Exception): pass
		191
		192
		193	class Untokenizer:
		194
		195	def __init__(self):
		196	self.tokens = []
		197	self.prev_row = 1
		198	self.prev_col = 0
		199	self.encoding = 'utf-8'
		200
		201	def add_whitespace(self, tok_type, start):
		202	row, col = start
		203	assert row >= self.prev_row
		204	col_offset = col - self.prev_col
		205	if col_offset > 0:
		206	self.tokens.append(" " * col_offset)
		207	elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
		208	# Line was backslash-continued.
		209	self.tokens.append(" ")
		210
		211	def untokenize(self, tokens):
		212	iterable = iter(tokens)
		213	for t in iterable:
		214	if len(t) == 2:
		215	self.compat(t, iterable)
		216	break
		217	tok_type, token, start, end = t[:4]
		218	if tok_type == ENCODING:
		219	self.encoding = token
		220	continue
		221	self.add_whitespace(tok_type, start)
		222	self.tokens.append(token)
		223	self.prev_row, self.prev_col = end
		224	if tok_type in (NEWLINE, NL):
		225	self.prev_row += 1
		226	self.prev_col = 0
		227	return "".join(self.tokens)
		228
		229	def compat(self, token, iterable):
		230	# This import is here to avoid problems when the itertools
		231	# module is not built yet and tokenize is imported.
		232	from itertools import chain
		233	startline = False
		234	prevstring = False
		235	indents = []
		236	toks_append = self.tokens.append
		237
		238	for tok in chain([token], iterable):
		239	toknum, tokval = tok[:2]
		240	if toknum == ENCODING:
		241	self.encoding = tokval
		242	continue
		243
		244	if toknum in (NAME, NUMBER):
		245	tokval += ' '
		246
		247	# Insert a space between two consecutive strings
		248	if toknum == STRING:
		249	if prevstring:
		250	tokval = ' ' + tokval
		251	prevstring = True
		252	else:
		253	prevstring = False
		254
		255	if toknum == INDENT:
		256	indents.append(tokval)
		257	continue
		258	elif toknum == DEDENT:
		259	indents.pop()
		260	continue
		261	elif toknum in (NEWLINE, NL):
		262	startline = True
		263	elif startline and indents:
		264	toks_append(indents[-1])
		265	startline = False
		266	toks_append(tokval)
		267
		268
		269	def untokenize(tokens):
		270	"""
		271	Convert ``tokens`` (an iterable) back into Python source code. Return
		272	a bytes object, encoded using the encoding specified by the last
		273	ENCODING token in ``tokens``, or UTF-8 if no ENCODING token is found.
		274
		275	The result is guaranteed to tokenize back to match the input so that
		276	the conversion is lossless and round-trips are assured. The
		277	guarantee applies only to the token type and token string as the
		278	spacing between tokens (column positions) may change.
		279
		280	:func:`untokenize` has two modes. If the input tokens are sequences
		281	of length 2 (``type``, ``string``) then spaces are added as necessary to
		282	preserve the round-trip property.
		283
		284	If the input tokens are sequences of length 4 or more (``type``,
		285	``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
		286	spaces are added so that each token appears in the result at the
		287	position indicated by ``start`` and ``end``, if possible.
		288	"""
		289	return Untokenizer().untokenize(tokens)
		290
		291
		292	def _get_normal_name(orig_enc):
		293	"""Imitates get_normal_name in tokenizer.c."""
		294	# Only care about the first 12 characters.
		295	enc = orig_enc[:12].lower().replace("_", "-")
		296	if enc == "utf-8" or enc.startswith("utf-8-"):
		297	return "utf-8"
		298	if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
		299	enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
		300	return "iso-8859-1"
		301	return orig_enc
		302
		303	def detect_encoding(readline):
		304	"""
		305	The detect_encoding() function is used to detect the encoding that should
		306	be used to decode a Python source file. It requires one argment, readline,
		307	in the same way as the tokenize() generator.
		308
		309	It will call readline a maximum of twice, and return the encoding used
		310	(as a string) and a list of any lines (left as bytes) it has read in.
		311
		312	It detects the encoding from the presence of a utf-8 bom or an encoding
		313	cookie as specified in pep-0263. If both a bom and a cookie are present,
		314	but disagree, a SyntaxError will be raised. If the encoding cookie is an
		315	invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
		316	'utf-8-sig' is returned.
		317
		318	If no encoding is specified, then the default of 'utf-8' will be returned.
		319	"""
		320	bom_found = False
		321	encoding = None
		322	default = 'utf-8'
		323	def read_or_stop():
		324	try:
		325	return readline()
		326	except StopIteration:
		327	return b''
		328
		329	def find_cookie(line):
		330	try:
		331	# Decode as UTF-8. Either the line is an encoding declaration,
		332	# in which case it should be pure ASCII, or it must be UTF-8
		333	# per default encoding.
		334	line_string = line.decode('utf-8')
		335	except UnicodeDecodeError:
		336	raise SyntaxError("invalid or missing encoding declaration")
		337
		338	matches = cookie_re.findall(line_string)
		339	if not matches:
		340	return None
		341	encoding = _get_normal_name(matches[0])
		342	try:
		343	codec = lookup(encoding)
		344	except LookupError:
		345	# This behaviour mimics the Python interpreter
		346	raise SyntaxError("unknown encoding: " + encoding)
		347
		348	if bom_found:
		349	if encoding != 'utf-8':
		350	# This behaviour mimics the Python interpreter
		351	raise SyntaxError('encoding problem: utf-8')
		352	encoding += '-sig'
		353	return encoding
		354
		355	first = read_or_stop()
		356	if first.startswith(BOM_UTF8):
		357	bom_found = True
		358	first = first[3:]
		359	default = 'utf-8-sig'
		360	if not first:
		361	return default, []
		362
		363	encoding = find_cookie(first)
		364	if encoding:
		365	return encoding, [first]
		366
		367	second = read_or_stop()
		368	if not second:
		369	return default, [first]
		370
		371	encoding = find_cookie(second)
		372	if encoding:
		373	return encoding, [first, second]
		374
		375	return default, [first, second]
		376
		377
		378	def open(filename):
		379	"""Open a file in read only mode using the encoding detected by
		380	detect_encoding().
		381	"""
		382	buffer = builtins.open(filename, 'rb')
		383	encoding, lines = detect_encoding(buffer.readline)
		384	buffer.seek(0)
		385	text = TextIOWrapper(buffer, encoding, line_buffering=True)
		386	text.mode = 'r'
		387	return text
		388
		389
		390	def tokenize(readline):
		391	"""
		392	The tokenize() generator requires one argment, readline, which
		393	must be a callable object which provides the same interface as the
		394	readline() method of built-in file objects. Each call to the function
		395	should return one line of input as bytes. Alternately, readline
		396	can be a callable function terminating with StopIteration:
		397	readline = open(myfile, 'rb').__next__ # Example of alternate readline
		398
		399	The generator produces 5-tuples with these members: the token type; the
		400	token string; a 2-tuple (srow, scol) of ints specifying the row and
		401	column where the token begins in the source; a 2-tuple (erow, ecol) of
		402	ints specifying the row and column where the token ends in the source;
		403	and the line on which the token was found. The line passed is the
		404	logical line; continuation lines are included.
		405
		406	The first token sequence will always be an ENCODING token
		407	which tells you which encoding was used to decode the bytes stream.
		408	"""
		409	# This import is here to avoid problems when the itertools module is not
		410	# built yet and tokenize is imported.
		411	from itertools import chain, repeat
		412	encoding, consumed = detect_encoding(readline)
		413	rl_gen = iter(readline, b"")
		414	empty = repeat(b"")
		415	return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
		416
		417
		418	def _tokenize(readline, encoding):
		419	lnum = parenlev = continued = 0
		420	numchars = '0123456789'
		421	contstr, needcont = '', 0
		422	contline = None
		423	indents = [0]
		424
		425	if encoding is not None:
		426	if encoding == "utf-8-sig":
		427	# BOM will already have been stripped.
		428	encoding = "utf-8"
		429	yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
		430	while True: # loop over lines in stream
		431	try:
		432	line = readline()
		433	except StopIteration:
		434	line = b''
		435
		436	if encoding is not None:
		437	line = line.decode(encoding)
		438	lnum += 1
		439	pos, max = 0, len(line)
		440
		441	if contstr: # continued string
		442	if not line:
		443	raise TokenError("EOF in multi-line string", strstart)
		444	endmatch = endprog.match(line)
		445	if endmatch:
		446	pos = end = endmatch.end(0)
		447	yield TokenInfo(STRING, contstr + line[:end],
		448	strstart, (lnum, end), contline + line)
		449	contstr, needcont = '', 0
		450	contline = None
		451	elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
		452	yield TokenInfo(ERRORTOKEN, contstr + line,
		453	strstart, (lnum, len(line)), contline)
		454	contstr = ''
		455	contline = None
		456	continue
		457	else:
		458	contstr = contstr + line
		459	contline = contline + line
		460	continue
		461
		462	elif parenlev == 0 and not continued: # new statement
		463	if not line: break
		464	column = 0
		465	while pos < max: # measure leading whitespace
		466	if line[pos] == ' ':
		467	column += 1
		468	elif line[pos] == '\t':
		469	column = (column//tabsize + 1)*tabsize
		470	elif line[pos] == '\f':
		471	column = 0
		472	else:
		473	break
		474	pos += 1
		475	if pos == max:
		476	break
		477
		478	if line[pos] in '#\r\n': # skip comments or blank lines
		479	if line[pos] == '#':
		480	comment_token = line[pos:].rstrip('\r\n')
		481	nl_pos = pos + len(comment_token)
		482	yield TokenInfo(COMMENT, comment_token,
		483	(lnum, pos), (lnum, pos + len(comment_token)), line)
		484	yield TokenInfo(NEWLINE, line[nl_pos:],
		485	(lnum, nl_pos), (lnum, len(line)), line)
		486	else:
		487	yield TokenInfo(NEWLINE, line[pos:],
		488	(lnum, pos), (lnum, len(line)), line)
		489	continue
		490
		491	if column > indents[-1]: # count indents or dedents
		492	indents.append(column)
		493	yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
		494	while column < indents[-1]:
		495	if column not in indents:
		496	raise IndentationError(
		497	"unindent does not match any outer indentation level",
		498	("<tokenize>", lnum, pos, line))
		499	indents = indents[:-1]
		500	yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
		501
		502	else: # continued statement
		503	if not line:
		504	raise TokenError("EOF in multi-line statement", (lnum, 0))
		505	continued = 0
		506
		507	while pos < max:
		508	pseudomatch = pseudoprog.match(line, pos)
		509	if pseudomatch: # scan for tokens
		510	start, end = pseudomatch.span(1)
		511	spos, epos, pos = (lnum, start), (lnum, end), end
		512	token, initial = line[start:end], line[start]
		513
		514	if (initial in numchars or # ordinary number
		515	(initial == '.' and token != '.' and token != '...')):
		516	yield TokenInfo(NUMBER, token, spos, epos, line)
		517	elif initial in '\r\n':
		518	yield TokenInfo(NL if parenlev > 0 else NEWLINE,
		519	token, spos, epos, line)
		520	elif initial == '#':
		521	assert not token.endswith("\n")
		522	yield TokenInfo(COMMENT, token, spos, epos, line)
		523	elif token in triple_quoted:
		524	endprog = endprogs[token]
		525	endmatch = endprog.match(line, pos)
		526	if endmatch: # all on one line
		527	pos = endmatch.end(0)
		528	token = line[start:pos]
		529	yield TokenInfo(STRING, token, spos, (lnum, pos), line)
		530	else:
		531	strstart = (lnum, start) # multiple lines
		532	contstr = line[start:]
		533	contline = line
		534	break
		535	elif initial in single_quoted or \
		536	token[:2] in single_quoted or \
		537	token[:3] in single_quoted:
		538	if token[-1] == '\n': # continued string
		539	strstart = (lnum, start)
		540	endprog = (endprogs[initial] or endprogs[token[1]] or
		541	endprogs[token[2]])
		542	contstr, needcont = line[start:], 1
		543	contline = line
		544	break
		545	else: # ordinary string
		546	yield TokenInfo(STRING, token, spos, epos, line)
		547	elif initial.isidentifier(): # ordinary name
		548	yield TokenInfo(NAME, token, spos, epos, line)
		549	elif initial == '\\': # continued stmt
		550	continued = 1
		551	else:
		552	if initial in '([{':
		553	parenlev += 1
		554	elif initial in ')]}':
		555	parenlev -= 1
		556	yield TokenInfo(OP, token, spos, epos, line)
		557	else:
		558	yield TokenInfo(ERRORTOKEN, line[pos],
		559	(lnum, pos), (lnum, pos+1), line)
		560	pos += 1
		561
		562	for indent in indents[1:]: # pop remaining indent levels
		563	yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
		564	yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
		565
		566
# An undocumented, backwards compatible, API for all the places in the standard
# library that expect to be able to use tokenize with strings
def generate_tokens(readline):
    """Return the result of ``_tokenize(readline, None)``.

    ``readline`` is forwarded unchanged; ``None`` is passed as the second
    argument of ``_tokenize``.
    """
    return _tokenize(readline, None)
		571
		572	if __name__ == "__main__":
		573	# Quick sanity check
		574	s = b'''def parseline(self, line):
		575	"""Parse the line into a command name and a string containing
		576	the arguments. Returns a tuple containing (command, args, line).
		577	'command' and 'args' may be None if the line couldn't be parsed.
		578	"""
		579	line = line.strip()
		580	if not line:
		581	return None, None, line
		582	elif line[0] == '?':
		583	line = 'help ' + line[1:]
		584	elif line[0] == '!':
		585	if hasattr(self, 'do_shell'):
		586	line = 'shell ' + line[1:]
		587	else:
		588	return None, None, line
		589	i, n = 0, len(line)
		590	while i < n and line[i] in self.identchars: i = i+1
		591	cmd, arg = line[:i], line[i:].strip()
		592	return cmd, arg, line
		593	'''
		594	for tok in tokenize(iter(s.splitlines()).__next__):
		595	print(tok)

setup.py

0 0 -2

                  print(error, file=sys.stderr)
                  sys.exit(1)
-             PY3 = (sys.version_info[0] >= 3)
              # At least we're on the python version we need, move on.
              #-------------------------------------------------------------------------------

tools/github_stats.py

0 0 -3

                  print("DEPRECATE: backport_pr.py is deprecated and is is now recommended"
                        "to install `ghpro` from PyPI.", file=sys.stderr)
-                 # deal with unicode
-                 if sys.version_info < (3,):
-                     sys.stdout = codecs.getwriter('utf8')(sys.stdout)
                  # Whether to add reST urls for all issues in printout.
                  show_urls = True

IPython/utils/_tokenize_py2.py

0 removed 0 -439

			@@ -1,439 +0,0 b''
	1		"""Patched version of standard library tokenize, to deal with various bugs.
	2
	3		Patches
	4
	5		- Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
	6		manually applied.
	7		- Newlines in comments and blank lines should be either NL or NEWLINE, depending
	8		on whether they are in a multi-line statement. Filed as Python issue #17061.
	9
	10		-------------------------------------------------------------------------------
	11		Tokenization help for Python programs.
	12
	13		generate_tokens(readline) is a generator that breaks a stream of
	14		text into Python tokens. It accepts a readline-like method which is called
	15		repeatedly to get the next line of input (or "" for EOF). It generates
	16		5-tuples with these members:
	17
	18		the token type (see token.py)
	19		the token (a string)
	20		the starting (row, column) indices of the token (a 2-tuple of ints)
	21		the ending (row, column) indices of the token (a 2-tuple of ints)
	22		the original line (string)
	23
	24		It is designed to match the working of the Python tokenizer exactly, except
	25		that it produces COMMENT tokens for comments and gives type OP for all
	26		operators
	27
	28		Older entry points
	29		tokenize_loop(readline, tokeneater)
	30		tokenize(readline, tokeneater=printtoken)
	31		are the same, except instead of generating tokens, tokeneater is a callback
	32		function to which the 5 fields described above are passed as 5 arguments,
	33		each time a new token is found."""
	34		from __future__ import print_function
	35
	36		__author__ = 'Ka-Ping Yee <ping@lfw.org>'
	37		__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
	38		'Skip Montanaro, Raymond Hettinger')
	39
	40		import string, re
	41		from token import *
	42
	43		import token
	44		__all__ = [x for x in dir(token) if not x.startswith("_")]
	45		__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
	46		del x
	47		del token
	48
	49		__all__ += ["TokenError"]
	50
# Extra token types, numbered directly after the values taken from ``token``,
# and registered in ``tok_name`` so they can be printed by name.
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
NL = N_TOKENS + 1
tok_name[NL] = 'NL'
# Account for the two token types added above.
N_TOKENS += 2
	56
	57		def group(*choices): return '(' + '\|'.join(choices) + ')'
	58		def any(choices): return group(choices) + '*'
	59		def maybe(choices): return group(choices) + '?'
	60
	61		Whitespace = r'[ \f\t]*'
	62		Comment = r'#[^\r\n]*'
	63		Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
	64		Name = r'[a-zA-Z_]\w*'
	65
	66		Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
	67		Octnumber = r'(0[oO][0-7]+)\|(0[0-7]*)[lL]?'
	68		Binnumber = r'0[bB][01]+[lL]?'
	69		Decnumber = r'[1-9]\d*[lL]?'
	70		Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
	71		Exponent = r'[eE][-+]?\d+'
	72		Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
	73		Expfloat = r'\d+' + Exponent
	74		Floatnumber = group(Pointfloat, Expfloat)
	75		Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
	76		Number = group(Imagnumber, Floatnumber, Intnumber)
	77
	78		# Tail end of ' string.
	79		Single = r"[^'\\](?:\\.[^'\\])*'"
	80		# Tail end of " string.
	81		Double = r'[^"\\](?:\\.[^"\\])*"'
	82		# Tail end of ''' string.
	83		Single3 = r"[^'\\](?:(?:\\.\|'(?!''))[^'\\])*'''"
	84		# Tail end of """ string.
	85		Double3 = r'[^"\\](?:(?:\\.\|"(?!""))[^"\\])*"""'
	86		Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
	87		# Single-line ' or " string.
	88		String = group(r"[uUbB]?[rR]?'[^\n'\\](?:\\.[^\n'\\])*'",
	89		r'[uUbB]?[rR]?"[^\n"\\](?:\\.[^\n"\\])*"')
	90
	91		# Because of leftmost-then-longest match semantics, be sure to put the
	92		# longest operators first (e.g., if = came before ==, == would get
	93		# recognized as two instances of =).
	94		Operator = group(r"\\=?", r">>=?", r"<<=?", r"<>", r"!=",
	95		r"//=?",
	96		r"[+\-*/%&\|^=<>]=?",
	97		r"~")
	98
	99		Bracket = '[][(){}]'
	100		Special = group(r'\r?\n', r'[:;.,`@]')
	101		Funny = group(Operator, Bracket, Special)
	102
	103		PlainToken = group(Number, Funny, String, Name)
	104		Token = Ignore + PlainToken
	105
	106		# First (or only) line of ' or " string.
	107		ContStr = group(r"[uUbB]?[rR]?'[^\n'\\](?:\\.[^\n'\\])*" +
	108		group("'", r'\\\r?\n'),
	109		r'[uUbB]?[rR]?"[^\n"\\](?:\\.[^\n"\\])*' +
	110		group('"', r'\\\r?\n'))
	111		PseudoExtras = group(r'\\\r?\n', Comment, Triple)
	112		PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
	113
	114		tokenprog, pseudoprog, single3prog, double3prog = map(
	115		re.compile, (Token, PseudoToken, Single3, Double3))
	116		endprogs = {"'": re.compile(Single), '"': re.compile(Double),
	117		"'''": single3prog, '"""': double3prog,
	118		"r'''": single3prog, 'r"""': double3prog,
	119		"u'''": single3prog, 'u"""': double3prog,
	120		"ur'''": single3prog, 'ur"""': double3prog,
	121		"R'''": single3prog, 'R"""': double3prog,
	122		"U'''": single3prog, 'U"""': double3prog,
	123		"uR'''": single3prog, 'uR"""': double3prog,
	124		"Ur'''": single3prog, 'Ur"""': double3prog,
	125		"UR'''": single3prog, 'UR"""': double3prog,
	126		"b'''": single3prog, 'b"""': double3prog,
	127		"br'''": single3prog, 'br"""': double3prog,
	128		"B'''": single3prog, 'B"""': double3prog,
	129		"bR'''": single3prog, 'bR"""': double3prog,
	130		"Br'''": single3prog, 'Br"""': double3prog,
	131		"BR'''": single3prog, 'BR"""': double3prog,
	132		'r': None, 'R': None, 'u': None, 'U': None,
	133		'b': None, 'B': None}
	134
	135		triple_quoted = {}
	136		for t in ("'''", '"""',
	137		"r'''", 'r"""', "R'''", 'R"""',
	138		"u'''", 'u"""', "U'''", 'U"""',
	139		"ur'''", 'ur"""', "Ur'''", 'Ur"""',
	140		"uR'''", 'uR"""', "UR'''", 'UR"""',
	141		"b'''", 'b"""', "B'''", 'B"""',
	142		"br'''", 'br"""', "Br'''", 'Br"""',
	143		"bR'''", 'bR"""', "BR'''", 'BR"""'):
	144		triple_quoted[t] = t
	145		single_quoted = {}
	146		for t in ("'", '"',
	147		"r'", 'r"', "R'", 'R"',
	148		"u'", 'u"', "U'", 'U"',
	149		"ur'", 'ur"', "Ur'", 'Ur"',
	150		"uR'", 'uR"', "UR'", 'UR"',
	151		"b'", 'b"', "B'", 'B"',
	152		"br'", 'br"', "Br'", 'Br"',
	153		"bR'", 'bR"', "BR'", 'BR"' ):
	154		single_quoted[t] = t
	155
# Tab stop width used by generate_tokens() when measuring leading whitespace.
tabsize = 8

# Raised by generate_tokens() when the input ends inside a multi-line string
# or a multi-line statement.
class TokenError(Exception): pass

# Caught (and swallowed) by tokenize() to end tokenization early.
class StopTokenizing(Exception): pass
	161
	162		def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
	163		srow, scol = srow_scol
	164		erow, ecol = erow_ecol
	165		print("%d,%d-%d,%d:\t%s\t%s" % \
	166		(srow, scol, erow, ecol, tok_name[type], repr(token)))
	167
def tokenize(readline, tokeneater=printtoken):
    """
    The tokenize() function accepts two parameters: one representing the
    input stream, and one providing an output mechanism for tokenize().

    The first parameter, readline, must be a callable object which provides
    the same interface as the readline() method of built-in file objects.
    Each call to the function should return one line of input as a string.

    The second parameter, tokeneater, must also be a callable object.  It is
    called once for each token, with five arguments, corresponding to the
    tuples generated by generate_tokens().
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        # StopTokenizing ends tokenization quietly instead of propagating
        # (presumably raised by a tokeneater; no raiser is visible here).
        pass
	185
# backwards compatible interface
def tokenize_loop(readline, tokeneater):
    """Call ``tokeneater`` once per token produced by generate_tokens().

    Each token tuple is unpacked into positional arguments.  Exceptions from
    the generator or from ``tokeneater`` are not handled here.
    """
    for token_info in generate_tokens(readline):
        tokeneater(*token_info)
	190
	191		class Untokenizer:
	192
	193		def __init__(self):
	194		self.tokens = []
	195		self.prev_row = 1
	196		self.prev_col = 0
	197
	198		def add_whitespace(self, start):
	199		row, col = start
	200		assert row >= self.prev_row
	201		col_offset = col - self.prev_col
	202		if col_offset > 0:
	203		self.tokens.append(" " * col_offset)
	204		elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
	205		# Line was backslash-continued
	206		self.tokens.append(" ")
	207
	208		def untokenize(self, tokens):
	209		iterable = iter(tokens)
	210		for t in iterable:
	211		if len(t) == 2:
	212		self.compat(t, iterable)
	213		break
	214		tok_type, token, start, end = t[:4]
	215		self.add_whitespace(start)
	216		self.tokens.append(token)
	217		self.prev_row, self.prev_col = end
	218		if tok_type in (NEWLINE, NL):
	219		self.prev_row += 1
	220		self.prev_col = 0
	221		return "".join(self.tokens)
	222
	223		def compat(self, token, iterable):
	224		# This import is here to avoid problems when the itertools
	225		# module is not built yet and tokenize is imported.
	226		from itertools import chain
	227		startline = False
	228		prevstring = False
	229		indents = []
	230		toks_append = self.tokens.append
	231		for tok in chain([token], iterable):
	232		toknum, tokval = tok[:2]
	233
	234		if toknum in (NAME, NUMBER):
	235		tokval += ' '
	236
	237		# Insert a space between two consecutive strings
	238		if toknum == STRING:
	239		if prevstring:
	240		tokval = ' ' + tokval
	241		prevstring = True
	242		else:
	243		prevstring = False
	244
	245		if toknum == INDENT:
	246		indents.append(tokval)
	247		continue
	248		elif toknum == DEDENT:
	249		indents.pop()
	250		continue
	251		elif toknum in (NEWLINE, NL):
	252		startline = True
	253		elif startline and indents:
	254		toks_append(indents[-1])
	255		startline = False
	256		toks_append(tokval)
	257
	258		def untokenize(iterable):
	259		"""Transform tokens back into Python source code.
	260
	261		Each element returned by the iterable must be a token sequence
	262		with at least two elements, a token number and token value. If
	263		only two tokens are passed, the resulting output is poor.
	264
	265		Round-trip invariant for full input:
	266		Untokenized source will match input source exactly
	267
	268		Round-trip invariant for limited intput:
	269		# Output text will tokenize the back to the input
	270		t1 = [tok[:2] for tok in generate_tokens(f.readline)]
	271		newcode = untokenize(t1)
	272		readline = iter(newcode.splitlines(1)).next
	273		t2 = [tok[:2] for tok in generate_tokens(readline)]
	274		assert t1 == t2
	275		"""
	276		ut = Untokenizer()
	277		return ut.untokenize(iterable)
	278
def generate_tokens(readline):
    """
    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string.  Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.

    Raises TokenError when the input ends inside a multi-line string or a
    multi-line statement, and IndentationError when a dedent does not match
    any enclosing indentation level.
    """
    # lnum: current line number; parenlev: open-bracket depth;
    # continued: set when the previous line ended with a backslash.
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    # contstr/contline accumulate a string that spans lines; needcont is set
    # for single-quoted strings, which must end every line with a backslash.
    contstr, needcont = '', 0
    contline = None
    indents = [0]                              # stack of indentation columns

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''                          # treated like EOF below
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                # Single-quoted string not continued with a backslash.
                yield (ERRORTOKEN, contstr + line,
                       strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                # These lines never change the indentation stack.  The line
                # ending is reported as NEWLINE in this statement-level branch.
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NEWLINE, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield (NEWLINE, line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    # Inside brackets a line break does not end the statement.
                    yield (NL if parenlev > 0 else NEWLINE,
                           token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Prefix letters map to None in endprogs, so this
                        # picks the entry for the first quote character.
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield (STRING, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield (OP, token, spos, epos, line)
            else:
                # Nothing matched here: emit one character as ERRORTOKEN.
                yield (ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
	433
	434		if __name__ == '__main__': # testing
	435		import sys
	436		if len(sys.argv) > 1:
	437		tokenize(open(sys.argv[1]).readline)
	438		else:
	439		tokenize(sys.stdin.readline)

IPython/utils/_tokenize_py3.py

0 removed 0 -595

This diff has been collapsed as it changes many lines, (595 lines changed) Show them Hide them
		@@ -1,595 +0,0 b''
	1	"""Patched version of standard library tokenize, to deal with various bugs.
	2
	3	Based on Python 3.2 code.
	4
	5	Patches:
	6
	7	- Gareth Rees' patch for Python issue #12691 (untokenizing)
	8	- Except we don't encode the output of untokenize
	9	- Python 2 compatible syntax, so that it can be byte-compiled at installation
	10	- Newlines in comments and blank lines should be either NL or NEWLINE, depending
	11	on whether they are in a multi-line statement. Filed as Python issue #17061.
	12	- Export generate_tokens & TokenError
	13	- u and rb literals are allowed under Python 3.3 and above.
	14
	15	------------------------------------------------------------------------------
	16	Tokenization help for Python programs.
	17
	18	tokenize(readline) is a generator that breaks a stream of bytes into
	19	Python tokens. It decodes the bytes according to PEP-0263 for
	20	determining source file encoding.
	21
	22	It accepts a readline-like method which is called repeatedly to get the
	23	next line of input (or b"" for EOF). It generates 5-tuples with these
	24	members:
	25
	26	the token type (see token.py)
	27	the token (a string)
	28	the starting (row, column) indices of the token (a 2-tuple of ints)
	29	the ending (row, column) indices of the token (a 2-tuple of ints)
	30	the original line (string)
	31
	32	It is designed to match the working of the Python tokenizer exactly, except
	33	that it produces COMMENT tokens for comments and gives type OP for all
	34	operators. Additionally, all token lists start with an ENCODING token
	35	which tells you which encoding was used to decode the bytes stream.
	36	"""
	37	from __future__ import absolute_import
	38
	39	__author__ = 'Ka-Ping Yee <ping@lfw.org>'
	40	__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
	41	'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
	42	'Michael Foord')
	43	import builtins
	44	import re
	45	import sys
	46	from token import *
	47	from codecs import lookup, BOM_UTF8
	48	import collections
	49	from io import TextIOWrapper
	50	cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
	51
	52	import token
	53	__all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
	54	"NL", "untokenize", "ENCODING", "TokenInfo"]
	55	del token
	56
	57	__all__ += ["generate_tokens", "TokenError"]
	58
# Extra token types, numbered directly after the values taken from ``token``,
# and registered in ``tok_name`` so they can be printed by name.
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
NL = N_TOKENS + 1
tok_name[NL] = 'NL'
ENCODING = N_TOKENS + 2
tok_name[ENCODING] = 'ENCODING'
# Account for the three token types added above.
N_TOKENS += 3
	66
	67	class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
	68	def __repr__(self):
	69	annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
	70	return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
	71	self._replace(type=annotated_type))
	72
	73	def group(*choices): return '(' + '\|'.join(choices) + ')'
	74	def any(choices): return group(choices) + '*'
	75	def maybe(choices): return group(choices) + '?'
	76
	77	# Note: we use unicode matching for names ("\w") but ascii matching for
	78	# number literals.
	79	Whitespace = r'[ \f\t]*'
	80	Comment = r'#[^\r\n]*'
	81	Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
	82	Name = r'\w+'
	83
	84	Hexnumber = r'0[xX][0-9a-fA-F]+'
	85	Binnumber = r'0[bB][01]+'
	86	Octnumber = r'0[oO][0-7]+'
	87	Decnumber = r'(?:0+\|[1-9][0-9]*)'
	88	Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
	89	Exponent = r'[eE][-+]?[0-9]+'
	90	Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
	91	Expfloat = r'[0-9]+' + Exponent
	92	Floatnumber = group(Pointfloat, Expfloat)
	93	Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
	94	Number = group(Imagnumber, Floatnumber, Intnumber)
	95
	96	if sys.version_info.minor >= 3:
	97	StringPrefix = r'(?:[bB][rR]?\|[rR][bB]?\|[uU])?'
	98	else:
	99	StringPrefix = r'(?:[bB]?[rR]?)?'
	100
	101	# Tail end of ' string.
	102	Single = r"[^'\\](?:\\.[^'\\])*'"
	103	# Tail end of " string.
	104	Double = r'[^"\\](?:\\.[^"\\])*"'
	105	# Tail end of ''' string.
	106	Single3 = r"[^'\\](?:(?:\\.\|'(?!''))[^'\\])*'''"
	107	# Tail end of """ string.
	108	Double3 = r'[^"\\](?:(?:\\.\|"(?!""))[^"\\])*"""'
	109	Triple = group(StringPrefix + "'''", StringPrefix + '"""')
	110	# Single-line ' or " string.
	111	String = group(StringPrefix + r"'[^\n'\\](?:\\.[^\n'\\])*'",
	112	StringPrefix + r'"[^\n"\\](?:\\.[^\n"\\])*"')
	113
	114	# Because of leftmost-then-longest match semantics, be sure to put the
	115	# longest operators first (e.g., if = came before ==, == would get
	116	# recognized as two instances of =).
	117	Operator = group(r"\\=?", r">>=?", r"<<=?", r"!=",
	118	r"//=?", r"->",
	119	r"[+\-*/%&\|^=<>]=?",
	120	r"~")
	121
	122	Bracket = '[][(){}]'
	123	Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
	124	Funny = group(Operator, Bracket, Special)
	125
	126	PlainToken = group(Number, Funny, String, Name)
	127	Token = Ignore + PlainToken
	128
	129	# First (or only) line of ' or " string.
	130	ContStr = group(StringPrefix + r"'[^\n'\\](?:\\.[^\n'\\])*" +
	131	group("'", r'\\\r?\n'),
	132	StringPrefix + r'"[^\n"\\](?:\\.[^\n"\\])*' +
	133	group('"', r'\\\r?\n'))
	134	PseudoExtras = group(r'\\\r?\n', Comment, Triple)
	135	PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
	136
	137	def _compile(expr):
	138	return re.compile(expr, re.UNICODE)
	139
	140	tokenprog, pseudoprog, single3prog, double3prog = map(
	141	_compile, (Token, PseudoToken, Single3, Double3))
	142	endprogs = {"'": _compile(Single), '"': _compile(Double),
	143	"'''": single3prog, '"""': double3prog,
	144	"r'''": single3prog, 'r"""': double3prog,
	145	"b'''": single3prog, 'b"""': double3prog,
	146	"R'''": single3prog, 'R"""': double3prog,
	147	"B'''": single3prog, 'B"""': double3prog,
	148	"br'''": single3prog, 'br"""': double3prog,
	149	"bR'''": single3prog, 'bR"""': double3prog,
	150	"Br'''": single3prog, 'Br"""': double3prog,
	151	"BR'''": single3prog, 'BR"""': double3prog,
	152	'r': None, 'R': None, 'b': None, 'B': None}
	153
	154	triple_quoted = {}
	155	for t in ("'''", '"""',
	156	"r'''", 'r"""', "R'''", 'R"""',
	157	"b'''", 'b"""', "B'''", 'B"""',
	158	"br'''", 'br"""', "Br'''", 'Br"""',
	159	"bR'''", 'bR"""', "BR'''", 'BR"""'):
	160	triple_quoted[t] = t
	161	single_quoted = {}
	162	for t in ("'", '"',
	163	"r'", 'r"', "R'", 'R"',
	164	"b'", 'b"', "B'", 'B"',
	165	"br'", 'br"', "Br'", 'Br"',
	166	"bR'", 'bR"', "BR'", 'BR"' ):
	167	single_quoted[t] = t
	168
	169	if sys.version_info.minor >= 3:
	170	# Python 3.3
	171	for _prefix in ['rb', 'rB', 'Rb', 'RB', 'u', 'U']:
	172	_t2 = _prefix+'"""'
	173	endprogs[_t2] = double3prog
	174	triple_quoted[_t2] = _t2
	175	_t1 = _prefix + "'''"
	176	endprogs[_t1] = single3prog
	177	triple_quoted[_t1] = _t1
	178	single_quoted[_prefix+'"'] = _prefix+'"'
	179	single_quoted[_prefix+"'"] = _prefix+"'"
	180	del _prefix, _t2, _t1
	181	endprogs['u'] = None
	182	endprogs['U'] = None
	183
	184	del _compile
	185
# Tab stop width for measuring indentation (NOTE(review): its use is in the
# tokenizer body outside this view - confirm).
tabsize = 8

# Exported through __all__ as the tokenizer's error type.
class TokenError(Exception): pass

# NOTE(review): no raiser or handler for this is visible in this part of the
# file; presumably kept for compatibility with the older tokenize API.
class StopTokenizing(Exception): pass
	191
	192
	193	class Untokenizer:
	194
	195	def __init__(self):
	196	self.tokens = []
	197	self.prev_row = 1
	198	self.prev_col = 0
	199	self.encoding = 'utf-8'
	200
	201	def add_whitespace(self, tok_type, start):
	202	row, col = start
	203	assert row >= self.prev_row
	204	col_offset = col - self.prev_col
	205	if col_offset > 0:
	206	self.tokens.append(" " * col_offset)
	207	elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
	208	# Line was backslash-continued.
	209	self.tokens.append(" ")
	210
	211	def untokenize(self, tokens):
	212	iterable = iter(tokens)
	213	for t in iterable:
	214	if len(t) == 2:
	215	self.compat(t, iterable)
	216	break
	217	tok_type, token, start, end = t[:4]
	218	if tok_type == ENCODING:
	219	self.encoding = token
	220	continue
	221	self.add_whitespace(tok_type, start)
	222	self.tokens.append(token)
	223	self.prev_row, self.prev_col = end
	224	if tok_type in (NEWLINE, NL):
	225	self.prev_row += 1
	226	self.prev_col = 0
	227	return "".join(self.tokens)
	228
	229	def compat(self, token, iterable):
	230	# This import is here to avoid problems when the itertools
	231	# module is not built yet and tokenize is imported.
	232	from itertools import chain
	233	startline = False
	234	prevstring = False
	235	indents = []
	236	toks_append = self.tokens.append
	237
	238	for tok in chain([token], iterable):
	239	toknum, tokval = tok[:2]
	240	if toknum == ENCODING:
	241	self.encoding = tokval
	242	continue
	243
	244	if toknum in (NAME, NUMBER):
	245	tokval += ' '
	246
	247	# Insert a space between two consecutive strings
	248	if toknum == STRING:
	249	if prevstring:
	250	tokval = ' ' + tokval
	251	prevstring = True
	252	else:
	253	prevstring = False
	254
	255	if toknum == INDENT:
	256	indents.append(tokval)
	257	continue
	258	elif toknum == DEDENT:
	259	indents.pop()
	260	continue
	261	elif toknum in (NEWLINE, NL):
	262	startline = True
	263	elif startline and indents:
	264	toks_append(indents[-1])
	265	startline = False
	266	toks_append(tokval)
	267
	268
def untokenize(tokens):
    """
    Convert ``tokens`` (an iterable) back into Python source code.  Return
    a ``str``: unlike the standard library version, the result is not
    encoded, so any ENCODING token only updates the Untokenizer's
    ``encoding`` attribute and does not affect the returned text.

    The result is guaranteed to tokenize back to match the input so that
    the conversion is lossless and round-trips are assured.  The
    guarantee applies only to the token type and token string as the
    spacing between tokens (column positions) may change.

    :func:`untokenize` has two modes.  If the input tokens are sequences
    of length 2 (``type``, ``string``) then spaces are added as necessary to
    preserve the round-trip property.

    If the input tokens are sequences of length 4 or more (``type``,
    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
    spaces are added so that each token appears in the result at the
    position indicated by ``start`` and ``end``, if possible.
    """
    return Untokenizer().untokenize(tokens)
	290
	291
	292	def _get_normal_name(orig_enc):
	293	"""Imitates get_normal_name in tokenizer.c."""
	294	# Only care about the first 12 characters.
	295	enc = orig_enc[:12].lower().replace("_", "-")
	296	if enc == "utf-8" or enc.startswith("utf-8-"):
	297	return "utf-8"
	298	if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
	299	enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
	300	return "iso-8859-1"
	301	return orig_enc
	302
	303	def detect_encoding(readline):
	304	"""
	305	The detect_encoding() function is used to detect the encoding that should
	306	be used to decode a Python source file. It requires one argment, readline,
	307	in the same way as the tokenize() generator.
	308
	309	It will call readline a maximum of twice, and return the encoding used
	310	(as a string) and a list of any lines (left as bytes) it has read in.
	311
	312	It detects the encoding from the presence of a utf-8 bom or an encoding
	313	cookie as specified in pep-0263. If both a bom and a cookie are present,
	314	but disagree, a SyntaxError will be raised. If the encoding cookie is an
	315	invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
	316	'utf-8-sig' is returned.
	317
	318	If no encoding is specified, then the default of 'utf-8' will be returned.
	319	"""
	320	bom_found = False
	321	encoding = None
	322	default = 'utf-8'
	323	def read_or_stop():
	324	try:
	325	return readline()
	326	except StopIteration:
	327	return b''
	328
	329	def find_cookie(line):
	330	try:
	331	# Decode as UTF-8. Either the line is an encoding declaration,
	332	# in which case it should be pure ASCII, or it must be UTF-8
	333	# per default encoding.
	334	line_string = line.decode('utf-8')
	335	except UnicodeDecodeError:
	336	raise SyntaxError("invalid or missing encoding declaration")
	337
	338	matches = cookie_re.findall(line_string)
	339	if not matches:
	340	return None
	341	encoding = _get_normal_name(matches[0])
	342	try:
	343	codec = lookup(encoding)
	344	except LookupError:
	345	# This behaviour mimics the Python interpreter
	346	raise SyntaxError("unknown encoding: " + encoding)
	347
	348	if bom_found:
	349	if encoding != 'utf-8':
	350	# This behaviour mimics the Python interpreter
	351	raise SyntaxError('encoding problem: utf-8')
	352	encoding += '-sig'
	353	return encoding
	354
	355	first = read_or_stop()
	356	if first.startswith(BOM_UTF8):
	357	bom_found = True
	358	first = first[3:]
	359	default = 'utf-8-sig'
	360	if not first:
	361	return default, []
	362
	363	encoding = find_cookie(first)
	364	if encoding:
	365	return encoding, [first]
	366
	367	second = read_or_stop()
	368	if not second:
	369	return default, [first]
	370
	371	encoding = find_cookie(second)
	372	if encoding:
	373	return encoding, [first, second]
	374
	375	return default, [first, second]
	376
	377
	378	def open(filename):
	379	"""Open a file in read only mode using the encoding detected by
	380	detect_encoding().
	381	"""
	382	buffer = builtins.open(filename, 'rb')
	383	encoding, lines = detect_encoding(buffer.readline)
	384	buffer.seek(0)
	385	text = TextIOWrapper(buffer, encoding, line_buffering=True)
	386	text.mode = 'r'
	387	return text
	388
	389
	390	def tokenize(readline):
	391	"""
	392	The tokenize() generator requires one argment, readline, which
	393	must be a callable object which provides the same interface as the
	394	readline() method of built-in file objects. Each call to the function
	395	should return one line of input as bytes. Alternately, readline
	396	can be a callable function terminating with StopIteration:
	397	readline = open(myfile, 'rb').__next__ # Example of alternate readline
	398
	399	The generator produces 5-tuples with these members: the token type; the
	400	token string; a 2-tuple (srow, scol) of ints specifying the row and
	401	column where the token begins in the source; a 2-tuple (erow, ecol) of
	402	ints specifying the row and column where the token ends in the source;
	403	and the line on which the token was found. The line passed is the
	404	logical line; continuation lines are included.
	405
	406	The first token sequence will always be an ENCODING token
	407	which tells you which encoding was used to decode the bytes stream.
	408	"""
	409	# This import is here to avoid problems when the itertools module is not
	410	# built yet and tokenize is imported.
	411	from itertools import chain, repeat
	412	encoding, consumed = detect_encoding(readline)
	413	rl_gen = iter(readline, b"")
	414	empty = repeat(b"")
	415	return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
	416
	417
def _tokenize(readline, encoding):
    """Generate TokenInfo tuples for the lines returned by readline.

    readline is called once per line; an empty line, or a StopIteration
    raised by readline, is treated as the end of input.

    If encoding is not None, an ENCODING token is yielded first (with
    "utf-8-sig" reported as "utf-8") and every line is decoded with that
    encoding. If encoding is None, lines are used exactly as returned.

    Raises TokenError when the input ends inside a multi-line string or a
    multi-line statement, and IndentationError when a dedent does not match
    any enclosing indentation level.
    """
    # lnum: current line number; parenlev: bracket nesting depth;
    # continued: set when the previous line ended with a backslash.
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    # contstr accumulates a string that spans several lines; needcont is set
    # for single-quoted strings, which need a backslash to continue.
    contstr, needcont = '', 0
    contline = None
    # Stack of active indentation columns.
    indents = [0]

    if encoding is not None:
        if encoding == "utf-8-sig":
            # BOM will already have been stripped.
            encoding = "utf-8"
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
    while True:             # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = b''

        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        # NOTE: 'max' shadows the builtin for the rest of this function.
        pos, max = 0, len(line)

        if contstr:                            # continued string
            # strstart and endprog were set in the token-scanning loop below
            # when the string was opened.
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:end],
                                strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                # A backslash was required to continue the string but the
                # line does not end with one: report what was collected.
                yield TokenInfo(ERRORTOKEN, contstr + line,
                                strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    # Advance to the next multiple of tabsize.
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    # A form feed resets the column count.
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                # The line held nothing but whitespace and no line
                # terminator: stop reading lines.
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                # Comment-only and blank lines are terminated with a NEWLINE
                # token here; they do not affect the indentation stack.
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield TokenInfo(COMMENT, comment_token,
                                    (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield TokenInfo(NEWLINE, line[nl_pos:],
                                    (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield TokenInfo(NEWLINE, line[pos:],
                                    (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                # Group 1 of the match is the token itself.
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if (initial in numchars or                 # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                    yield TokenInfo(NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    # Inside brackets a line break is NL, otherwise NEWLINE.
                    yield TokenInfo(NL if parenlev > 0 else NEWLINE,
                                    token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield TokenInfo(COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Pick the end pattern from the first of the leading
                        # characters that has a non-empty entry in endprogs.
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield TokenInfo(STRING, token, spos, epos, line)
                elif initial.isidentifier():               # ordinary name
                    yield TokenInfo(NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield TokenInfo(OP, token, spos, epos, line)
            else:
                # Nothing matched at this position: emit the single character
                # as an error token and move on.
                yield TokenInfo(ERRORTOKEN, line[pos],
                                (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
	565
	566
	567	# An undocumented, backwards compatible, API for all the places in the standard
	568	# library that expect to be able to use tokenize with strings
	569	def generate_tokens(readline):
	570	return _tokenize(readline, None)
	571
	572	if __name__ == "__main__":
	573	# Quick sanity check
	574	s = b'''def parseline(self, line):
	575	"""Parse the line into a command name and a string containing
	576	the arguments. Returns a tuple containing (command, args, line).
	577	'command' and 'args' may be None if the line couldn't be parsed.
	578	"""
	579	line = line.strip()
	580	if not line:
	581	return None, None, line
	582	elif line[0] == '?':
	583	line = 'help ' + line[1:]
	584	elif line[0] == '!':
	585	if hasattr(self, 'do_shell'):
	586	line = 'shell ' + line[1:]
	587	else:
	588	return None, None, line
	589	i, n = 0, len(line)
	590	while i < n and line[i] in self.identchars: i = i+1
	591	cmd, arg = line[:i], line[i:].strip()
	592	return cmd, arg, line
	593	'''
	594	for tok in tokenize(iter(s.splitlines()).__next__):
	595	print(tok)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages