Show More
@@ -0,0 +1,125 b'' | |||
|
1 | """This is a patched copy of the untokenize machinery from the standard library. | |
|
2 | ||
|
3 | untokenize has a number of major bugs that render it almost useless. We're using | |
|
4 | the patch written by Gareth Rees on Python issue 12691: | |
|
5 | ||
|
6 | http://bugs.python.org/issue12691 | |
|
7 | ||
|
8 | We've undone one part of the patch - it encoded the output to bytes, to neatly | |
|
9 | round-trip from tokenize. We want to keep working with text, so we don't encode. | |
|
10 | """ | |
|
11 | ||
|
12 | __author__ = 'Ka-Ping Yee <ping@lfw.org>' | |
|
13 | __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' | |
|
14 | 'Skip Montanaro, Raymond Hettinger, Trent Nelson, ' | |
|
15 | 'Michael Foord') | |
|
16 | from token import * | |
|
17 | ||
|
18 | ||
|
19 | from tokenize import COMMENT, NL | |
|
20 | ||
|
try:
    # Python 3: tokenize emits an ENCODING token first; we need the
    # constant so Untokenizer can recognise and skip those tokens.
    from tokenize import ENCODING
except ImportError:
    # Python 2's tokenize has no ENCODING token type.  Use a sentinel
    # value that can never equal a real (small, non-negative) token type,
    # so the `== ENCODING` comparisons below are harmless no-ops.
    # NOTE: the original used a bare `except:`, which would also swallow
    # KeyboardInterrupt/SystemExit; only ImportError is expected here.
    ENCODING = 987654321
|
26 | ||
|
class Untokenizer:
    """Reconstruct Python source text from a stream of tokens.

    This is the patched stdlib machinery (see module docstring).  Tokens of
    length >= 4 (``type, string, start, end``) are placed back at their
    recorded positions via :meth:`add_whitespace`; length-2 tokens
    (``type, string``) fall back to :meth:`compat`, which inserts just
    enough spacing to keep the result re-tokenizable.
    """

    def __init__(self):
        # Accumulated output fragments; joined into a str at the end.
        self.tokens = []
        # Position (row 1-based, col 0-based) just past the last token
        # written, used to compute padding for the next token.
        self.prev_row = 1
        self.prev_col = 0
        # Updated if an ENCODING token is seen; unused for output here
        # since the IPython version returns text rather than bytes.
        self.encoding = 'utf-8'

    def add_whitespace(self, tok_type, start):
        """Append spaces so the next token lands at position ``start``.

        ``start`` is a ``(row, col)`` pair; rows must be non-decreasing.
        """
        row, col = start
        assert row >= self.prev_row
        col_offset = col - self.prev_col
        if col_offset > 0:
            # Same line: pad out to the token's recorded column.
            self.tokens.append(" " * col_offset)
        elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
            # Line was backslash-continued.
            self.tokens.append(" ")

    def untokenize(self, tokens):
        """Render ``tokens`` back into source text (a str).

        If a length-2 token is encountered, the remainder of the stream is
        handed to :meth:`compat` and position information is ignored from
        then on.
        """
        iterable = iter(tokens)
        for t in iterable:
            if len(t) == 2:
                # Positionless token: switch to compatibility mode for the
                # rest of the stream.
                self.compat(t, iterable)
                break
            # IPython modification - valid Python 2 syntax
            # (slice to 4 fields instead of extended unpacking).
            tok_type, token, start, end = t[:4]
            if tok_type == ENCODING:
                # Remember the declared encoding but emit nothing for it.
                self.encoding = token
                continue
            self.add_whitespace(tok_type, start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                # The next token starts on a fresh line.
                self.prev_row += 1
                self.prev_col = 0
        # IPython modification - don't encode output
        return "".join(self.tokens)

    def compat(self, token, iterable):
        """Emit ``token`` and the rest of ``iterable`` without positions.

        Spacing rules: NAME/NUMBER tokens get a trailing space, adjacent
        STRING tokens are separated by a space, and INDENT/DEDENT tokens
        are tracked so the current indent is re-emitted at the start of
        each new line.
        """
        # This import is here to avoid problems when the itertools
        # module is not built yet and tokenize is imported.
        from itertools import chain
        startline = False       # True right after a NEWLINE/NL token
        prevstring = False      # True if the previous token was a STRING
        indents = []            # stack of indent strings from INDENT tokens
        toks_append = self.tokens.append

        for tok in chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                # First real token of a line: restore the current indent.
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)
|
103 | ||
|
104 | ||
|
def untokenize(tokens):
    """
    Convert ``tokens`` (an iterable) back into Python source code.  Return
    a string (IPython modification: unlike the stdlib/patched version,
    the result is NOT encoded to bytes, regardless of any ENCODING token
    in ``tokens`` -- see the module docstring).

    The result is guaranteed to tokenize back to match the input so that
    the conversion is lossless and round-trips are assured.  The
    guarantee applies only to the token type and token string as the
    spacing between tokens (column positions) may change.

    :func:`untokenize` has two modes. If the input tokens are sequences
    of length 2 (``type``, ``string``) then spaces are added as necessary to
    preserve the round-trip property.

    If the input tokens are sequences of length 4 or more (``type``,
    ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
    spaces are added so that each token appears in the result at the
    position indicated by ``start`` and ``end``, if possible.
    """
    return Untokenizer().untokenize(tokens)
@@ -77,6 +77,7 b' from IPython.core.inputtransformer import (leading_indent,' | |||
|
77 | 77 | classic_prompt, |
|
78 | 78 | ipy_prompt, |
|
79 | 79 | cellmagic, |
|
80 | assemble_logical_lines, | |
|
80 | 81 | help_end, |
|
81 | 82 | escaped_transformer, |
|
82 | 83 | assign_from_magic, |
@@ -515,6 +516,7 b' class IPythonInputSplitter(InputSplitter):' | |||
|
515 | 516 | classic_prompt(), |
|
516 | 517 | ipy_prompt(), |
|
517 | 518 | cellmagic(), |
|
519 | assemble_logical_lines(), | |
|
518 | 520 | help_end(), |
|
519 | 521 | escaped_transformer(), |
|
520 | 522 | assign_from_magic(), |
@@ -639,14 +641,11 b' class IPythonInputSplitter(InputSplitter):' | |||
|
639 | 641 | |
|
640 | 642 | def push_line(self, line): |
|
641 | 643 | buf = self._buffer |
|
642 | not_in_string = self._is_complete or not buf or \ | |
|
643 | (buf and buf[-1].rstrip().endswith((':', ','))) | |
|
644 | 644 | for transformer in self.transforms: |
|
645 |
|
|
|
646 |
|
|
|
647 | if line is None: | |
|
648 | self.transformer_accumulating = True | |
|
649 | return False | |
|
645 | line = transformer.push(line) | |
|
646 | if line is None: | |
|
647 | self.transformer_accumulating = True | |
|
648 | return False | |
|
650 | 649 | |
|
651 | 650 | self.transformer_accumulating = False |
|
652 | 651 | return super(IPythonInputSplitter, self).push(line) |
@@ -4,7 +4,15 b' import re' | |||
|
4 | 4 | from StringIO import StringIO |
|
5 | 5 | import tokenize |
|
6 | 6 | |
|
7 | try: | |
|
8 | generate_tokens = tokenize.generate_tokens | |
|
9 | except AttributeError: | |
|
10 | # Python 3. Note that we use the undocumented _tokenize because it expects | |
|
11 | # strings, not bytes. See also Python issue #9969. | |
|
12 | generate_tokens = tokenize._tokenize | |
|
13 | ||
|
7 | 14 | from IPython.core.splitinput import split_user_input, LineInfo |
|
15 | from IPython.utils.untokenize import untokenize | |
|
8 | 16 | |
|
9 | 17 | #----------------------------------------------------------------------------- |
|
10 | 18 | # Globals |
@@ -119,8 +127,11 b' class TokenInputTransformer(InputTransformer):' | |||
|
119 | 127 | def __init__(self, func): |
|
120 | 128 | self.func = func |
|
121 | 129 | self.current_line = "" |
|
122 | self.tokenizer = tokenize.generate_tokens(self.get_line) | |
|
123 | 130 | self.line_used= False |
|
131 | self.reset_tokenizer() | |
|
132 | ||
|
133 | def reset_tokenizer(self): | |
|
134 | self.tokenizer = generate_tokens(self.get_line) | |
|
124 | 135 | |
|
125 | 136 | def get_line(self): |
|
126 | 137 | if self.line_used: |
@@ -140,13 +151,12 b' class TokenInputTransformer(InputTransformer):' | |||
|
140 | 151 | break |
|
141 | 152 | except tokenize.TokenError: |
|
142 | 153 | # Multi-line statement - stop and try again with the next line |
|
143 | self.tokenizer = tokenize.generate_tokens(self.get_line) | |
|
154 | self.reset_tokenizer() | |
|
144 | 155 | return None |
|
145 | 156 | |
|
146 | 157 | self.current_line = "" |
|
147 | # Python bug 8478 - untokenize doesn't work quite correctly with a | |
|
148 | # generator. We call list() to avoid this. | |
|
149 | return tokenize.untokenize(list(self.func(tokens))).rstrip('\n') | |
|
158 | self.reset_tokenizer() | |
|
159 | return untokenize(self.func(tokens)).rstrip('\n') | |
|
150 | 160 | |
|
151 | 161 | def reset(self): |
|
152 | 162 | l = self.current_line |
@@ -154,6 +164,9 b' class TokenInputTransformer(InputTransformer):' | |||
|
154 | 164 | if l: |
|
155 | 165 | return l.rstrip('\n') |
|
156 | 166 | |
|
@TokenInputTransformer.wrap
def assemble_logical_lines(tokens):
    """Pass the token stream through unchanged.

    Wrapped as a TokenInputTransformer, this has the effect of joining
    physical lines into complete logical lines: the transformer machinery
    accumulates input until it tokenizes cleanly, and this identity
    function leaves the resulting tokens untouched before they are
    untokenized back to source.
    """
    return tokens
|
157 | 170 | |
|
158 | 171 | # Utilities |
|
159 | 172 | def _make_help_call(target, esc, lspace, next_input=None): |
General Comments 0
You need to be logged in to leave comments.
Login now