upstream/ipython Files · IPython/utils/_tokenize_py2.py

Print View has been removed...

Print View has been removed Closes ,

Thomas Kluyver - - Load All Authors

File last commit:

r10110:30fce8a6


                r11766:bcabea40

Download file

             _tokenize_py2.py
        
                    438 lines
            
             | 16.7 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / utils / _tokenize_py2.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      """Patched version of standard library tokenize, to deal with various bugs.

      Patches

      - Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),

        manually applied.

      - Newlines in comments and blank lines should be either NL or NEWLINE, depending

        on whether they are in a multi-line statement. Filed as Python issue #17061.

      -------------------------------------------------------------------------------

      Tokenization help for Python programs.

      generate_tokens(readline) is a generator that breaks a stream of

      text into Python tokens.  It accepts a readline-like method which is called

      repeatedly to get the next line of input (or "" for EOF).  It generates

      5-tuples with these members:

          the token type (see token.py)

          the token (a string)

          the starting (row, column) indices of the token (a 2-tuple of ints)

          the ending (row, column) indices of the token (a 2-tuple of ints)

          the original line (string)

      It is designed to match the working of the Python tokenizer exactly, except

      that it produces COMMENT tokens for comments and gives type OP for all

      operators

      Older entry points

          tokenize_loop(readline, tokeneater)

          tokenize(readline, tokeneater=printtoken)

      are the same, except instead of generating tokens, tokeneater is a callback

      function to which the 5 fields described above are passed as 5 arguments,

      each time a new token is found."""

      __author__ = 'Ka-Ping Yee <ping@lfw.org>'

      __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '

                     'Skip Montanaro, Raymond Hettinger')

      import string, re

      from token import *

      import token

      # Re-export every public name of the standard ``token`` module ...
      __all__ = [x for x in dir(token) if not x.startswith("_")]

      # ... plus the names this module adds on top of it.
      __all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]

      # On Python 2 the list-comprehension variable leaks into module scope;
      # remove it so it is not left behind as a module attribute.
      del x

      # The module object was only needed to build __all__ (its contents were
      # already star-imported above).
      del token

      __all__ += ["TokenError"]

      # COMMENT and NL are extra token types: they take the next two free token
      # numbers, get a printable name in tok_name, and N_TOKENS is bumped to
      # account for them.
      COMMENT = N_TOKENS

      tok_name[COMMENT] = 'COMMENT'

      NL = N_TOKENS + 1

      tok_name[NL] = 'NL'

      N_TOKENS += 2

      def group(*choices):
          """Return one parenthesised regex group with *choices* as '|' alternatives."""
          return '(%s)' % '|'.join(choices)

      def any(*choices):
          """Return a pattern matching zero or more repetitions of group(*choices).

          Note: this shadows the builtin any() inside this module.
          """
          return '%s*' % group(*choices)

      def maybe(*choices):
          """Return a pattern matching group(*choices) zero or one time."""
          return '%s?' % group(*choices)

      # --- Regex building blocks -------------------------------------------------
      # Every name below is a pattern *string*; they are concatenated and combined
      # with group()/any()/maybe() and only compiled at the end of this section.

      # Zero or more spaces, form feeds or tabs.
      Whitespace = r'[ \f\t]*'

      # A '#' and everything up to (not including) the end of the line.
      Comment = r'#[^\r\n]*'

      # Leading whitespace, any number of backslash-newline continuations, and an
      # optional comment.
      Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)

      Name = r'[a-zA-Z_]\w*'

      # Integer literals; a trailing [lL] is the optional long-integer suffix.
      Hexnumber = r'0[xX][\da-fA-F]+[lL]?'

      # NOTE(review): '|' binds loosest, so the [lL]? suffix here belongs only to
      # the second alternative (0[0-7]*), not to the 0o... form -- confirm intended.
      Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'

      Binnumber = r'0[bB][01]+[lL]?'

      Decnumber = r'[1-9]\d*[lL]?'

      Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)

      # Floating point and imaginary literals.
      Exponent = r'[eE][-+]?\d+'

      Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)

      Expfloat = r'\d+' + Exponent

      Floatnumber = group(Pointfloat, Expfloat)

      Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')

      # Alternatives are tried left to right: imaginary, then float, then integer.
      Number = group(Imagnumber, Floatnumber, Intnumber)

      # The four "tail end" patterns match the *rest* of a string whose opening
      # quote has already been consumed, up to and including the closing quote(s).

      # Tail end of ' string.

      Single = r"[^'\\]*(?:\\.[^'\\]*)*'"

      # Tail end of " string.

      Double = r'[^"\\]*(?:\\.[^"\\]*)*"'

      # Tail end of ''' string.

      Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"

      # Tail end of """ string.

      Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'

      # Opening delimiter of a triple-quoted string, with optional u/b and r prefixes.
      Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')

      # Single-line ' or " string.

      String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",

                     r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

      # Because of leftmost-then-longest match semantics, be sure to put the

      # longest operators first (e.g., if = came before ==, == would get

      # recognized as two instances of =).

      Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",

                       r"//=?",

                       r"[+\-*/%&|^=<>]=?",

                       r"~")

      Bracket = '[][(){}]'

      # A line ending, or one of the single-character punctuation tokens.
      Special = group(r'\r?\n', r'[:;.,`@]')

      Funny = group(Operator, Bracket, Special)

      PlainToken = group(Number, Funny, String, Name)

      Token = Ignore + PlainToken

      # First (or only) line of ' or " string.

      # The string either closes on this line or the line ends in a
      # backslash-newline, meaning it continues on the next line.
      ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +

                      group("'", r'\\\r?\n'),

                      r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +

                      group('"', r'\\\r?\n'))

      PseudoExtras = group(r'\\\r?\n', Comment, Triple)

      # Leading whitespace followed by one capturing group holding the token
      # itself; generate_tokens() reads that group with span(1).
      PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

      # Compile once at import time.  generate_tokens() scans with pseudoprog and
      # finishes triple-quoted strings with single3prog / double3prog.
      tokenprog, pseudoprog, single3prog, double3prog = map(

          re.compile, (Token, PseudoToken, Single3, Double3))

      # Maps the opening of a string to the compiled pattern that matches the rest
      # of that string.  Plain quotes get their own patterns; every prefixed or
      # unprefixed triple quote shares single3prog / double3prog.
      endprogs = {"'": re.compile(Single), '"': re.compile(Double)}
      for _prefix in ('', 'r', 'u', 'ur', 'R', 'U', 'uR', 'Ur', 'UR',
                      'b', 'br', 'B', 'bR', 'Br', 'BR'):
          endprogs[_prefix + "'''"] = single3prog
          endprogs[_prefix + '"""'] = double3prog
      # Bare prefix letters map to None so that the ``or`` chain in
      # generate_tokens() skips over them and lands on the quote character.
      for _prefix in 'rRuUbB':
          endprogs[_prefix] = None
      del _prefix

      # Every spelling of a string prefix the tables below cover ('' = no prefix).
      _STRING_PREFIXES = ('', 'r', 'R', 'u', 'U', 'ur', 'Ur', 'uR', 'UR',
                          'b', 'B', 'br', 'Br', 'bR', 'BR')

      # Opening delimiters of triple-quoted strings, each mapped to itself so the
      # dict can be used for fast membership tests.
      triple_quoted = {}
      for t in (prefix + quote
                for prefix in _STRING_PREFIXES
                for quote in ("'''", '"""')):
          triple_quoted[t] = t

      # Same idea for single-quoted strings.
      single_quoted = {}
      for t in (prefix + quote
                for prefix in _STRING_PREFIXES
                for quote in ("'", '"')):
          single_quoted[t] = t

      # Tab stop width used by generate_tokens() when it converts leading tabs
      # into an indentation column.
      tabsize = 8

      class TokenError(Exception):
          """Raised by generate_tokens() when input ends inside a multi-line
          string or an unfinished multi-line statement."""

      class StopTokenizing(Exception):
          """Caught and discarded by tokenize(); raising it while tokens are being
          processed ends the run without an error."""

      def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
          """Print one token to stdout; the default ``tokeneater`` of tokenize().

          The output line is ``srow,scol-erow,ecol:<TAB>TYPE<TAB>repr(token)``.

          type      -- token number, looked up in tok_name for its printable name
          token     -- the token string
          srow_scol -- (row, column) pair where the token starts
          erow_ecol -- (row, column) pair where the token ends
          line      -- the source line; accepted so the signature matches the
                       5-tuples of generate_tokens(), but not printed
          """
          srow, scol = srow_scol
          erow, ecol = erow_ecol
          # Single parenthesised argument: prints the same text whether ``print``
          # is a statement (Python 2) or a function (Python 3).
          print("%d,%d-%d,%d:\t%s\t%s" %
                (srow, scol, erow, ecol, tok_name[type], repr(token)))

      def tokenize(readline, tokeneater=printtoken):
          """
          Tokenize the input behind ``readline`` and hand each token to ``tokeneater``.

          readline   -- a callable with the same interface as the readline()
                        method of built-in file objects; every call should
                        return one line of input as a string.
          tokeneater -- a callable invoked once per token with five arguments,
                        matching the tuples produced by generate_tokens().
                        Defaults to printtoken.

          A StopTokenizing exception raised while the tokens are being processed
          ends the run quietly instead of propagating to the caller.
          """
          try:
              tokenize_loop(readline, tokeneater)
          except StopTokenizing:
              # Treated as a normal, early end of tokenization.
              return

      # backwards compatible interface
      def tokenize_loop(readline, tokeneater):
          """Call ``tokeneater`` with the five fields of every token that
          generate_tokens(readline) produces.

          Unlike tokenize(), no exception is caught here.
          """
          for tok_type, tok_string, tok_start, tok_end, tok_line in \
                  generate_tokens(readline):
              tokeneater(tok_type, tok_string, tok_start, tok_end, tok_line)

      class Untokenizer:
          """Rebuild source text from a stream of tokens.

          Output fragments are collected in ``self.tokens``.  ``prev_row`` and
          ``prev_col`` hold the position where the previously emitted token
          ended, so the gap before the next token can be filled with spaces.
          """

          def __init__(self):
              self.tokens = []
              self.prev_row = 1
              self.prev_col = 0

          def add_whitespace(self, start, tok_type=None):
              """Append the whitespace that separates the previous token from
              one starting at ``start``.

              start    -- (row, col) where the next token begins.
              tok_type -- type of that next token.  NEWLINE, NL and ENDMARKER
                          tokens never receive the continuation space.  The
                          default of None keeps old one-argument calls working
                          and is treated like any other ordinary token.

              Previously this method read ``tok_type`` without it being a
              parameter or a global, so every backslash-continued line raised
              NameError.
              """
              row, col = start
              assert row >= self.prev_row
              col_offset = col - self.prev_col
              if col_offset > 0:
                  self.tokens.append(" " * col_offset)
              elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
                  # Line was backslash-continued
                  self.tokens.append(" ")

          def untokenize(self, tokens):
              """Return the source text for ``tokens``.

              Tokens carrying positions (at least 4 fields) are placed using
              those positions.  As soon as a bare 2-tuple is met, the rest of
              the stream is handed to compat(), which can only approximate the
              spacing.
              """
              iterable = iter(tokens)
              for t in iterable:
                  if len(t) == 2:
                      self.compat(t, iterable)
                      break
                  tok_type, token, start, end = t[:4]
                  self.add_whitespace(start, tok_type)
                  self.tokens.append(token)
                  self.prev_row, self.prev_col = end
                  if tok_type in (NEWLINE, NL):
                      # The next token starts on a fresh line at column 0.
                      self.prev_row += 1
                      self.prev_col = 0
              return "".join(self.tokens)

          def compat(self, token, iterable):
              """Append text for position-less tokens: ``token`` first, then
              everything remaining in ``iterable``.

              Only the first two fields (type, string) of each token are used.
              """
              # This import is here to avoid problems when the itertools
              # module is not built yet and tokenize is imported.
              from itertools import chain
              startline = False
              prevstring = False
              indents = []
              toks_append = self.tokens.append
              for tok in chain([token], iterable):
                  toknum, tokval = tok[:2]

                  # Without positions, a trailing space keeps adjacent names
                  # and numbers from running together.
                  if toknum in (NAME, NUMBER):
                      tokval += ' '

                  # Insert a space between two consecutive strings
                  if toknum == STRING:
                      if prevstring:
                          tokval = ' ' + tokval
                      prevstring = True
                  else:
                      prevstring = False

                  if toknum == INDENT:
                      indents.append(tokval)
                      continue
                  elif toknum == DEDENT:
                      indents.pop()
                      continue
                  elif toknum in (NEWLINE, NL):
                      startline = True
                  elif startline and indents:
                      # First token of a new line inside an indented block:
                      # re-emit the current indentation string.
                      toks_append(indents[-1])
                      startline = False
                  toks_append(tokval)

      def untokenize(iterable):
          """Turn a stream of tokens back into Python source code.

          Every item of ``iterable`` must be a token sequence with at least two
          elements: the token number and the token value.  When only those two
          elements are supplied, the resulting output is poor.

          Round-trip invariant for full input:
              Untokenized source will match input source exactly

          Round-trip invariant for limited input:
              # Output text will tokenize back to the input
              t1 = [tok[:2] for tok in generate_tokens(f.readline)]
              newcode = untokenize(t1)
              readline = iter(newcode.splitlines(1)).next
              t2 = [tok[:2] for tok in generate_tokens(readline)]
              assert t1 == t2
          """
          # A fresh Untokenizer per call, so no position state carries over.
          return Untokenizer().untokenize(iterable)

      def generate_tokens(readline):

          """
          The generate_tokens() generator requires one argument, readline, which
          must be a callable object which provides the same interface as the
          readline() method of built-in file objects. Each call to the function
          should return one line of input as a string.  Alternately, readline
          can be a callable function terminating with StopIteration:

              readline = open(myfile).next    # Example of alternate readline

          The generator produces 5-tuples with these members: the token type; the
          token string; a 2-tuple (srow, scol) of ints specifying the row and
          column where the token begins in the source; a 2-tuple (erow, ecol) of
          ints specifying the row and column where the token ends in the source;
          and the line on which the token was found. The line passed is the
          logical line; continuation lines are included.

          Raises TokenError when the input ends inside a multi-line string or a
          multi-line statement, and IndentationError when a line dedents to a
          column that matches no enclosing indentation level.
          """

          # lnum: number of the line being scanned; parenlev: nesting depth of
          # open brackets; continued: set when the previous line ended in a
          # backslash continuation.
          lnum = parenlev = continued = 0

          namechars, numchars = string.ascii_letters + '_', '0123456789'

          # contstr: text collected so far of a string spanning several lines.
          # needcont: set for a single-quoted string, whose lines must each end
          # in a backslash to continue.
          contstr, needcont = '', 0

          # contline: the source lines collected so far for that string.
          contline = None

          # Stack of indentation columns currently open; column 0 is always there.
          indents = [0]

          while 1:                                   # loop over lines in stream

              try:

                  line = readline()

              except StopIteration:

                  # A readline that signals the end with StopIteration is
                  # treated like one returning ''.
                  line = ''

              lnum += 1

              pos, max = 0, len(line)

              if contstr:                            # continued string

                  if not line:

                      raise TokenError, ("EOF in multi-line string", strstart)

                  endmatch = endprog.match(line)

                  if endmatch:

                      # The string closes on this line; scanning resumes after it.
                      pos = end = endmatch.end(0)

                      yield (STRING, contstr + line[:end],

                             strstart, (lnum, end), contline + line)

                      contstr, needcont = '', 0

                      contline = None

                  elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':

                      # A single-quoted string whose line neither closes it nor
                      # ends in a backslash: report what was collected as an error.
                      yield (ERRORTOKEN, contstr + line,

                                 strstart, (lnum, len(line)), contline)

                      contstr = ''

                      contline = None

                      continue

                  else:

                      # Still inside the string: keep accumulating.
                      contstr = contstr + line

                      contline = contline + line

                      continue

              elif parenlev == 0 and not continued:  # new statement

                  if not line: break

                  column = 0

                  while pos < max:                   # measure leading whitespace

                      if line[pos] == ' ':

                          column += 1

                      elif line[pos] == '\t':

                          # Advance to the next multiple of tabsize.
                          column = (column//tabsize + 1)*tabsize

                      elif line[pos] == '\f':

                          # A form feed resets the column count.
                          column = 0

                      else:

                          break

                      pos += 1

                  # The line held only whitespace and no line ending: this leaves
                  # the outer line loop, exactly like end of input.
                  if pos == max:

                      break

                  if line[pos] in '#\r\n':           # skip comments or blank lines

                      # This branch is only reached with parenlev == 0 and no
                      # pending continuation, so the line end is reported as
                      # NEWLINE.  Such lines never change the indentation stack.
                      if line[pos] == '#':

                          comment_token = line[pos:].rstrip('\r\n')

                          nl_pos = pos + len(comment_token)

                          yield (COMMENT, comment_token,

                                 (lnum, pos), (lnum, pos + len(comment_token)), line)

                          yield (NEWLINE, line[nl_pos:],

                                 (lnum, nl_pos), (lnum, len(line)), line)

                      else:

                          yield (NEWLINE, line[pos:],

                                 (lnum, pos), (lnum, len(line)), line)

                      continue

                  if column > indents[-1]:           # count indents or dedents

                      indents.append(column)

                      yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)

                  # One DEDENT for every indentation level being closed.
                  while column < indents[-1]:

                      if column not in indents:

                          raise IndentationError(

                              "unindent does not match any outer indentation level",

                              ("<tokenize>", lnum, pos, line))

                      indents = indents[:-1]

                      yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

              else:                                  # continued statement

                  if not line:

                      raise TokenError, ("EOF in multi-line statement", (lnum, 0))

                  continued = 0

              while pos < max:

                  pseudomatch = pseudoprog.match(line, pos)

                  if pseudomatch:                                # scan for tokens

                      # Group 1 of PseudoToken is the token without the leading
                      # whitespace.
                      start, end = pseudomatch.span(1)

                      spos, epos, pos = (lnum, start), (lnum, end), end

                      token, initial = line[start:end], line[start]

                      if initial in numchars or \

                         (initial == '.' and token != '.'):      # ordinary number

                          yield (NUMBER, token, spos, epos, line)

                      elif initial in '\r\n':

                          # Inside brackets a line end does not terminate the
                          # statement, so it is NL rather than NEWLINE.
                          yield (NL if parenlev > 0 else NEWLINE,

                                 token, spos, epos, line)

                      elif initial == '#':

                          assert not token.endswith("\n")

                          yield (COMMENT, token, spos, epos, line)

                      elif token in triple_quoted:

                          endprog = endprogs[token]

                          endmatch = endprog.match(line, pos)

                          if endmatch:                           # all on one line

                              pos = endmatch.end(0)

                              token = line[start:pos]

                              yield (STRING, token, spos, (lnum, pos), line)

                          else:

                              strstart = (lnum, start)           # multiple lines

                              contstr = line[start:]

                              contline = line

                              break

                      elif initial in single_quoted or \

                          token[:2] in single_quoted or \

                          token[:3] in single_quoted:

                          if token[-1] == '\n':                  # continued string

                              strstart = (lnum, start)

                              # Prefix letters map to None in endprogs, so the
                              # ``or`` chain walks past them to the quote character.
                              endprog = (endprogs[initial] or endprogs[token[1]] or

                                         endprogs[token[2]])

                              contstr, needcont = line[start:], 1

                              contline = line

                              break

                          else:                                  # ordinary string

                              yield (STRING, token, spos, epos, line)

                      elif initial in namechars:                 # ordinary name

                          yield (NAME, token, spos, epos, line)

                      elif initial == '\\':                      # continued stmt

                          continued = 1

                      else:

                          # Everything else is an operator; brackets also adjust
                          # the nesting depth.
                          if initial in '([{':

                              parenlev += 1

                          elif initial in ')]}':

                              parenlev -= 1

                          yield (OP, token, spos, epos, line)

                  else:

                      # Nothing matched at this position: emit the single
                      # character as an error token and move on.
                      yield (ERRORTOKEN, line[pos],

                                 (lnum, pos), (lnum, pos+1), line)

                      pos += 1

          for indent in indents[1:]:                 # pop remaining indent levels

              yield (DEDENT, '', (lnum, 0), (lnum, 0), '')

          yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

      if __name__ == '__main__':                     # testing
          # Print the tokens of the file named as the first command-line
          # argument, or of standard input when no argument is given.
          import sys
          if len(sys.argv) > 1:
              source = open(sys.argv[1])
              try:
                  tokenize(source.readline)
              finally:
                  # The original left the file open; close it even if
                  # tokenizing fails.
                  source.close()
          else:
              tokenize(sys.stdin.readline)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				"""Patched version of standard library tokenize, to deal with various bugs.

				Patches

				- Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
				manually applied.
				- Newlines in comments and blank lines should be either NL or NEWLINE, depending
				on whether they are in a multi-line statement. Filed as Python issue #17061.

				-------------------------------------------------------------------------------
				Tokenization help for Python programs.

				generate_tokens(readline) is a generator that breaks a stream of
				text into Python tokens. It accepts a readline-like method which is called
				repeatedly to get the next line of input (or "" for EOF). It generates
				5-tuples with these members:

				the token type (see token.py)
				the token (a string)
				the starting (row, column) indices of the token (a 2-tuple of ints)
				the ending (row, column) indices of the token (a 2-tuple of ints)
				the original line (string)

				It is designed to match the working of the Python tokenizer exactly, except
				that it produces COMMENT tokens for comments and gives type OP for all
				operators

				Older entry points
				tokenize_loop(readline, tokeneater)
				tokenize(readline, tokeneater=printtoken)
				are the same, except instead of generating tokens, tokeneater is a callback
				function to which the 5 fields described above are passed as 5 arguments,
				each time a new token is found."""

				__author__ = 'Ka-Ping Yee <ping@lfw.org>'
				__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
				'Skip Montanaro, Raymond Hettinger')

				import string, re
				from token import *

				import token
				__all__ = [x for x in dir(token) if not x.startswith("_")]
				__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
				del x
				del token

				__all__ += ["TokenError"]

				COMMENT = N_TOKENS
				tok_name[COMMENT] = 'COMMENT'
				NL = N_TOKENS + 1
				tok_name[NL] = 'NL'
				N_TOKENS += 2

				def group(*choices): return '(' + '|'.join(choices) + ')'
				def any(*choices): return group(*choices) + '*'
				def maybe(*choices): return group(*choices) + '?'

				Whitespace = r'[ \f\t]*'
				Comment = r'#[^\r\n]*'
				Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
				Name = r'[a-zA-Z_]\w*'

				Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
				Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
				Binnumber = r'0[bB][01]+[lL]?'
				Decnumber = r'[1-9]\d*[lL]?'
				Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
				Exponent = r'[eE][-+]?\d+'
				Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
				Expfloat = r'\d+' + Exponent
				Floatnumber = group(Pointfloat, Expfloat)
				Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
				Number = group(Imagnumber, Floatnumber, Intnumber)

				# Tail end of ' string.
				Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
				# Tail end of " string.
				Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
				# Tail end of ''' string.
				Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
				# Tail end of """ string.
				Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
				Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
				# Single-line ' or " string.
				String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
				r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

				# Because of leftmost-then-longest match semantics, be sure to put the
				# longest operators first (e.g., if = came before ==, == would get
				# recognized as two instances of =).
				Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
				r"//=?",
				r"[+\-*/%&|^=<>]=?",
				r"~")

				Bracket = '[][(){}]'
				Special = group(r'\r?\n', r'[:;.,`@]')
				Funny = group(Operator, Bracket, Special)

				PlainToken = group(Number, Funny, String, Name)
				Token = Ignore + PlainToken

				# First (or only) line of ' or " string.
				ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
				group("'", r'\\\r?\n'),
				r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
				group('"', r'\\\r?\n'))
				PseudoExtras = group(r'\\\r?\n', Comment, Triple)
				PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

				tokenprog, pseudoprog, single3prog, double3prog = map(
				re.compile, (Token, PseudoToken, Single3, Double3))
				endprogs = {"'": re.compile(Single), '"': re.compile(Double),
				"'''": single3prog, '"""': double3prog,
				"r'''": single3prog, 'r"""': double3prog,
				"u'''": single3prog, 'u"""': double3prog,
				"ur'''": single3prog, 'ur"""': double3prog,
				"R'''": single3prog, 'R"""': double3prog,
				"U'''": single3prog, 'U"""': double3prog,
				"uR'''": single3prog, 'uR"""': double3prog,
				"Ur'''": single3prog, 'Ur"""': double3prog,
				"UR'''": single3prog, 'UR"""': double3prog,
				"b'''": single3prog, 'b"""': double3prog,
				"br'''": single3prog, 'br"""': double3prog,
				"B'''": single3prog, 'B"""': double3prog,
				"bR'''": single3prog, 'bR"""': double3prog,
				"Br'''": single3prog, 'Br"""': double3prog,
				"BR'''": single3prog, 'BR"""': double3prog,
				'r': None, 'R': None, 'u': None, 'U': None,
				'b': None, 'B': None}

				triple_quoted = {}
				for t in ("'''", '"""',
				"r'''", 'r"""', "R'''", 'R"""',
				"u'''", 'u"""', "U'''", 'U"""',
				"ur'''", 'ur"""', "Ur'''", 'Ur"""',
				"uR'''", 'uR"""', "UR'''", 'UR"""',
				"b'''", 'b"""', "B'''", 'B"""',
				"br'''", 'br"""', "Br'''", 'Br"""',
				"bR'''", 'bR"""', "BR'''", 'BR"""'):
				triple_quoted[t] = t
				single_quoted = {}
				for t in ("'", '"',
				"r'", 'r"', "R'", 'R"',
				"u'", 'u"', "U'", 'U"',
				"ur'", 'ur"', "Ur'", 'Ur"',
				"uR'", 'uR"', "UR'", 'UR"',
				"b'", 'b"', "B'", 'B"',
				"br'", 'br"', "Br'", 'Br"',
				"bR'", 'bR"', "BR'", 'BR"' ):
				single_quoted[t] = t

				tabsize = 8

				class TokenError(Exception): pass

				class StopTokenizing(Exception): pass

				def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
				srow, scol = srow_scol
				erow, ecol = erow_ecol
				print "%d,%d-%d,%d:\t%s\t%s" % \
				(srow, scol, erow, ecol, tok_name[type], repr(token))

				def tokenize(readline, tokeneater=printtoken):
				"""
				The tokenize() function accepts two parameters: one representing the
				input stream, and one providing an output mechanism for tokenize().

				The first parameter, readline, must be a callable object which provides
				the same interface as the readline() method of built-in file objects.
				Each call to the function should return one line of input as a string.

				The second parameter, tokeneater, must also be a callable object. It is
				called once for each token, with five arguments, corresponding to the
				tuples generated by generate_tokens().
				"""
				try:
				tokenize_loop(readline, tokeneater)
				except StopTokenizing:
				pass

				# backwards compatible interface
				def tokenize_loop(readline, tokeneater):
				for token_info in generate_tokens(readline):
				tokeneater(*token_info)

				class Untokenizer:

				def __init__(self):
				self.tokens = []
				self.prev_row = 1
				self.prev_col = 0

				def add_whitespace(self, start):
				row, col = start
				assert row >= self.prev_row
				col_offset = col - self.prev_col
				if col_offset > 0:
				self.tokens.append(" " * col_offset)
				elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
				# Line was backslash-continued
				self.tokens.append(" ")

				def untokenize(self, tokens):
				iterable = iter(tokens)
				for t in iterable:
				if len(t) == 2:
				self.compat(t, iterable)
				break
				tok_type, token, start, end = t[:4]
				self.add_whitespace(start)
				self.tokens.append(token)
				self.prev_row, self.prev_col = end
				if tok_type in (NEWLINE, NL):
				self.prev_row += 1
				self.prev_col = 0
				return "".join(self.tokens)

				def compat(self, token, iterable):
				# This import is here to avoid problems when the itertools
				# module is not built yet and tokenize is imported.
				from itertools import chain
				startline = False
				prevstring = False
				indents = []
				toks_append = self.tokens.append
				for tok in chain([token], iterable):
				toknum, tokval = tok[:2]

				if toknum in (NAME, NUMBER):
				tokval += ' '

				# Insert a space between two consecutive strings
				if toknum == STRING:
				if prevstring:
				tokval = ' ' + tokval
				prevstring = True
				else:
				prevstring = False

				if toknum == INDENT:
				indents.append(tokval)
				continue
				elif toknum == DEDENT:
				indents.pop()
				continue
				elif toknum in (NEWLINE, NL):
				startline = True
				elif startline and indents:
				toks_append(indents[-1])
				startline = False
				toks_append(tokval)

				def untokenize(iterable):
				"""Transform tokens back into Python source code.

				Each element returned by the iterable must be a token sequence
				with at least two elements, a token number and token value. If
				only two tokens are passed, the resulting output is poor.

				Round-trip invariant for full input:
				Untokenized source will match input source exactly

				Round-trip invariant for limited intput:
				# Output text will tokenize the back to the input
				t1 = [tok[:2] for tok in generate_tokens(f.readline)]
				newcode = untokenize(t1)
				readline = iter(newcode.splitlines(1)).next
				t2 = [tok[:2] for tok in generate_tokens(readline)]
				assert t1 == t2
				"""
				ut = Untokenizer()
				return ut.untokenize(iterable)

				def generate_tokens(readline):
				"""
				The generate_tokens() generator requires one argment, readline, which
				must be a callable object which provides the same interface as the
				readline() method of built-in file objects. Each call to the function
				should return one line of input as a string. Alternately, readline
				can be a callable function terminating with StopIteration:
				readline = open(myfile).next # Example of alternate readline

				The generator produces 5-tuples with these members: the token type; the
				token string; a 2-tuple (srow, scol) of ints specifying the row and
				column where the token begins in the source; a 2-tuple (erow, ecol) of
				ints specifying the row and column where the token ends in the source;
				and the line on which the token was found. The line passed is the
				logical line; continuation lines are included.
				"""
				lnum = parenlev = continued = 0
				namechars, numchars = string.ascii_letters + '_', '0123456789'
				contstr, needcont = '', 0
				contline = None
				indents = [0]

				while 1: # loop over lines in stream
				try:
				line = readline()
				except StopIteration:
				line = ''
				lnum += 1
				pos, max = 0, len(line)

				if contstr: # continued string
				if not line:
				raise TokenError, ("EOF in multi-line string", strstart)
				endmatch = endprog.match(line)
				if endmatch:
				pos = end = endmatch.end(0)
				yield (STRING, contstr + line[:end],
				strstart, (lnum, end), contline + line)
				contstr, needcont = '', 0
				contline = None
				elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
				yield (ERRORTOKEN, contstr + line,
				strstart, (lnum, len(line)), contline)
				contstr = ''
				contline = None
				continue
				else:
				contstr = contstr + line
				contline = contline + line
				continue

				elif parenlev == 0 and not continued: # new statement
				if not line: break
				column = 0
				while pos < max: # measure leading whitespace
				if line[pos] == ' ':
				column += 1
				elif line[pos] == '\t':
				column = (column//tabsize + 1)*tabsize
				elif line[pos] == '\f':
				column = 0
				else:
				break
				pos += 1
				if pos == max:
				break

				if line[pos] in '#\r\n': # skip comments or blank lines
				if line[pos] == '#':
				comment_token = line[pos:].rstrip('\r\n')
				nl_pos = pos + len(comment_token)
				yield (COMMENT, comment_token,
				(lnum, pos), (lnum, pos + len(comment_token)), line)
				yield (NEWLINE, line[nl_pos:],
				(lnum, nl_pos), (lnum, len(line)), line)
				else:
				yield (NEWLINE, line[pos:],
				(lnum, pos), (lnum, len(line)), line)
				continue

				if column > indents[-1]: # count indents or dedents
				indents.append(column)
				yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
				while column < indents[-1]:
				if column not in indents:
				raise IndentationError(
				"unindent does not match any outer indentation level",
				("<tokenize>", lnum, pos, line))
				indents = indents[:-1]
				yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

				else: # continued statement
				if not line:
				raise TokenError, ("EOF in multi-line statement", (lnum, 0))
				continued = 0

				while pos < max:
				pseudomatch = pseudoprog.match(line, pos)
				if pseudomatch: # scan for tokens
				start, end = pseudomatch.span(1)
				spos, epos, pos = (lnum, start), (lnum, end), end
				token, initial = line[start:end], line[start]

				if initial in numchars or \
				(initial == '.' and token != '.'): # ordinary number
				yield (NUMBER, token, spos, epos, line)
				elif initial in '\r\n':
				yield (NL if parenlev > 0 else NEWLINE,
				token, spos, epos, line)
				elif initial == '#':
				assert not token.endswith("\n")
				yield (COMMENT, token, spos, epos, line)
				elif token in triple_quoted:
				endprog = endprogs[token]
				endmatch = endprog.match(line, pos)
				if endmatch: # all on one line
				pos = endmatch.end(0)
				token = line[start:pos]
				yield (STRING, token, spos, (lnum, pos), line)
				else:
				strstart = (lnum, start) # multiple lines
				contstr = line[start:]
				contline = line
				break
				elif initial in single_quoted or \
				token[:2] in single_quoted or \
				token[:3] in single_quoted:
				if token[-1] == '\n': # continued string
				strstart = (lnum, start)
				endprog = (endprogs[initial] or endprogs[token[1]] or
				endprogs[token[2]])
				contstr, needcont = line[start:], 1
				contline = line
				break
				else: # ordinary string
				yield (STRING, token, spos, epos, line)
				elif initial in namechars: # ordinary name
				yield (NAME, token, spos, epos, line)
				elif initial == '\\': # continued stmt
				continued = 1
				else:
				if initial in '([{':
				parenlev += 1
				elif initial in ')]}':
				parenlev -= 1
				yield (OP, token, spos, epos, line)
				else:
				yield (ERRORTOKEN, line[pos],
				(lnum, pos), (lnum, pos+1), line)
				pos += 1

				for indent in indents[1:]: # pop remaining indent levels
				yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
				yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

				if __name__ == '__main__': # testing
				import sys
				if len(sys.argv) > 1:
				tokenize(open(sys.argv[1]).readline)
				else:
				tokenize(sys.stdin.readline)