upstream/ipython Commit - r13349:6edc3771

Make raise statements Python 3 compatible....

Thomas Kluyver -

r13349:6edc3771

parent child

IPython/lib/display.py

0 +2 -4

              """Various display related classes.
              Authors : MinRK, gregcaporaso, dannystaple
              """
              from os.path import exists, isfile, splitext, abspath, join, isdir
              from os import walk, sep
              from IPython.core.display import DisplayObject
              class Audio(DisplayObject):
                  """Create an audio object.

                  When this object is returned by an input cell or passed to the
                  display function, it will result in Audio controls being displayed
                  in the frontend (only works in the notebook).

                  Parameters
                  ----------
                  data : numpy array, list, unicode, str or bytes
                      Can be a
                      * Numpy 1d array containing the desired waveform (mono)
                      * List of float or integer representing the waveform (mono)
                      * String containing the filename
                      * Bytestring containing raw PCM data or
                      * URL pointing to a file on the web.

                      If the array option is used the waveform will be normalized.

                      If a filename or url is used the format support will be browser
                      dependent.
                  url : unicode
                      A URL to download the data from.
                  filename : unicode
                      Path to a local file to load the data from.
                  embed : boolean
                      Should the audio data be embedded using a data URI (True) or should
                      the original source be referenced. Set this to True if you want the
                      audio to be playable later with no internet connection in the notebook.

                      Default is `True`, unless the keyword argument `url` is set, then
                      default value is `False`.
                  rate : integer
                      The sampling rate of the raw data.
                      Only required when data parameter is being used as an array
                  autoplay : bool
                      Set to True if the audio should immediately start playing.
                      Default is `False`.

                  Raises
                  ------
                  ValueError
                      If none of `data`, `filename` and `url` is given, if `embed` is
                      False without a `url`, or if waveform samples are given without
                      a `rate`.

                  Examples
                  --------
                  # Generate a sound
                  import numpy as np
                  framerate = 44100
                  t = np.linspace(0,5,framerate*5)
                  data = np.sin(2*np.pi*220*t) + np.sin(2*np.pi*224*t)
                  Audio(data,rate=framerate)
                  Audio("http://www.nch.com.au/acm/8k16bitpcm.wav")
                  Audio(url="http://www.w3schools.com/html/horse.ogg")
                  Audio('/path/to/sound.wav')
                  Audio(filename='/path/to/sound.ogg')
                  Audio(b'RAW_WAV_DATA..')
                  Audio(data=b'RAW_WAV_DATA..')
                  """

                  def __init__(self, data=None, filename=None, url=None, embed=None, rate=None, autoplay=False):
                      if filename is None and url is None and data is None:
                          raise ValueError("No audio data found. Expecting filename, url, or data.")
                      if embed is False and url is None:
                          raise ValueError("No url found. Expecting url when embed=False")
                      # Referencing instead of embedding is only the default when a url
                      # was given and the caller did not explicitly ask for embed=True.
                      self.embed = not (url is not None and embed is not True)
                      self.autoplay = autoplay
                      super(Audio, self).__init__(data=data, url=url, filename=filename)
                      if self.data is not None and not isinstance(self.data, bytes):
                          # Anything that is not already a bytestring is treated as a
                          # waveform and encoded to WAV, which needs a sampling rate.
                          if rate is None:
                              raise ValueError("rate must be specified when data is a "
                                               "numpy array or list of audio samples.")
                          self.data = self._make_wav(data, rate)

                  def reload(self):
                      """Reload the raw data from file or URL and guess the mimetype."""
                      import mimetypes
                      if self.embed:
                          super(Audio, self).reload()
                      if self.filename is not None:
                          self.mimetype = mimetypes.guess_type(self.filename)[0]
                      elif self.url is not None:
                          self.mimetype = mimetypes.guess_type(self.url)[0]
                      else:
                          # Raw samples/bytes are always served as WAV
                          self.mimetype = "audio/wav"

                  def _make_wav(self, data, rate):
                      """Transform a mono waveform into a 16-bit PCM WAV bytestring.

                      The samples are normalized so that the largest absolute value maps
                      to 32767. An all-zero waveform is encoded as silence. numpy is used
                      when it is importable, otherwise a pure Python fallback runs.
                      """
                      import struct
                      from io import BytesIO
                      import wave
                      try:
                          import numpy as np
                      except ImportError:
                          np = None
                      if np is not None:
                          data = np.array(data, dtype=float)
                          if len(data.shape) > 1:
                              raise ValueError("encoding of stereo PCM signals are unsupported")
                          peak = np.max(np.abs(data))
                          if peak == 0:
                              # Avoid 0/0 -> NaN samples for a silent waveform
                              scaled = np.zeros(data.shape, dtype=np.int16).tolist()
                          else:
                              scaled = np.int16(data / peak * 32767).tolist()
                      else:
                          peak = float(max([abs(x) for x in data]))
                          if peak == 0:
                              # Avoid ZeroDivisionError for a silent waveform
                              scaled = [0 for x in data]
                          else:
                              scaled = [int(x / peak * 32767) for x in data]
                      fp = BytesIO()
                      waveobj = wave.open(fp, mode='wb')
                      waveobj.setnchannels(1)
                      waveobj.setframerate(rate)
                      waveobj.setsampwidth(2)
                      waveobj.setcomptype('NONE', 'NONE')
                      # '<h' packs each sample as a little-endian signed 16-bit integer
                      waveobj.writeframes(b''.join([struct.pack('<h', x) for x in scaled]))
                      val = fp.getvalue()
                      waveobj.close()
                      return val

                  def _data_and_metadata(self):
                      """shortcut for returning metadata with url information, if defined"""
                      if self.url:
                          return self.data, {'url': self.url}
                      return self.data

                  def _repr_html_(self):
                      """Return the HTML ``<audio>`` element for this object."""
                      src = """
                              <audio controls="controls" {autoplay}>
                                  <source src="{src}" type="{type}" />
                                  Your browser does not support the audio element.
                              </audio>
                            """
                      return src.format(src=self.src_attr(), type=self.mimetype, autoplay=self.autoplay_attr())

                  def src_attr(self):
                      """Return the value for the ``src`` attribute.

                      This is a base64 data URI when embedding, the url when only
                      referencing, and an empty string otherwise.
                      """
                      import base64
                      if self.embed and (self.data is not None):
                          encoded = base64.b64encode(self.data).decode('ascii')
                          return """data:{type};base64,{base64}""".format(type=self.mimetype,
                                                                          base64=encoded)
                      elif self.url is not None:
                          return self.url
                      else:
                          return ""

                  def autoplay_attr(self):
                      """Return the ``autoplay`` HTML attribute, or '' when disabled."""
                      if self.autoplay:
                          return 'autoplay="autoplay"'
                      return ''
              class IFrame(object):
                  """
                  Generic class to embed an iframe in an IPython notebook

                  Parameters
                  ----------
                  src : str
                      URL the iframe should display.
                  width, height :
                      Values substituted into the iframe's width/height attributes.
                  **kwargs :
                      Extra items, url-encoded and appended to `src` as a query string.
                  """
                  # Template filled in by _repr_html_; every attribute value is quoted.
                  iframe = """
                      <iframe
                          width="{width}"
                          height="{height}"
                          src="{src}{params}"
                          frameborder="0"
                          allowfullscreen
                      ></iframe>
                      """
                  def __init__(self, src, width, height, **kwargs):
                      self.src = src
                      self.width = width
                      self.height = height
                      self.params = kwargs
                  def _repr_html_(self):
                      """return the embed iframe"""
                      if self.params:
                          # urlencode moved to urllib.parse in Python 3
                          try:
                              from urllib.parse import urlencode
                          except ImportError:
                              from urllib import urlencode
                          params = "?" + urlencode(self.params)
                      else:
                          params = ""
                      return self.iframe.format(src=self.src,
                                                width=self.width,
                                                height=self.height,
                                                params=params)
              class YouTubeVideo(IFrame):
                  """Embed a YouTube video in an IPython session, given its video id.
                  For the video shown at
                  http://www.youtube.com/watch?v=foo
                  the id is "foo":
                  vid = YouTubeVideo("foo")
                  display(vid)
                  Extra keyword arguments become query parameters of the embed URL,
                  e.g. to start 30 seconds in:
                  vid = YouTubeVideo("abc", start=30)
                  display(vid)
                  A start offset given as hours, minutes, seconds can be computed with:
                  start=int(timedelta(hours=1, minutes=46, seconds=40).total_seconds())
                  The available parameters are documented at
                  https://developers.google.com/youtube/player_parameters#parameter-subheader
                  """
                  def __init__(self, id, width=400, height=300, **kwargs):
                      embed_url = "http://www.youtube.com/embed/{video}".format(video=id)
                      super(YouTubeVideo, self).__init__(embed_url, width, height, **kwargs)
              class VimeoVideo(IFrame):
                  """
                  Embed a Vimeo video in an IPython session, given its video id.
                  Extra keyword arguments become query parameters of the player URL.
                  """
                  def __init__(self, id, width=400, height=300, **kwargs):
                      player_url = "http://player.vimeo.com/video/{video}".format(video=id)
                      super(VimeoVideo, self).__init__(player_url, width, height, **kwargs)
              class ScribdDocument(IFrame):
                  """
                  Embed a Scribd document in an IPython session, given its document id.
                  Pass start_page to choose the page the document opens on, and
                  view_mode to choose the display type, one of scroll | slideshow | book
                  e.g. to display Wes' foundational paper about PANDAS in book mode from page 3
                  ScribdDocument(71048089, width=800, height=400, start_page=3, view_mode="book")
                  """
                  def __init__(self, id, width=400, height=300, **kwargs):
                      embed_url = "http://www.scribd.com/embeds/{doc}/content".format(doc=id)
                      super(ScribdDocument, self).__init__(embed_url, width, height, **kwargs)
              class FileLink(object):
                  """Class for embedding a local file link in an IPython session, based on path
                  e.g. to embed a link that was generated in the IPython notebook as my/data.txt
                  you would do::
                      local_file = FileLink("my/data.txt")
                      display(local_file)
                  or in the HTML notebook, just::
                      FileLink("my/data.txt")
                  """
                  # %-template for one link: (href, link text)
                  html_link_str = "<a href='%s' target='_blank'>%s</a>"
                  def __init__(self,
                               path,
                               url_prefix='files/',
                               result_html_prefix='',
                               result_html_suffix='<br>'):
                      """
                      Parameters
                      ----------
                      path : str
                          path to the file that should be formatted
                      url_prefix : str
                          prefix to be prepended to the path to form a working link
                          [default: 'files/']
                      result_html_prefix : str
                          text to prepend to the beginning of the link [default: none]
                      result_html_suffix : str
                          text to append at the end of link [default: '<br>']

                      Raises
                      ------
                      ValueError
                          if path is a directory (use FileLinks for directories)
                      """
                      if isdir(path):
                          # Call form of raise: valid on both Python 2 and Python 3
                          raise ValueError("Cannot display a directory using FileLink. "
                            "Use FileLinks to display '%s'." % path)
                      self.path = path
                      self.url_prefix = url_prefix
                      self.result_html_prefix = result_html_prefix
                      self.result_html_suffix = result_html_suffix
                  def _format_path(self):
                      """return the html link, wrapped in the configured prefix and suffix"""
                      fp = ''.join([self.url_prefix,self.path])
                      return ''.join([self.result_html_prefix,
                                      self.html_link_str % (fp, self.path),
                                      self.result_html_suffix])
                  def _repr_html_(self):
                      """return html link to file, or a notice when the path does not exist
                      """
                      if not exists(self.path):
                          return ("Path (<tt>%s</tt>) doesn't exist. "
                                  "It may still be in the process of "
                                  "being generated, or you may have the "
                                  "incorrect path." % self.path)
                      return self._format_path()
                  def __repr__(self):
                      """return absolute path to file
                      """
                      return abspath(self.path)
              class FileLinks(FileLink):
                  """Class for embedding local file links in an IPython session, based on path
                  e.g. to embed links to files that were generated in the IPython notebook under my/data
                  you would do:
                  local_files = FileLinks("my/data")
                  display(local_files)
                  or in the HTML notebook, just
                  FileLinks("my/data")
                  """
                  def __init__(self,
                               path,
                               url_prefix='files/',
                               included_suffixes=None,
                               result_html_prefix='',
                               result_html_suffix='<br>',
                               notebook_display_formatter=None,
                               terminal_display_formatter=None):
                      """
                          included_suffixes : list of filename suffixes to include when
                           formatting output [default: include all files]
                          See the FileLink (baseclass of FileLinks) docstring for
                           information on additional parameters.
                          notebook_display_formatter : func used to format links for display
                           in the notebook. See discussion of formatter function below.
                          terminal_display_formatter : func used to format links for display
                           in the terminal. See discussion of formatter function below.
                          Raises ValueError if path is a file (use FileLink for files).
                          Passing custom formatter functions
                          ----------------------------------
                           Formatter functions must be of the form:
                            f(dirname, fnames, included_suffixes)
                             dirname : the name of a directory (a string),
                             fnames :  a list of the files in that directory
                             included_suffixes : a list of the file suffixes that should be
                                                 included in the output (passing None means
                                                 to include all suffixes in the output in
                                                 the built-in formatters)
                             returns a list of lines that will be printed in the
                             notebook (if passing notebook_display_formatter) or the terminal
                             (if passing terminal_display_formatter). This function is iterated
                             over for each directory in self.path. Default formatters are in
                             place, can be passed here to support alternative formatting.
                      """
                      if isfile(path):
                          # Call form of raise: valid on both Python 2 and Python 3
                          raise ValueError("Cannot display a file using FileLinks. "
                            "Use FileLink to display '%s'." % path)
                      self.included_suffixes = included_suffixes
                      # remove trailing slashs for more consistent output formatting
                      path = path.rstrip('/')
                      # The attributes are set here directly rather than through
                      # FileLink.__init__, which raises ValueError for directories.
                      self.path = path
                      self.url_prefix = url_prefix
                      self.result_html_prefix = result_html_prefix
                      self.result_html_suffix = result_html_suffix
                      self.notebook_display_formatter = \
                           notebook_display_formatter or self._get_notebook_display_formatter()
                      self.terminal_display_formatter = \
                           terminal_display_formatter or self._get_terminal_display_formatter()
                  def _get_display_formatter(self,
                                             dirname_output_format,
                                             fname_output_format,
                                             fp_format,
                                             fp_cleaner=None):
                      """ generate built-in formatter function
                         this is used to define both the notebook and terminal built-in
                          formatters as they only differ by some wrapper text for each entry
                         dirname_output_format: string to use for formatting directory
                          names, dirname will be substituted for a single "%s" which
                          must appear in this string
                         fname_output_format: string to use for formatting file names,
                          if a single "%s" appears in the string, fname will be substituted
                          if two "%s" appear in the string, the path to fname will be
                           substituted for the first and fname will be substituted for the
                           second
                         fp_format: string to use for formatting filepaths, must contain
                          exactly two "%s" and the dirname will be subsituted for the first
                          and fname will be substituted for the second
                         fp_cleaner: optional function applied to each formatted filepath
                          before it is substituted into fname_output_format
                      """
                      def f(dirname, fnames, included_suffixes=None):
                          # begin by figuring out which filenames, if any,
                          # are going to be displayed
                          display_fnames = [
                              fname for fname in fnames
                              if (isfile(join(dirname, fname)) and
                                  (included_suffixes is None or
                                   splitext(fname)[1] in included_suffixes))]
                          if not display_fnames:
                              # if there are no filenames to display, don't print anything
                              # (not even the directory name)
                              return []
                          # otherwise print the formatted directory name followed by
                          # the formatted filenames
                          result = [dirname_output_format % dirname]
                          for fname in display_fnames:
                              fp = fp_format % (dirname, fname)
                              if fp_cleaner is not None:
                                  fp = fp_cleaner(fp)
                              try:
                                  # output can include both a filepath and a filename...
                                  fname_output_line = fname_output_format % (fp, fname)
                              except TypeError:
                                  # ... or just a single filepath
                                  fname_output_line = fname_output_format % fname
                              result.append(fname_output_line)
                          return result
                      return f
                  def _get_notebook_display_formatter(self,
                                                      spacer="&nbsp;&nbsp;"):
                      """ generate function to use for notebook formatting
                      """
                      dirname_output_format = \
                       self.result_html_prefix + "%s/" + self.result_html_suffix
                      fname_output_format = \
                       self.result_html_prefix + spacer + self.html_link_str + self.result_html_suffix
                      fp_format = self.url_prefix + '%s/%s'
                      if sep == "\\":
                          # Working on a platform where the path separator is "\", so
                          # must convert these to "/" for generating a URI
                          def fp_cleaner(fp):
                              # Replace all occurences of backslash ("\") with a forward
                              # slash ("/") - this is necessary on windows when a path is
                              # provided as input, but we must link to a URI
                              return fp.replace('\\','/')
                      else:
                          fp_cleaner = None
                      return self._get_display_formatter(dirname_output_format,
                                                         fname_output_format,
                                                         fp_format,
                                                         fp_cleaner)
                  def _get_terminal_display_formatter(self,
                                                      spacer="  "):
                      """ generate function to use for terminal formatting
                      """
                      dirname_output_format = "%s/"
                      fname_output_format = spacer + "%s"
                      fp_format = '%s/%s'
                      return self._get_display_formatter(dirname_output_format,
                                                         fname_output_format,
                                                         fp_format)
                  def _format_walk(self, formatter):
                      """ walk self.path in sorted order and join the lines that
                          formatter returns for each directory with newlines
                      """
                      result_lines = []
                      for dirname, subdirs, fnames in sorted(walk(self.path)):
                          result_lines.extend(
                              formatter(dirname, fnames, self.included_suffixes))
                      return '\n'.join(result_lines)
                  def _format_path(self):
                      """return newline-separated notebook (html) lines
                      """
                      return self._format_walk(self.notebook_display_formatter)
                  def __repr__(self):
                      """return newline-separated terminal lines
                      """
                      return self._format_walk(self.terminal_display_formatter)

IPython/utils/_tokenize_py2.py

0 +2 -2

              """Patched version of standard library tokenize, to deal with various bugs.
              Patches
              - Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
                manually applied.
              - Newlines in comments and blank lines should be either NL or NEWLINE, depending
                on whether they are in a multi-line statement. Filed as Python issue #17061.
              -------------------------------------------------------------------------------
              Tokenization help for Python programs.
              generate_tokens(readline) is a generator that breaks a stream of
              text into Python tokens.  It accepts a readline-like method which is called
              repeatedly to get the next line of input (or "" for EOF).  It generates
              5-tuples with these members:
                  the token type (see token.py)
                  the token (a string)
                  the starting (row, column) indices of the token (a 2-tuple of ints)
                  the ending (row, column) indices of the token (a 2-tuple of ints)
                  the original line (string)
              It is designed to match the working of the Python tokenizer exactly, except
              that it produces COMMENT tokens for comments and gives type OP for all
              operators
              Older entry points
                  tokenize_loop(readline, tokeneater)
                  tokenize(readline, tokeneater=printtoken)
              are the same, except instead of generating tokens, tokeneater is a callback
              function to which the 5 fields described above are passed as 5 arguments,
              each time a new token is found."""
              from __future__ import print_function
              __author__ = 'Ka-Ping Yee <ping@lfw.org>'
              __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
                             'Skip Montanaro, Raymond Hettinger')
              import string, re
              from token import *
              import token
              # Re-export every public name of the token module...
              __all__ = [x for x in dir(token) if not x.startswith("_")]
              # ...plus the names this module adds on top of it.
              __all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
              # Drop the helper names so they do not stay in the module namespace
              # (x is the loop variable of the list comprehension above).
              del x
              del token
              __all__ += ["TokenError"]
              # Two extra token types, numbered right after the ones from token.py
              # and registered in tok_name so they can be printed by name.
              COMMENT = N_TOKENS
              tok_name[COMMENT] = 'COMMENT'
              NL = N_TOKENS + 1
              tok_name[NL] = 'NL'
              # Account for the two token types added above.
              N_TOKENS += 2
              def group(*choices):
                  """Return a regex group matching any one of the given patterns."""
                  alternatives = '|'.join(choices)
                  return '(%s)' % alternatives
              def any(*choices):
                  """Return a regex matching zero or more repetitions of any given pattern."""
                  alternatives = '|'.join(choices)
                  return '(%s)*' % alternatives
              def maybe(*choices):
                  """Return a regex matching at most one occurrence of any given pattern."""
                  alternatives = '|'.join(choices)
                  return '(%s)?' % alternatives
              # Regular-expression source strings; they are combined into larger
              # patterns below with group(), any() and maybe().
              # Horizontal whitespace: spaces, form feeds and tabs.
              Whitespace = r'[ \f\t]*'
              # A comment runs from '#' up to (not including) the end of the line.
              Comment = r'#[^\r\n]*'
              # Whitespace, any number of backslash-newline continuations, then an
              # optional comment.
              Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
              Name = r'[a-zA-Z_]\w*'
              # Integer literals; each accepts an optional l/L suffix.
              Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
              # NOTE: '|' has the lowest precedence, so the [lL]? suffix here only
              # applies to the second (plain leading-zero) alternative.
              Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
              Binnumber = r'0[bB][01]+[lL]?'
              Decnumber = r'[1-9]\d*[lL]?'
              Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
              # Floating point literals: digits with a decimal point and an optional
              # exponent, or digits followed by a mandatory exponent.
              Exponent = r'[eE][-+]?\d+'
              Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
              Expfloat = r'\d+' + Exponent
              Floatnumber = group(Pointfloat, Expfloat)
              # Imaginary literals: digits or a float followed by j/J.
              Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
              # Alternatives are tried left to right, so the more specific forms
              # (imaginary, then float) are listed before plain integers.
              Number = group(Imagnumber, Floatnumber, Intnumber)
              # Tail end of ' string.
              Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
              # Tail end of " string.
              Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
              # Tail end of ''' string.
              Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
              # Tail end of """ string.
              Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
              # Opening of a triple-quoted string: optional u/U/b/B prefix, optional
              # r/R prefix, then three quotes.
              Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
              # Single-line ' or " string.
              String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
                             r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
              # Because of leftmost-then-longest match semantics, be sure to put the
              # longest operators first (e.g., if = came before ==, == would get
              # recognized as two instances of =).
              Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                               r"//=?",
                               r"[+\-*/%&|^=<>]=?",
                               r"~")
              # A character class holding the six bracket characters ( ) [ ] { }.
              Bracket = '[][(){}]'
              # A newline (optionally preceded by \r) or a single punctuation character.
              Special = group(r'\r?\n', r'[:;.,`@]')
              Funny = group(Operator, Bracket, Special)
              # One complete token, and the same preceded by ignorable text.
              PlainToken = group(Number, Funny, String, Name)
              Token = Ignore + PlainToken
              # First (or only) line of ' or " string.
              ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                              group("'", r'\\\r?\n'),
                              r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                              group('"', r'\\\r?\n'))
              # Extra alternatives for PseudoToken: a backslash continuation, a
              # comment, or the opening of a triple-quoted string.
              PseudoExtras = group(r'\\\r?\n', Comment, Triple)
              PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
              # Compile once at import time; the triple-quote tail patterns are also
              # stored in endprogs below.
              tokenprog, pseudoprog, single3prog, double3prog = map(
                  re.compile, (Token, PseudoToken, Single3, Double3))
              endprogs = {"'": re.compile(Single), '"': re.compile(Double),
                          "'''": single3prog, '"""': double3prog,
                          "r'''": single3prog, 'r"""': double3prog,
                          "u'''": single3prog, 'u"""': double3prog,
                          "ur'''": single3prog, 'ur"""': double3prog,
                          "R'''": single3prog, 'R"""': double3prog,
                          "U'''": single3prog, 'U"""': double3prog,
                          "uR'''": single3prog, 'uR"""': double3prog,
                          "Ur'''": single3prog, 'Ur"""': double3prog,
                          "UR'''": single3prog, 'UR"""': double3prog,
                          "b'''": single3prog, 'b"""': double3prog,
                          "br'''": single3prog, 'br"""': double3prog,
                          "B'''": single3prog, 'B"""': double3prog,
                          "bR'''": single3prog, 'bR"""': double3prog,
                          "Br'''": single3prog, 'Br"""': double3prog,
                          "BR'''": single3prog, 'BR"""': double3prog,
                          'r': None, 'R': None, 'u': None, 'U': None,
                          'b': None, 'B': None}
              triple_quoted = {}
              for t in ("'''", '"""',
                        "r'''", 'r"""', "R'''", 'R"""',
                        "u'''", 'u"""', "U'''", 'U"""',
                        "ur'''", 'ur"""', "Ur'''", 'Ur"""',
                        "uR'''", 'uR"""', "UR'''", 'UR"""',
                        "b'''", 'b"""', "B'''", 'B"""',
                        "br'''", 'br"""', "Br'''", 'Br"""',
                        "bR'''", 'bR"""', "BR'''", 'BR"""'):
                  triple_quoted[t] = t
              # Table of every spelling that opens a single-line string literal:
              # an optional r/u/ur/b/br prefix in any listed capitalisation,
              # followed by one single or one double quote.  Each key maps to
              # itself; generate_tokens() only uses the table for membership tests.
              single_quoted = {}
              for _prefix in ("", "r", "R", "u", "U", "ur", "Ur", "uR", "UR",
                              "b", "B", "br", "Br", "bR", "BR"):
                  for _quote in ("'", '"'):
                      t = _prefix + _quote
                      single_quoted[t] = t
              # Width of a tab stop: when generate_tokens() measures leading
              # whitespace, a tab advances the column to the next multiple of this.
              tabsize = 8
              class TokenError(Exception):
                  """Raised by generate_tokens() when input ends inside a multi-line
                  string or a multi-line statement."""
              class StopTokenizing(Exception):
                  """Caught and discarded by tokenize(), which then returns normally."""
              def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
                  """Print one token as ``srow,scol-erow,ecol:<TAB>NAME<TAB>repr``.

                  type is the numeric token type (looked up in tok_name), token is
                  the token text, and srow_scol / erow_ecol are the (row, column)
                  pairs where the token starts and ends.  line is accepted so the
                  function matches the five-argument tokeneater signature, but it is
                  not printed.
                  """
                  start_row, start_col = srow_scol
                  end_row, end_col = erow_ecol
                  fields = (start_row, start_col, end_row, end_col,
                            tok_name[type], repr(token))
                  print("%d,%d-%d,%d:\t%s\t%s" % fields)
              def tokenize(readline, tokeneater=printtoken):
                  """
                  Tokenize an input stream and hand every token to a callback.

                  readline must be a callable offering the same interface as the
                  readline() method of built-in file objects: each call should return
                  one line of input as a string.

                  tokeneater must be a callable as well.  It is invoked once per
                  token with five arguments, matching the tuples produced by
                  generate_tokens().  It defaults to printtoken.

                  A StopTokenizing exception escaping from the token loop ends
                  tokenizing quietly instead of propagating to the caller.
                  """
                  try:
                      tokenize_loop(readline, tokeneater)
                  except StopTokenizing:
                      # Deliberate early exit: swallow it and finish normally.
                      return None
              # backwards compatible interface
              def tokenize_loop(readline, tokeneater):
                  """Feed each tuple from generate_tokens(readline) to tokeneater.

                  The tuple is unpacked, so tokeneater receives its members as
                  separate positional arguments.
                  """
                  for fields in generate_tokens(readline):
                      tokeneater(*fields)
              class Untokenizer:
                  """Rebuild source text from a stream of tokens.

                  Two input shapes are handled by untokenize():

                  * tuples with at least four items (type, string, start, end): the
                    positions are used to re-insert whitespace between tokens;
                  * 2-tuples (type, string): no positions are available, so compat()
                    falls back to a spacing heuristic.
                  """

                  def __init__(self):
                      self.tokens = []     # output fragments, joined by untokenize()
                      self.prev_row = 1    # row on which the previous token ended
                      self.prev_col = 0    # column at which the previous token ended

                  def add_whitespace(self, start, tok_type=None):
                      """Append the whitespace separating the previous token from this one.

                      start is the (row, col) position at which the next token begins.
                      tok_type is the type of that token; it defaults to None so that
                      callers using the older one-argument form keep working.

                      Previously this method read ``tok_type`` without it being defined
                      in this scope, which raised NameError as soon as a token started
                      on a later row without a positive column offset.
                      """
                      row, col = start
                      assert row >= self.prev_row
                      col_offset = col - self.prev_col
                      if col_offset > 0:
                          self.tokens.append(" " * col_offset)
                      elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
                          # Line was backslash-continued
                          self.tokens.append(" ")

                  def untokenize(self, tokens):
                      """Return the source text for tokens as a single string.

                      As soon as a 2-tuple is seen, it and all remaining tokens are
                      handed to compat() and position-based processing stops.
                      """
                      iterable = iter(tokens)
                      for t in iterable:
                          if len(t) == 2:
                              self.compat(t, iterable)
                              break
                          tok_type, token, start, end = t[:4]
                          self.add_whitespace(start, tok_type)
                          self.tokens.append(token)
                          self.prev_row, self.prev_col = end
                          if tok_type in (NEWLINE, NL):
                              # The next token starts on a fresh row at column 0.
                              self.prev_row += 1
                              self.prev_col = 0
                      return "".join(self.tokens)

                  def compat(self, token, iterable):
                      """Append text for position-less tokens to self.tokens.

                      token is the first 2-tuple encountered and iterable yields the
                      tokens after it.  Only the first two items of each token (type
                      and string) are used.
                      """
                      # This import is here to avoid problems when the itertools
                      # module is not built yet and tokenize is imported.
                      from itertools import chain
                      startline = False
                      prevstring = False
                      indents = []
                      toks_append = self.tokens.append
                      for tok in chain([token], iterable):
                          toknum, tokval = tok[:2]

                          if toknum in (NAME, NUMBER):
                              # A trailing space keeps adjacent names/numbers apart.
                              tokval += ' '

                          # Insert a space between two consecutive strings
                          if toknum == STRING:
                              if prevstring:
                                  tokval = ' ' + tokval
                              prevstring = True
                          else:
                              prevstring = False

                          if toknum == INDENT:
                              indents.append(tokval)
                              continue
                          elif toknum == DEDENT:
                              indents.pop()
                              continue
                          elif toknum in (NEWLINE, NL):
                              startline = True
                          elif startline and indents:
                              # First token of a new line: emit the current indentation.
                              toks_append(indents[-1])
                              startline = False
                          toks_append(tokval)
              def untokenize(iterable):
                  """Convert a sequence of tokens back into Python source code.

                  Every element produced by the iterable must be a token sequence
                  holding at least a token number and a token value.  When only
                  those two items are supplied the output is of poor quality.

                  Round-trip invariant for full input:
                      The untokenized source matches the input source exactly.

                  Round-trip invariant for limited input:
                      # The output text tokenizes back to the input tokens
                      t1 = [tok[:2] for tok in generate_tokens(f.readline)]
                      newcode = untokenize(t1)
                      readline = iter(newcode.splitlines(1)).next
                      t2 = [tok[:2] for tok in generate_tokens(readline)]
                      assert t1 == t2
                  """
                  return Untokenizer().untokenize(iterable)
              def generate_tokens(readline):
                  """
                  The generate_tokens() generator requires one argument, readline, which
                  must be a callable object which provides the same interface as the
                  readline() method of built-in file objects. Each call to the function
                  should return one line of input as a string.  Alternately, readline
                  can be a callable function terminating with StopIteration:
                      readline = open(myfile).next    # Example of alternate readline
                  The generator produces 5-tuples with these members: the token type; the
                  token string; a 2-tuple (srow, scol) of ints specifying the row and
                  column where the token begins in the source; a 2-tuple (erow, ecol) of
                  ints specifying the row and column where the token ends in the source;
                  and the line on which the token was found. The line passed is the
                  logical line; continuation lines are included.
                  """
                  # lnum: number of the line being processed (incremented before use,
                  # so the first line is 1); parenlev: depth of open ( [ { brackets;
                  # continued: set when a backslash token was seen on the previous line.
                  lnum = parenlev = continued = 0
                  # Characters that may start a name / a number token.
                  namechars, numchars = string.ascii_letters + '_', '0123456789'
                  # contstr: text collected so far for a string that spans lines;
                  # needcont: set for single-quoted strings, whose following lines must
                  # end in backslash-newline unless they close the string.
                  contstr, needcont = '', 0
                  # contline: source lines collected so far for that string.
                  contline = None
                  # Stack of indentation columns currently open.
                  indents = [0]
                  while 1:                                   # loop over lines in stream
                      try:
                          line = readline()
                      except StopIteration:
                          # Treated the same as readline() returning '' (end of input).
                          line = ''
                      lnum += 1
                      pos, max = 0, len(line)
                      if contstr:                            # continued string
                          if not line:
                              # NOTE(review): the '-' and '+' lines below are the removed and
                              # added sides of a diff hunk (old vs. new raise syntax).
-                             raise TokenError, ("EOF in multi-line string", strstart)
+                             raise TokenError("EOF in multi-line string", strstart)
                          endmatch = endprog.match(line)
                          if endmatch:
                              # The string closes on this line; tokenizing resumes after it.
                              pos = end = endmatch.end(0)
                              yield (STRING, contstr + line[:end],
                                     strstart, (lnum, end), contline + line)
                              contstr, needcont = '', 0
                              contline = None
                          elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                              # Neither closed nor continued with a backslash: report the
                              # text collected so far as an error token.
                              yield (ERRORTOKEN, contstr + line,
                                         strstart, (lnum, len(line)), contline)
                              contstr = ''
                              contline = None
                              continue
                          else:
                              contstr = contstr + line
                              contline = contline + line
                              continue
                      elif parenlev == 0 and not continued:  # new statement
                          if not line: break
                          column = 0
                          while pos < max:                   # measure leading whitespace
                              if line[pos] == ' ':
                                  column += 1
                              elif line[pos] == '\t':
                                  column = (column//tabsize + 1)*tabsize
                              elif line[pos] == '\f':
                                  column = 0
                              else:
                                  break
                              pos += 1
                          if pos == max:
                              # Only whitespace up to the end of the line (no newline
                              # character): leave the line loop.
                              break
                          if line[pos] in '#\r\n':           # skip comments or blank lines
                              # These lines produce no INDENT/DEDENT tokens; their line
                              # ending is yielded with type NEWLINE here.
                              if line[pos] == '#':
                                  comment_token = line[pos:].rstrip('\r\n')
                                  nl_pos = pos + len(comment_token)
                                  yield (COMMENT, comment_token,
                                         (lnum, pos), (lnum, pos + len(comment_token)), line)
                                  yield (NEWLINE, line[nl_pos:],
                                         (lnum, nl_pos), (lnum, len(line)), line)
                              else:
                                  yield (NEWLINE, line[pos:],
                                         (lnum, pos), (lnum, len(line)), line)
                              continue
                          if column > indents[-1]:           # count indents or dedents
                              indents.append(column)
                              yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
                          while column < indents[-1]:
                              if column not in indents:
                                  raise IndentationError(
                                      "unindent does not match any outer indentation level",
                                      ("<tokenize>", lnum, pos, line))
                              indents = indents[:-1]
                              yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
                      else:                                  # continued statement
                          if not line:
-                             raise TokenError, ("EOF in multi-line statement", (lnum, 0))
+                             raise TokenError("EOF in multi-line statement", (lnum, 0))
                          continued = 0
                      while pos < max:
                          pseudomatch = pseudoprog.match(line, pos)
                          if pseudomatch:                                # scan for tokens
                              start, end = pseudomatch.span(1)
                              spos, epos, pos = (lnum, start), (lnum, end), end
                              token, initial = line[start:end], line[start]
                              if initial in numchars or \
                                 (initial == '.' and token != '.'):      # ordinary number
                                  yield (NUMBER, token, spos, epos, line)
                              elif initial in '\r\n':
                                  # Inside brackets a line ending is NL, otherwise NEWLINE.
                                  yield (NL if parenlev > 0 else NEWLINE,
                                         token, spos, epos, line)
                              elif initial == '#':
                                  assert not token.endswith("\n")
                                  yield (COMMENT, token, spos, epos, line)
                              elif token in triple_quoted:
                                  endprog = endprogs[token]
                                  endmatch = endprog.match(line, pos)
                                  if endmatch:                           # all on one line
                                      pos = endmatch.end(0)
                                      token = line[start:pos]
                                      yield (STRING, token, spos, (lnum, pos), line)
                                  else:
                                      strstart = (lnum, start)           # multiple lines
                                      contstr = line[start:]
                                      contline = line
                                      break
                              elif initial in single_quoted or \
                                  token[:2] in single_quoted or \
                                  token[:3] in single_quoted:
                                  if token[-1] == '\n':                  # continued string
                                      strstart = (lnum, start)
                                      # Use the first non-None entry among the token's
                                      # leading characters.
                                      endprog = (endprogs[initial] or endprogs[token[1]] or
                                                 endprogs[token[2]])
                                      contstr, needcont = line[start:], 1
                                      contline = line
                                      break
                                  else:                                  # ordinary string
                                      yield (STRING, token, spos, epos, line)
                              elif initial in namechars:                 # ordinary name
                                  yield (NAME, token, spos, epos, line)
                              elif initial == '\\':                      # continued stmt
                                  continued = 1
                              else:
                                  # Everything else is an operator; brackets also adjust
                                  # the nesting depth.
                                  if initial in '([{':
                                      parenlev += 1
                                  elif initial in ')]}':
                                      parenlev -= 1
                                  yield (OP, token, spos, epos, line)
                          else:
                              # No pattern matched here: emit one character as an error
                              # token and move on.
                              yield (ERRORTOKEN, line[pos],
                                         (lnum, pos), (lnum, pos+1), line)
                              pos += 1
                  for indent in indents[1:]:                 # pop remaining indent levels
                      yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
                  yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
              if __name__ == '__main__':                     # testing
                  # Tokenize the file named by the first command-line argument, or
                  # standard input when no argument is given, printing every token
                  # through the default tokeneater.
                  import sys
                  if len(sys.argv) > 1:
                      # The context manager closes the file even if tokenizing raises;
                      # previously the handle was left open.
                      with open(sys.argv[1]) as source_file:
                          tokenize(source_file.readline)
                  else:
                      tokenize(sys.stdin.readline)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages