upstream/ipython Commit - r13349:6edc3771

Make raise statements Python 3 compatible....

Thomas Kluyver -

r13349:6edc3771

parent child

IPython/lib/display.py

0 +2 -4

             """Various display related classes.
             Authors : MinRK, gregcaporaso, dannystaple
             """
             from os.path import exists, isfile, splitext, abspath, join, isdir
             from os import walk, sep
             from IPython.core.display import DisplayObject
             class Audio(DisplayObject):
                 """Create an audio object.
                 When this object is returned by an input cell or passed to the
                 display function, it will result in Audio controls being displayed
                 in the frontend (only works in the notebook).
                 Parameters
                 ----------
                 data : numpy array, list, unicode, str or bytes
                     Can be a
                     * Numpy 1d array containing the desired waveform (mono)
                     * List of float or integer representing the waveform (mono)
                     * String containing the filename
                     * Bytestring containing raw PCM data or
                     * URL pointing to a file on the web.
                     If the array option is used the waveform will be normalized.
                     If a filename or url is used the format support will be browser
                     dependent.
                 url : unicode
                     A URL to download the data from.
                 filename : unicode
                     Path to a local file to load the data from.
                 embed : boolean
                     Should the audio data be embedded using a data URI (True) or should
                     the original source be referenced. Set this to True if you want the
                     audio to be playable later with no internet connection in the notebook.
                     Default is `True`, unless the keyword argument `url` is set, then
                     default value is `False`.
                 rate : integer
                     The sampling rate of the raw data.
                     Only required when data parameter is being used as an array
                 autoplay : bool
                     Set to True if the audio should immediately start playing.
                     Default is `False`.
                 Examples
                 --------
                 # Generate a sound
                 import numpy as np
                 framerate = 44100
                 t = np.linspace(0,5,framerate*5)
                 data = np.sin(2*np.pi*220*t) + np.sin(2*np.pi*224*t)
                 Audio(data,rate=framerate)
                 Audio("http://www.nch.com.au/acm/8k16bitpcm.wav")
                 Audio(url="http://www.w3schools.com/html/horse.ogg")
                 Audio('/path/to/sound.wav')
                 Audio(filename='/path/to/sound.ogg')
                 Audio(b'RAW_WAV_DATA..')
                 Audio(data=b'RAW_WAV_DATA..')
                 """
                 def __init__(self, data=None, filename=None, url=None, embed=None, rate=None, autoplay=False):
                     """Validate the source arguments and prepare the audio payload.
                     Raises
                     ------
                     ValueError
                         If none of `data`, `filename` or `url` is given, or if
                         `embed` is False without a `url` to reference.
                     """
                     if filename is None and url is None and data is None:
                         raise ValueError("No audio data found. Expecting filename, url, or data.")
                     if embed is False and url is None:
                         raise ValueError("No url found. Expecting url when embed=False")
                     # A url source is referenced rather than embedded unless the caller
                     # explicitly asked for embed=True; every other source is embedded.
                     if url is not None and embed is not True:
                         self.embed = False
                     else:
                         self.embed = True
                     self.autoplay = autoplay
                     super(Audio, self).__init__(data=data, url=url, filename=filename)
                     # Anything that is not already a bytestring is treated as a waveform
                     # and encoded to WAV.
                     if self.data is not None and not isinstance(self.data, bytes):
                         self.data = self._make_wav(data,rate)
                 def reload(self):
                     """Reload the raw data from file or URL and guess the mimetype.
                     The raw data is only reloaded (via the base class) when it is going
                     to be embedded. The mimetype is guessed from the filename, else from
                     the url, and falls back to "audio/wav".
                     """
                     import mimetypes
                     if self.embed:
                         super(Audio, self).reload()
                     if self.filename is not None:
                         self.mimetype = mimetypes.guess_type(self.filename)[0]
                     elif self.url is not None:
                         self.mimetype = mimetypes.guess_type(self.url)[0]
                     else:
                         self.mimetype = "audio/wav"
                 def _make_wav(self, data, rate):
                     """Transform a mono waveform into a 16-bit PCM WAV bytestring.
                     Parameters
                     ----------
                     data : numpy array or sequence of numbers
                         The waveform; it is normalized so its peak maps to 32767.
                     rate : integer
                         Sampling rate written into the WAV header.
                     Raises
                     ------
                     ValueError
                         If `rate` is None, or if `data` has more than one dimension
                         (only checked when numpy is available).
                     """
                     import struct
                     from io import BytesIO
                     import wave
                     if rate is None:
                         raise ValueError("rate must be specified when data is a numpy array or list of audio samples.")
                     try:
                         import numpy as np
                     except ImportError:
                         np = None
                     if np is not None:
                         data = np.array(data,dtype=float)
                         if len(data.shape) > 1:
                             raise ValueError("encoding of stereo PCM signals are unsupported")
                         peak = np.max(np.abs(data))
                         if peak == 0:
                             # An all-zero signal cannot be normalized; emit silence
                             # instead of dividing by zero.
                             scaled = np.zeros(data.shape, dtype=np.int16).tolist()
                         else:
                             scaled = np.int16(data/peak*32767).tolist()
                     else:
                         # Pure-Python fallback when numpy is not installed.
                         peak = float(max([abs(x) for x in data]))
                         if peak == 0:
                             scaled = [0 for x in data]
                         else:
                             scaled = [int(x/peak*32767) for x in data]
                     fp = BytesIO()
                     waveobj = wave.open(fp,mode='wb')
                     waveobj.setnchannels(1)
                     waveobj.setframerate(rate)
                     waveobj.setsampwidth(2)
                     waveobj.setcomptype('NONE','NONE')
                     # '<h' packs each sample as a little-endian signed 16-bit integer.
                     waveobj.writeframes(b''.join([struct.pack('<h',x) for x in scaled]))
                     val = fp.getvalue()
                     waveobj.close()
                     return val
                 def _data_and_metadata(self):
                     """shortcut for returning metadata with url information, if defined"""
                     if self.url:
                         return self.data, {'url': self.url}
                     return self.data
                 def _repr_html_(self):
                     """Return the HTML <audio> element for this object."""
                     src = """
                             <audio controls="controls" {autoplay}>
                                 <source src="{src}" type="{type}" />
                                 Your browser does not support the audio element.
                             </audio>
                           """
                     return src.format(src=self.src_attr(),type=self.mimetype, autoplay=self.autoplay_attr())
                 def src_attr(self):
                     """Return the value for the <source> ``src`` attribute.
                     A base64 data URI when embedding and data is present, otherwise the
                     url, otherwise an empty string.
                     """
                     import base64
                     if self.embed and (self.data is not None):
                         data = base64.b64encode(self.data).decode('ascii')
                         return """data:{type};base64,{base64}""".format(type=self.mimetype,
                                                                         base64=data)
                     elif self.url is not None:
                         return self.url
                     else:
                         return ""
                 def autoplay_attr(self):
                     """Return the HTML ``autoplay`` attribute text, or '' when disabled."""
                     if self.autoplay:
                         return 'autoplay="autoplay"'
                     else:
                         return ''
             class IFrame(object):
                 """
                 Generic class to embed an iframe in an IPython notebook
                 Parameters
                 ----------
                 src : str
                     URL placed in the iframe's ``src`` attribute.
                 width, height :
                     Values substituted into the ``width`` and ``height`` attributes.
                 **kwargs :
                     Extra parameters, url-encoded and appended to `src` as a query
                     string.
                 """
                 iframe = """
                     <iframe
                         width="{width}"
                         height="{height}"
                         src="{src}{params}"
                         frameborder="0"
                         allowfullscreen
                     ></iframe>
                     """
                 def __init__(self, src, width, height, **kwargs):
                     self.src = src
                     self.width = width
                     self.height = height
                     self.params = kwargs
                 def _repr_html_(self):
                     """return the embed iframe"""
                     if self.params:
                         # urlencode moved to urllib.parse in Python 3; fall back to
                         # the Python 2 location so both interpreters work.
                         try:
                             from urllib.parse import urlencode
                         except ImportError:
                             from urllib import urlencode
                         params = "?" + urlencode(self.params)
                     else:
                         params = ""
                     return self.iframe.format(src=self.src,
                                               width=self.width,
                                               height=self.height,
                                               params=params)
             class YouTubeVideo(IFrame):
                 """Embed a YouTube video in an IPython session, given its video id.
                 For the video shown at
                 http://www.youtube.com/watch?v=foo
                 the id is "foo":
                 vid = YouTubeVideo("foo")
                 display(vid)
                 Extra keyword arguments become player parameters, e.g. to start
                 30 seconds in:
                 vid = YouTubeVideo("abc", start=30)
                 display(vid)
                 A start offset given as hours, minutes, seconds can be converted with:
                 start=int(timedelta(hours=1, minutes=46, seconds=40).total_seconds())
                 The available player parameters are documented at
                 https://developers.google.com/youtube/player_parameters#parameter-subheader
                 """
                 def __init__(self, id, width=400, height=300, **kwargs):
                     embed_url = "http://www.youtube.com/embed/{0}".format(id)
                     super(YouTubeVideo, self).__init__(embed_url, width, height, **kwargs)
             class VimeoVideo(IFrame):
                 """
                 Embed a Vimeo video in an IPython session, given its video id.
                 Extra keyword arguments are forwarded to IFrame.
                 """
                 def __init__(self, id, width=400, height=300, **kwargs):
                     player_url = "http://player.vimeo.com/video/{0}".format(id)
                     super(VimeoVideo, self).__init__(player_url, width, height, **kwargs)
             class ScribdDocument(IFrame):
                 """
                 Embed a Scribd document in an IPython session, given its document id.
                 Pass start_page to choose the page the document opens on.
                 Pass view_mode to choose the display type, one of scroll | slideshow | book
                 e.g. to display Wes' foundational paper about PANDAS in book mode from page 3
                 ScribdDocument(71048089, width=800, height=400, start_page=3, view_mode="book")
                 """
                 def __init__(self, id, width=400, height=300, **kwargs):
                     embed_url = "http://www.scribd.com/embeds/{0}/content".format(id)
                     super(ScribdDocument, self).__init__(embed_url, width, height, **kwargs)
             class FileLink(object):
                 """Class for embedding a local file link in an IPython session, based on path
                 e.g. to embed a link that was generated in the IPython notebook as my/data.txt
                 you would do::
                     local_file = FileLink("my/data.txt")
                     display(local_file)
                 or in the HTML notebook, just::
                     FileLink("my/data.txt")
                 """
                 # HTML anchor template; _format_path fills it with (href, link text).
                 html_link_str = "<a href='%s' target='_blank'>%s</a>"
                 def __init__(self,
                              path,
                              url_prefix='files/',
                              result_html_prefix='',
                              result_html_suffix='<br>'):
                     """
                     Parameters
                     ----------
                     path : str
                         path to the file that should be formatted
                     url_prefix : str
                         prefix to be prepended to the path to form a working link
                         [default: 'files/']
                     result_html_prefix : str
                         text to prepend to the link [default: none]
                     result_html_suffix : str
                         text to append at the end of link [default: '<br>']
                     Raises
                     ------
                     ValueError
                         if path is a directory (use FileLinks for directories)
                     """
                     if isdir(path):
-                        raise ValueError,\
+                        raise ValueError("Cannot display a directory using FileLink. "
-                         ("Cannot display a directory using FileLink. "
                           "Use FileLinks to display '%s'." % path)
                     self.path = path
                     self.url_prefix = url_prefix
                     self.result_html_prefix = result_html_prefix
                     self.result_html_suffix = result_html_suffix
                 def _format_path(self):
                     """return the html link (prefix + anchor + suffix) for self.path
                     """
                     # The href is url_prefix + path; the link text is the bare path.
                     fp = ''.join([self.url_prefix,self.path])
                     return ''.join([self.result_html_prefix,
                                     self.html_link_str % (fp, self.path),
                                     self.result_html_suffix])
                 def _repr_html_(self):
                     """return html link to file, or an explanatory message if the
                     path does not exist
                     """
                     if not exists(self.path):
                         return ("Path (<tt>%s</tt>) doesn't exist. "
                                 "It may still be in the process of "
                                 "being generated, or you may have the "
                                 "incorrect path." % self.path)
                     return self._format_path()
                 def __repr__(self):
                     """return absolute path to file
                     """
                     return abspath(self.path)
             class FileLinks(FileLink):
                 """Class for embedding local file links in an IPython session, based on path
                 e.g. to embed links to files that were generated in the IPython notebook under my/data
                 you would do:
                 local_files = FileLinks("my/data")
                 display(local_files)
                 or in the HTML notebook, just
                 FileLinks("my/data")
                 """
                 def __init__(self,
                              path,
                              url_prefix='files/',
                              included_suffixes=None,
                              result_html_prefix='',
                              result_html_suffix='<br>',
                              notebook_display_formatter=None,
                              terminal_display_formatter=None):
                     """
                         included_suffixes : list of filename suffixes to include when
                          formatting output [default: include all files]
                         See the FileLink (baseclass of FileLinks) docstring for
                          information on additional parameters.
                         notebook_display_formatter : func used to format links for display
                          in the notebook. See discussion of formatter function below.
                         terminal_display_formatter : func used to format links for display
                          in the terminal. See discussion of formatter function below.
                         Raises ValueError if path is a file (use FileLink for files).
                         Passing custom formatter functions
                         ----------------------------------
                          Formatter functions must be of the form:
                           f(dirname, fnames, included_suffixes)
                            dirname : the name of a directory (a string),
                            fnames :  a list of the files in that directory
                            included_suffixes : a list of the file suffixes that should be
                                                included in the output (passing None means
                                                to include all suffixes in the output in
                                                the built-in formatters)
                            returns a list of lines that will be printed in the
                            notebook (if passing notebook_display_formatter) or the terminal
                            (if passing terminal_display_formatter). This function is called
                            once for each directory found under self.path. Default formatters
                            are in place; custom ones can be passed here to support
                            alternative formatting.
                     """
                     if isfile(path):
-                        raise ValueError,\
+                        raise ValueError("Cannot display a file using FileLinks. "
-                         ("Cannot display a file using FileLinks. "
                           "Use FileLink to display '%s'." % path)
                     self.included_suffixes = included_suffixes
                     # remove trailing slashes for more consistent output formatting
                     path = path.rstrip('/')
                     # The attributes are set here directly; FileLink.__init__ is not
                     # called (it would reject a directory path).
                     self.path = path
                     self.url_prefix = url_prefix
                     self.result_html_prefix = result_html_prefix
                     self.result_html_suffix = result_html_suffix
                     # Fall back to the built-in formatters when none is supplied.
                     self.notebook_display_formatter = \
                          notebook_display_formatter or self._get_notebook_display_formatter()
                     self.terminal_display_formatter = \
                          terminal_display_formatter or self._get_terminal_display_formatter()
                 def _get_display_formatter(self,
                                            dirname_output_format,
                                            fname_output_format,
                                            fp_format,
                                            fp_cleaner=None):
                     """ generate built-in formatter function
                        this is used to define both the notebook and terminal built-in
                         formatters as they only differ by some wrapper text for each entry
                        dirname_output_format: string to use for formatting directory
                         names, dirname will be substituted for a single "%s" which
                         must appear in this string
                        fname_output_format: string to use for formatting file names,
                         if a single "%s" appears in the string, fname will be substituted
                         if two "%s" appear in the string, the path to fname will be
                          substituted for the first and fname will be substituted for the
                          second
                        fp_format: string to use for formatting filepaths, must contain
                         exactly two "%s" and the dirname will be substituted for the first
                         and fname will be substituted for the second
                        fp_cleaner: optional function applied to each formatted filepath
                         before it is substituted into fname_output_format
                         [default: None, the filepath is used as is]
                        returns a function f(dirname, fnames, included_suffixes=None) that
                         returns a list of formatted output lines
                     """
                     def f(dirname, fnames, included_suffixes=None):
                         result = []
                         # begin by figuring out which filenames, if any,
                         # are going to be displayed
                         display_fnames = []
                         for fname in fnames:
                             if (isfile(join(dirname,fname)) and
                                    (included_suffixes == None or
                                     splitext(fname)[1] in included_suffixes)):
                                   display_fnames.append(fname)
                         if len(display_fnames) == 0:
                             # if there are no filenames to display, don't print anything
                             # (not even the directory name)
                             pass
                         else:
                             # otherwise print the formatted directory name followed by
                             # the formatted filenames
                             dirname_output_line = dirname_output_format % dirname
                             result.append(dirname_output_line)
                             for fname in display_fnames:
                                 fp = fp_format % (dirname,fname)
                                 if fp_cleaner is not None:
                                     fp = fp_cleaner(fp)
                                 try:
                                     # output can include both a filepath and a filename...
                                     fname_output_line = fname_output_format % (fp, fname)
                                 except TypeError:
                                     # ... or just a single filepath
                                     fname_output_line = fname_output_format % fname
                                 result.append(fname_output_line)
                         return result
                     return f
                 def _get_notebook_display_formatter(self,
                                                     spacer="&nbsp;&nbsp;"):
                     """ generate function to use for notebook formatting
                        spacer: text placed before each file link [default: two
                         non-breaking spaces]
                     """
                     dirname_output_format = \
                      self.result_html_prefix + "%s/" + self.result_html_suffix
                     fname_output_format = \
                      self.result_html_prefix + spacer + self.html_link_str + self.result_html_suffix
                     fp_format = self.url_prefix + '%s/%s'
                     if sep == "\\":
                         # Working on a platform where the path separator is "\", so
                         # must convert these to "/" for generating a URI
                         def fp_cleaner(fp):
                             # Replace all occurrences of backslash ("\") with a forward
                             # slash ("/") - this is necessary on windows when a path is
                             # provided as input, but we must link to a URI
                             return fp.replace('\\','/')
                     else:
                         fp_cleaner = None
                     return self._get_display_formatter(dirname_output_format,
                                                        fname_output_format,
                                                        fp_format,
                                                        fp_cleaner)
                 def _get_terminal_display_formatter(self,
                                                     spacer="  "):
                     """ generate function to use for terminal formatting
                        spacer: text placed before each file name [default: two spaces]
                     """
                     dirname_output_format = "%s/"
                     fname_output_format = spacer + "%s"
                     fp_format = '%s/%s'
                     return self._get_display_formatter(dirname_output_format,
                                                        fname_output_format,
                                                        fp_format)
                 def _format_path(self):
                     """return newline-separated html lines for every directory under
                     self.path, built with the notebook formatter
                     """
                     result_lines = []
                     # Sort the walk results so the output order is deterministic.
                     walked_dir = list(walk(self.path))
                     walked_dir.sort()
                     for dirname, subdirs, fnames in walked_dir:
                         result_lines += self.notebook_display_formatter(dirname, fnames, self.included_suffixes)
                     return '\n'.join(result_lines)
                 def __repr__(self):
                     """return newline-separated lines for every directory under
                     self.path, built with the terminal formatter
                     """
                     result_lines = []
                     walked_dir = list(walk(self.path))
                     walked_dir.sort()
                     for dirname, subdirs, fnames in walked_dir:
                         result_lines += self.terminal_display_formatter(dirname, fnames, self.included_suffixes)
                     return '\n'.join(result_lines)

IPython/utils/_tokenize_py2.py

0 +2 -2

             """Patched version of standard library tokenize, to deal with various bugs.
             Patches
             - Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
               manually applied.
             - Newlines in comments and blank lines should be either NL or NEWLINE, depending
               on whether they are in a multi-line statement. Filed as Python issue #17061.
             -------------------------------------------------------------------------------
             Tokenization help for Python programs.
             generate_tokens(readline) is a generator that breaks a stream of
             text into Python tokens.  It accepts a readline-like method which is called
             repeatedly to get the next line of input (or "" for EOF).  It generates
             5-tuples with these members:
                 the token type (see token.py)
                 the token (a string)
                 the starting (row, column) indices of the token (a 2-tuple of ints)
                 the ending (row, column) indices of the token (a 2-tuple of ints)
                 the original line (string)
             It is designed to match the working of the Python tokenizer exactly, except
             that it produces COMMENT tokens for comments and gives type OP for all
             operators
             Older entry points
                 tokenize_loop(readline, tokeneater)
                 tokenize(readline, tokeneater=printtoken)
             are the same, except instead of generating tokens, tokeneater is a callback
             function to which the 5 fields described above are passed as 5 arguments,
             each time a new token is found."""
             from __future__ import print_function
             __author__ = 'Ka-Ping Yee <ping@lfw.org>'
             __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
                            'Skip Montanaro, Raymond Hettinger')
             import string, re
             from token import *
             import token
             # Re-export every public name of the token module, plus the names
             # defined in this module.
             __all__ = [x for x in dir(token) if not x.startswith("_")]
             __all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
             # On Python 2 the list-comprehension variable leaks into module scope;
             # remove it and the temporary module reference.
             del x
             del token
             __all__ += ["TokenError"]
             # Register two extra token types directly after the ones from token.py.
             COMMENT = N_TOKENS
             tok_name[COMMENT] = 'COMMENT'
             NL = N_TOKENS + 1
             tok_name[NL] = 'NL'
             N_TOKENS += 2
             def group(*choices):
                 """Return the regex alternation of *choices* wrapped in parentheses."""
                 return '(%s)' % '|'.join(choices)
             def any(*choices):
                 """Return a regex matching zero or more repetitions of the alternation."""
                 return '%s*' % group(*choices)
             def maybe(*choices):
                 """Return a regex matching the alternation zero or one time."""
                 return '%s?' % group(*choices)
             # Regular-expression source strings for the lexical elements, combined
             # bottom-up with group()/any()/maybe() and compiled at the end.
             Whitespace = r'[ \f\t]*'
             Comment = r'#[^\r\n]*'
             # Whitespace, any number of backslash-newline continuations, then an
             # optional comment.
             Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
             Name = r'[a-zA-Z_]\w*'
             # Integer literals; the optional [lL] suffix is the Python 2 long marker.
             Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
             Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
             Binnumber = r'0[bB][01]+[lL]?'
             Decnumber = r'[1-9]\d*[lL]?'
             Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
             Exponent = r'[eE][-+]?\d+'
             Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
             Expfloat = r'\d+' + Exponent
             Floatnumber = group(Pointfloat, Expfloat)
             Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
             Number = group(Imagnumber, Floatnumber, Intnumber)
             # Tail end of ' string.
             Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
             # Tail end of " string.
             Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
             # Tail end of ''' string.
             Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
             # Tail end of """ string.
             Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
             # Opening of a triple-quoted string, with optional u/b and r prefixes.
             Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
             # Single-line ' or " string.
             String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
                            r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
             # Because of leftmost-then-longest match semantics, be sure to put the
             # longest operators first (e.g., if = came before ==, == would get
             # recognized as two instances of =).
             Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                              r"//=?",
                              r"[+\-*/%&|^=<>]=?",
                              r"~")
             Bracket = '[][(){}]'
             Special = group(r'\r?\n', r'[:;.,`@]')
             Funny = group(Operator, Bracket, Special)
             PlainToken = group(Number, Funny, String, Name)
             Token = Ignore + PlainToken
             # First (or only) line of ' or " string.
             ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                             group("'", r'\\\r?\n'),
                             r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                             group('"', r'\\\r?\n'))
             PseudoExtras = group(r'\\\r?\n', Comment, Triple)
             PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
             # Compile the four patterns once at import time.
             tokenprog, pseudoprog, single3prog, double3prog = map(
                 re.compile, (Token, PseudoToken, Single3, Double3))
             # Maps a string opener (quote characters with an optional prefix) to the
             # compiled pattern matching the tail end of that string. Bare prefix
             # letters map to None.
             # NOTE(review): presumably consulted by generate_tokens, which is not
             # visible here -- confirm how the None entries are used.
             endprogs = {"'": re.compile(Single), '"': re.compile(Double),
                         "'''": single3prog, '"""': double3prog,
                         "r'''": single3prog, 'r"""': double3prog,
                         "u'''": single3prog, 'u"""': double3prog,
                         "ur'''": single3prog, 'ur"""': double3prog,
                         "R'''": single3prog, 'R"""': double3prog,
                         "U'''": single3prog, 'U"""': double3prog,
                         "uR'''": single3prog, 'uR"""': double3prog,
                         "Ur'''": single3prog, 'Ur"""': double3prog,
                         "UR'''": single3prog, 'UR"""': double3prog,
                         "b'''": single3prog, 'b"""': double3prog,
                         "br'''": single3prog, 'br"""': double3prog,
                         "B'''": single3prog, 'B"""': double3prog,
                         "bR'''": single3prog, 'bR"""': double3prog,
                         "Br'''": single3prog, 'Br"""': double3prog,
                         "BR'''": single3prog, 'BR"""': double3prog,
                         'r': None, 'R': None, 'u': None, 'U': None,
                         'b': None, 'B': None}
             # Every triple-quote opener, stored as a dict mapping each opener to
             # itself so membership can be tested by key.
             triple_quoted = {}
             for t in ("'''", '"""',
                       "r'''", 'r"""', "R'''", 'R"""',
                       "u'''", 'u"""', "U'''", 'U"""',
                       "ur'''", 'ur"""', "Ur'''", 'Ur"""',
                       "uR'''", 'uR"""', "UR'''", 'UR"""',
                       "b'''", 'b"""', "B'''", 'B"""',
                       "br'''", 'br"""', "Br'''", 'Br"""',
                       "bR'''", 'bR"""', "BR'''", 'BR"""'):
                 triple_quoted[t] = t
             # Every single-quote opener, stored the same way.
             single_quoted = {}
             for t in ("'", '"',
                       "r'", 'r"', "R'", 'R"',
                       "u'", 'u"', "U'", 'U"',
                       "ur'", 'ur"', "Ur'", 'Ur"',
                       "uR'", 'uR"', "UR'", 'UR"',
                       "b'", 'b"', "B'", 'B"',
                       "br'", 'br"', "Br'", 'Br"',
                       "bR'", 'bR"', "BR'", 'BR"' ):
                 single_quoted[t] = t
             # NOTE(review): presumably the tab-stop width used when measuring
             # indentation -- its use is not visible here, confirm.
             tabsize = 8
             class TokenError(Exception):
                 """Tokenizer error type exported in ``__all__``."""
             class StopTokenizing(Exception):
                 """Signal that ends tokenize() early; tokenize() catches and ignores it."""
             def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
                 """Print one token as ``srow,scol-erow,ecol:<TAB>TYPE<TAB>repr(token)``.
                 This is the default ``tokeneater`` of tokenize(). ``line`` is accepted
                 so the signature matches the 5-field token tuple, but is not printed.
                 """
                 (srow, scol), (erow, ecol) = srow_scol, erow_ecol
                 template = "%d,%d-%d,%d:\t%s\t%s"
                 fields = (srow, scol, erow, ecol, tok_name[type], repr(token))
                 print(template % fields)
             def tokenize(readline, tokeneater=printtoken):
                 """
                 Tokenize an input stream, handing every token to a callback.
                 readline must be a callable with the same interface as the readline()
                 method of built-in file objects: each call returns one line of input
                 as a string.
                 tokeneater must be a callable too. It is invoked once per token with
                 five arguments, matching the tuples produced by generate_tokens().
                 A StopTokenizing exception raised while the loop runs ends the
                 tokenization quietly.
                 """
                 try:
                     tokenize_loop(readline, tokeneater)
                 except StopTokenizing:
                     # Early termination was requested; this is not an error.
                     return
             # backwards compatible interface
             def tokenize_loop(readline, tokeneater):
                 """Call ``tokeneater(*token)`` for every token of generate_tokens(readline)."""
                 token_stream = generate_tokens(readline)
                 for fields in token_stream:
                     tokeneater(*fields)
             class Untokenizer:
                 """Rebuild source text from a stream of tokens.

                 Output fragments accumulate in ``self.tokens``.  ``prev_row`` and
                 ``prev_col`` record where the previously emitted token ended, so the
                 gap before the next token can be filled in.
                 """

                 def __init__(self):
                     self.tokens = []
                     self.prev_row = 1
                     self.prev_col = 0

                 def add_whitespace(self, start, tok_type=None):
                     """Append the spacing needed before a token beginning at *start*.

                     start
                         (row, col) position where the next token begins.
                     tok_type
                         Type of the token about to be emitted.  Optional so existing
                         one-argument callers keep working; when omitted the token is
                         treated as an ordinary (non-newline) token.

                     The method used to read ``tok_type`` without it being defined in
                     its scope, which raised NameError whenever a token started on a
                     later row without a positive column offset.
                     """
                     row, col = start
                     assert row >= self.prev_row
                     col_offset = col - self.prev_col
                     if col_offset > 0:
                         self.tokens.append(" " * col_offset)
                     elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
                         # Line was backslash-continued
                         self.tokens.append(" ")

                 def untokenize(self, tokens):
                     """Return the source text for *tokens*.

                     Tokens with position information are placed using add_whitespace().
                     As soon as a 2-element token is met, the rest of the stream
                     (including that token) is handed to compat() instead.
                     """
                     iterable = iter(tokens)
                     for t in iterable:
                         if len(t) == 2:
                             self.compat(t, iterable)
                             break
                         tok_type, token, start, end = t[:4]
                         self.add_whitespace(start, tok_type)
                         self.tokens.append(token)
                         self.prev_row, self.prev_col = end
                         if tok_type in (NEWLINE, NL):
                             # The next token starts at the beginning of a new row.
                             self.prev_row += 1
                             self.prev_col = 0
                     return "".join(self.tokens)

                 def compat(self, token, iterable):
                     """Emit tokens that carry only (type, value), without positions.

                     *token* is the first such token; *iterable* supplies the rest.
                     Spacing is approximated: names and numbers get a trailing space,
                     adjacent strings are separated by a space, and the innermost
                     indent string is re-emitted at the start of each line.
                     """
                     # This import is here to avoid problems when the itertools
                     # module is not built yet and tokenize is imported.
                     from itertools import chain
                     startline = False
                     prevstring = False
                     indents = []
                     toks_append = self.tokens.append
                     for tok in chain([token], iterable):
                         toknum, tokval = tok[:2]
                         if toknum in (NAME, NUMBER):
                             tokval += ' '
                         # Insert a space between two consecutive strings
                         if toknum == STRING:
                             if prevstring:
                                 tokval = ' ' + tokval
                             prevstring = True
                         else:
                             prevstring = False
                         if toknum == INDENT:
                             indents.append(tokval)
                             continue
                         elif toknum == DEDENT:
                             indents.pop()
                             continue
                         elif toknum in (NEWLINE, NL):
                             startline = True
                         elif startline and indents:
                             toks_append(indents[-1])
                             startline = False
                         toks_append(tokval)
             def untokenize(iterable):
                 """Convert a sequence of tokens back into Python source text.

                 Every item of *iterable* must be a token sequence holding at least a
                 token number and a token value.  When only those two fields are
                 supplied the spacing of the result is poor.

                 Round-trip invariant for full input:
                     the untokenized source matches the input source exactly.

                 Round-trip invariant for limited input:
                     the output text tokenizes back to the same (number, value) pairs:

                     t1 = [tok[:2] for tok in generate_tokens(f.readline)]
                     newcode = untokenize(t1)
                     readline = iter(newcode.splitlines(1)).next
                     t2 = [tok[:2] for tok in generate_tokens(readline)]
                     assert t1 == t2
                 """
                 return Untokenizer().untokenize(iterable)
             def generate_tokens(readline):
                 """
                 Generate the tokens of the source read through *readline*.

                 The generate_tokens() generator requires one argument, readline, which
                 must be a callable object which provides the same interface as the
                 readline() method of built-in file objects. Each call to the function
                 should return one line of input as a string.  Alternately, readline
                 can be a callable that signals the end of input by raising
                 StopIteration (for example the bound "next" method of an iterator
                 over lines); that is treated like an empty line.

                 The generator produces 5-tuples with these members: the token type; the
                 token string; a 2-tuple (srow, scol) of ints specifying the row and
                 column where the token begins in the source; a 2-tuple (erow, ecol) of
                 ints specifying the row and column where the token ends in the source;
                 and the line on which the token was found. The line passed is the
                 logical line; continuation lines are included.

                 Raises TokenError when the input ends inside a multi-line string or a
                 multi-line statement, and IndentationError when a dedent does not
                 match any outer indentation level.
                 """
                 lnum = parenlev = continued = 0
                 namechars, numchars = string.ascii_letters + '_', '0123456789'
                 contstr, needcont = '', 0
                 contline = None
                 indents = [0]
                 while 1:                                   # loop over lines in stream
                     try:
                         line = readline()
                     except StopIteration:
                         line = ''
                     lnum += 1
                     pos, max = 0, len(line)
                     if contstr:                            # continued string
                         if not line:
                             # Call form of raise: valid on both Python 2 and Python 3.
                             raise TokenError("EOF in multi-line string", strstart)
                         endmatch = endprog.match(line)
                         if endmatch:
                             pos = end = endmatch.end(0)
                             yield (STRING, contstr + line[:end],
                                    strstart, (lnum, end), contline + line)
                             contstr, needcont = '', 0
                             contline = None
                         elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                             # Single-quoted string not continued by a backslash.
                             yield (ERRORTOKEN, contstr + line,
                                        strstart, (lnum, len(line)), contline)
                             contstr = ''
                             contline = None
                             continue
                         else:
                             contstr = contstr + line
                             contline = contline + line
                             continue
                     elif parenlev == 0 and not continued:  # new statement
                         if not line: break
                         column = 0
                         while pos < max:                   # measure leading whitespace
                             if line[pos] == ' ':
                                 column += 1
                             elif line[pos] == '\t':
                                 column = (column//tabsize + 1)*tabsize
                             elif line[pos] == '\f':
                                 column = 0
                             else:
                                 break
                             pos += 1
                         if pos == max:
                             break
                         if line[pos] in '#\r\n':           # skip comments or blank lines
                             # These lines never change the indentation stack.
                             if line[pos] == '#':
                                 comment_token = line[pos:].rstrip('\r\n')
                                 nl_pos = pos + len(comment_token)
                                 yield (COMMENT, comment_token,
                                        (lnum, pos), (lnum, pos + len(comment_token)), line)
                                 yield (NEWLINE, line[nl_pos:],
                                        (lnum, nl_pos), (lnum, len(line)), line)
                             else:
                                 yield (NEWLINE, line[pos:],
                                        (lnum, pos), (lnum, len(line)), line)
                             continue
                         if column > indents[-1]:           # count indents or dedents
                             indents.append(column)
                             yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
                         while column < indents[-1]:
                             if column not in indents:
                                 raise IndentationError(
                                     "unindent does not match any outer indentation level",
                                     ("<tokenize>", lnum, pos, line))
                             indents = indents[:-1]
                             yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
                     else:                                  # continued statement
                         if not line:
                             raise TokenError("EOF in multi-line statement", (lnum, 0))
                         continued = 0
                     while pos < max:
                         pseudomatch = pseudoprog.match(line, pos)
                         if pseudomatch:                                # scan for tokens
                             start, end = pseudomatch.span(1)
                             spos, epos, pos = (lnum, start), (lnum, end), end
                             token, initial = line[start:end], line[start]
                             if initial in numchars or \
                                (initial == '.' and token != '.'):      # ordinary number
                                 yield (NUMBER, token, spos, epos, line)
                             elif initial in '\r\n':
                                 # Inside brackets a line break does not end the statement.
                                 yield (NL if parenlev > 0 else NEWLINE,
                                        token, spos, epos, line)
                             elif initial == '#':
                                 assert not token.endswith("\n")
                                 yield (COMMENT, token, spos, epos, line)
                             elif token in triple_quoted:
                                 endprog = endprogs[token]
                                 endmatch = endprog.match(line, pos)
                                 if endmatch:                           # all on one line
                                     pos = endmatch.end(0)
                                     token = line[start:pos]
                                     yield (STRING, token, spos, (lnum, pos), line)
                                 else:
                                     strstart = (lnum, start)           # multiple lines
                                     contstr = line[start:]
                                     contline = line
                                     break
                             elif initial in single_quoted or \
                                 token[:2] in single_quoted or \
                                 token[:3] in single_quoted:
                                 if token[-1] == '\n':                  # continued string
                                     strstart = (lnum, start)
                                     endprog = (endprogs[initial] or endprogs[token[1]] or
                                                endprogs[token[2]])
                                     contstr, needcont = line[start:], 1
                                     contline = line
                                     break
                                 else:                                  # ordinary string
                                     yield (STRING, token, spos, epos, line)
                             elif initial in namechars:                 # ordinary name
                                 yield (NAME, token, spos, epos, line)
                             elif initial == '\\':                      # continued stmt
                                 continued = 1
                             else:
                                 if initial in '([{':
                                     parenlev += 1
                                 elif initial in ')]}':
                                     parenlev -= 1
                                 yield (OP, token, spos, epos, line)
                         else:
                             # Nothing matched here: emit one character as an error token.
                             yield (ERRORTOKEN, line[pos],
                                        (lnum, pos), (lnum, pos+1), line)
                             pos += 1
                 for indent in indents[1:]:                 # pop remaining indent levels
                     yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
                 yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
             if __name__ == '__main__':                     # testing
                 # Print the tokens of the file named on the command line, or of
                 # standard input when no file name is given.
                 import sys
                 if len(sys.argv) > 1:
                     # Use a context manager so the file is closed once tokenizing
                     # ends, including when tokenizing raises.
                     with open(sys.argv[1]) as source_file:
                         tokenize(source_file.readline)
                 else:
                     tokenize(sys.stdin.readline)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages