##// END OF EJS Templates
Make raise statements Python 3 compatible....
Thomas Kluyver -
Show More
@@ -1,495 +1,493 b''
1 1 """Various display related classes.
2 2
3 3 Authors : MinRK, gregcaporaso, dannystaple
4 4 """
5 5 from os.path import exists, isfile, splitext, abspath, join, isdir
6 6 from os import walk, sep
7 7
8 8 from IPython.core.display import DisplayObject
9 9
10 10
11 11 class Audio(DisplayObject):
12 12 """Create an audio object.
13 13
14 14 When this object is returned by an input cell or passed to the
15 15 display function, it will result in Audio controls being displayed
16 16 in the frontend (only works in the notebook).
17 17
18 18 Parameters
19 19 ----------
20 20 data : numpy array, list, unicode, str or bytes
21 21 Can be a
22 22 * Numpy 1d array containing the desired waveform (mono)
23 23 * List of float or integer representing the waveform (mono)
24 24 * String containing the filename
25 25 * Bytestring containing raw PCM data or
26 26 * URL pointing to a file on the web.
27 27
28 28 If the array option is used the waveform will be normalized.
29 29
30 30 If a filename or url is used the format support will be browser
31 31 dependent.
32 32 url : unicode
33 33 A URL to download the data from.
34 34 filename : unicode
35 35 Path to a local file to load the data from.
36 36 embed : boolean
37 37 Should the audio data be embedded using a data URI (True) or should
38 38 the original source be referenced. Set this to True if you want the
39 39 audio to be playable later with no internet connection in the notebook.
40 40
41 41 Default is `True`, unless the keyword argument `url` is set, then
42 42 default value is `False`.
43 43 rate : integer
44 44 The sampling rate of the raw data.
45 45 Only required when data parameter is being used as an array
46 46 autoplay : bool
47 47 Set to True if the audio should immediately start playing.
48 48 Default is `False`.
49 49
50 50 Examples
51 51 --------
52 52
53 53 # Generate a sound
54 54 import numpy as np
55 55 framerate = 44100
56 56 t = np.linspace(0,5,framerate*5)
57 57 data = np.sin(2*np.pi*220*t) + np.sin(2*np.pi*224*t)
58 58 Audio(data,rate=framerate)
59 59
60 60 Audio("http://www.nch.com.au/acm/8k16bitpcm.wav")
61 61 Audio(url="http://www.w3schools.com/html/horse.ogg")
62 62
63 63 Audio('/path/to/sound.wav')
64 64 Audio(filename='/path/to/sound.ogg')
65 65
66 66 Audio(b'RAW_WAV_DATA..')
67 67 Audio(data=b'RAW_WAV_DATA..')
68 68
69 69 """
70 70
71 71 def __init__(self, data=None, filename=None, url=None, embed=None, rate=None, autoplay=False):
72 72 if filename is None and url is None and data is None:
73 73 raise ValueError("No image data found. Expecting filename, url, or data.")
74 74 if embed is False and url is None:
75 75 raise ValueError("No url found. Expecting url when embed=False")
76 76
77 77 if url is not None and embed is not True:
78 78 self.embed = False
79 79 else:
80 80 self.embed = True
81 81 self.autoplay = autoplay
82 82 super(Audio, self).__init__(data=data, url=url, filename=filename)
83 83
84 84 if self.data is not None and not isinstance(self.data, bytes):
85 85 self.data = self._make_wav(data,rate)
86 86
87 87 def reload(self):
88 88 """Reload the raw data from file or URL."""
89 89 import mimetypes
90 90 if self.embed:
91 91 super(Audio, self).reload()
92 92
93 93 if self.filename is not None:
94 94 self.mimetype = mimetypes.guess_type(self.filename)[0]
95 95 elif self.url is not None:
96 96 self.mimetype = mimetypes.guess_type(self.url)[0]
97 97 else:
98 98 self.mimetype = "audio/wav"
99 99
100 100 def _make_wav(self, data, rate):
101 101 """ Transform a numpy array to a PCM bytestring """
102 102 import struct
103 103 from io import BytesIO
104 104 import wave
105 105 try:
106 106 import numpy as np
107 107 data = np.array(data,dtype=float)
108 108 if len(data.shape) > 1:
109 109 raise ValueError("encoding of stereo PCM signals are unsupported")
110 110 scaled = np.int16(data/np.max(np.abs(data))*32767).tolist()
111 111 except ImportError:
112 112 maxabsvalue = float(max([abs(x) for x in data]))
113 113 scaled = [int(x/maxabsvalue*32767) for x in data]
114 114 fp = BytesIO()
115 115 waveobj = wave.open(fp,mode='wb')
116 116 waveobj.setnchannels(1)
117 117 waveobj.setframerate(rate)
118 118 waveobj.setsampwidth(2)
119 119 waveobj.setcomptype('NONE','NONE')
120 120 waveobj.writeframes(b''.join([struct.pack('<h',x) for x in scaled]))
121 121 val = fp.getvalue()
122 122 waveobj.close()
123 123 return val
124 124
125 125 def _data_and_metadata(self):
126 126 """shortcut for returning metadata with url information, if defined"""
127 127 md = {}
128 128 if self.url:
129 129 md['url'] = self.url
130 130 if md:
131 131 return self.data, md
132 132 else:
133 133 return self.data
134 134
135 135 def _repr_html_(self):
136 136 src = """
137 137 <audio controls="controls" {autoplay}>
138 138 <source src="{src}" type="{type}" />
139 139 Your browser does not support the audio element.
140 140 </audio>
141 141 """
142 142 return src.format(src=self.src_attr(),type=self.mimetype, autoplay=self.autoplay_attr())
143 143
144 144 def src_attr(self):
145 145 import base64
146 146 if self.embed and (self.data is not None):
147 147 data = base64=base64.b64encode(self.data).decode('ascii')
148 148 return """data:{type};base64,{base64}""".format(type=self.mimetype,
149 149 base64=data)
150 150 elif self.url is not None:
151 151 return self.url
152 152 else:
153 153 return ""
154 154
155 155 def autoplay_attr(self):
156 156 if(self.autoplay):
157 157 return 'autoplay="autoplay"'
158 158 else:
159 159 return ''
160 160
161 161 class IFrame(object):
162 162 """
163 163 Generic class to embed an iframe in an IPython notebook
164 164 """
165 165
166 166 iframe = """
167 167 <iframe
168 168 width="{width}"
169 169 height={height}"
170 170 src="{src}{params}"
171 171 frameborder="0"
172 172 allowfullscreen
173 173 ></iframe>
174 174 """
175 175
176 176 def __init__(self, src, width, height, **kwargs):
177 177 self.src = src
178 178 self.width = width
179 179 self.height = height
180 180 self.params = kwargs
181 181
182 182 def _repr_html_(self):
183 183 """return the embed iframe"""
184 184 if self.params:
185 185 from urllib import urlencode
186 186 params = "?" + urlencode(self.params)
187 187 else:
188 188 params = ""
189 189 return self.iframe.format(src=self.src,
190 190 width=self.width,
191 191 height=self.height,
192 192 params=params)
193 193
194 194 class YouTubeVideo(IFrame):
195 195 """Class for embedding a YouTube Video in an IPython session, based on its video id.
196 196
197 197 e.g. to embed the video on this page:
198 198
199 199 http://www.youtube.com/watch?v=foo
200 200
201 201 you would do:
202 202
203 203 vid = YouTubeVideo("foo")
204 204 display(vid)
205 205
206 206 To start from 30 seconds:
207 207
208 208 vid = YouTubeVideo("abc", start=30)
209 209 display(vid)
210 210
211 211 To calculate seconds from time as hours, minutes, seconds use:
212 212 start=int(timedelta(hours=1, minutes=46, seconds=40).total_seconds())
213 213
214 214 Other parameters can be provided as documented at
215 215 https://developers.google.com/youtube/player_parameters#parameter-subheader
216 216 """
217 217
218 218 def __init__(self, id, width=400, height=300, **kwargs):
219 219 src = "http://www.youtube.com/embed/{0}".format(id)
220 220 super(YouTubeVideo, self).__init__(src, width, height, **kwargs)
221 221
222 222 class VimeoVideo(IFrame):
223 223 """
224 224 Class for embedding a Vimeo video in an IPython session, based on its video id.
225 225 """
226 226
227 227 def __init__(self, id, width=400, height=300, **kwargs):
228 228 src="http://player.vimeo.com/video/{0}".format(id)
229 229 super(VimeoVideo, self).__init__(src, width, height, **kwargs)
230 230
231 231 class ScribdDocument(IFrame):
232 232 """
233 233 Class for embedding a Scribd document in an IPython session
234 234
235 235 Use the start_page params to specify a starting point in the document
236 236 Use the view_mode params to specify display type, one of scroll | slideshow | book
237 237
238 238 e.g. to display Wes' foundational paper about PANDAS in book mode from page 3
239 239
240 240 ScribdDocument(71048089, width=800, height=400, start_page=3, view_mode="book")
241 241 """
242 242
243 243 def __init__(self, id, width=400, height=300, **kwargs):
244 244 src="http://www.scribd.com/embeds/{0}/content".format(id)
245 245 super(ScribdDocument, self).__init__(src, width, height, **kwargs)
246 246
247 247 class FileLink(object):
248 248 """Class for embedding a local file link in an IPython session, based on path
249 249
250 250 e.g. to embed a link that was generated in the IPython notebook as my/data.txt
251 251
252 252 you would do::
253 253
254 254 local_file = FileLink("my/data.txt")
255 255 display(local_file)
256 256
257 257 or in the HTML notebook, just::
258 258
259 259 FileLink("my/data.txt")
260 260 """
261 261
262 262 html_link_str = "<a href='%s' target='_blank'>%s</a>"
263 263
264 264 def __init__(self,
265 265 path,
266 266 url_prefix='files/',
267 267 result_html_prefix='',
268 268 result_html_suffix='<br>'):
269 269 """
270 270 Parameters
271 271 ----------
272 272 path : str
273 273 path to the file or directory that should be formatted
274 274 url_prefix : str
275 275 prefix to be prepended to all files to form a working link [default:
276 276 'files/']
277 277 result_html_prefix : str
278 278 text to prepend to the beginning of the link [default: none]
279 279 result_html_suffix : str
280 280 text to append at the end of the link [default: '<br>']
281 281 """
282 282 if isdir(path):
283 raise ValueError,\
284 ("Cannot display a directory using FileLink. "
283 raise ValueError("Cannot display a directory using FileLink. "
285 284 "Use FileLinks to display '%s'." % path)
286 285 self.path = path
287 286 self.url_prefix = url_prefix
288 287 self.result_html_prefix = result_html_prefix
289 288 self.result_html_suffix = result_html_suffix
290 289
291 290 def _format_path(self):
292 291 fp = ''.join([self.url_prefix,self.path])
293 292 return ''.join([self.result_html_prefix,
294 293 self.html_link_str % (fp, self.path),
295 294 self.result_html_suffix])
296 295
297 296 def _repr_html_(self):
298 297 """return html link to file
299 298 """
300 299 if not exists(self.path):
301 300 return ("Path (<tt>%s</tt>) doesn't exist. "
302 301 "It may still be in the process of "
303 302 "being generated, or you may have the "
304 303 "incorrect path." % self.path)
305 304
306 305 return self._format_path()
307 306
308 307 def __repr__(self):
309 308 """return absolute path to file
310 309 """
311 310 return abspath(self.path)
312 311
313 312 class FileLinks(FileLink):
314 313 """Class for embedding local file links in an IPython session, based on path
315 314
316 315 e.g. to embed links to files that were generated in the IPython notebook under my/data
317 316
318 317 you would do:
319 318
320 319 local_files = FileLinks("my/data")
321 320 display(local_files)
322 321
323 322 or in the HTML notebook, just
324 323
325 324 FileLinks("my/data")
326 325
327 326 """
328 327 def __init__(self,
329 328 path,
330 329 url_prefix='files/',
331 330 included_suffixes=None,
332 331 result_html_prefix='',
333 332 result_html_suffix='<br>',
334 333 notebook_display_formatter=None,
335 334 terminal_display_formatter=None):
336 335 """
337 336 included_suffixes : list of filename suffixes to include when
338 337 formatting output [default: include all files]
339 338
340 339 See the FileLink (base class of FileLinks) docstring for
341 340 information on additional parameters.
342 341
343 342 notebook_display_formatter : func used to format links for display
344 343 in the notebook. See discussion of formatter function below.
345 344
346 345 terminal_display_formatter : func used to format links for display
347 346 in the terminal. See discussion of formatter function below.
348 347
349 348
350 349 Passing custom formatter functions
351 350 ----------------------------------
352 351 Formatter functions must be of the form:
353 352 f(dirname, fnames, included_suffixes)
354 353 dirname : the name of a directory (a string),
355 354 fnames : a list of the files in that directory
356 355 included_suffixes : a list of the file suffixes that should be
357 356 included in the output (passing None means
358 357 to include all suffixes in the output in
359 358 the built-in formatters)
360 359
361 360 returns a list of lines that will be printed in the
362 361 notebook (if passing notebook_display_formatter) or the terminal
363 362 (if passing terminal_display_formatter). This function is iterated
364 363 over for each directory in self.path. Default formatters are in
365 364 place, can be passed here to support alternative formatting.
366 365
367 366 """
368 367 if isfile(path):
369 raise ValueError,\
370 ("Cannot display a file using FileLinks. "
368 raise ValueError("Cannot display a file using FileLinks. "
371 369 "Use FileLink to display '%s'." % path)
372 370 self.included_suffixes = included_suffixes
373 371 # remove trailing slashes for more consistent output formatting
374 372 path = path.rstrip('/')
375 373
376 374 self.path = path
377 375 self.url_prefix = url_prefix
378 376 self.result_html_prefix = result_html_prefix
379 377 self.result_html_suffix = result_html_suffix
380 378
381 379 self.notebook_display_formatter = \
382 380 notebook_display_formatter or self._get_notebook_display_formatter()
383 381 self.terminal_display_formatter = \
384 382 terminal_display_formatter or self._get_terminal_display_formatter()
385 383
386 384 def _get_display_formatter(self,
387 385 dirname_output_format,
388 386 fname_output_format,
389 387 fp_format,
390 388 fp_cleaner=None):
391 389 """ generate built-in formatter function
392 390
393 391 this is used to define both the notebook and terminal built-in
394 392 formatters as they only differ by some wrapper text for each entry
395 393
396 394 dirname_output_format: string to use for formatting directory
397 395 names, dirname will be substituted for a single "%s" which
398 396 must appear in this string
399 397 fname_output_format: string to use for formatting file names,
400 398 if a single "%s" appears in the string, fname will be substituted
401 399 if two "%s" appear in the string, the path to fname will be
402 400 substituted for the first and fname will be substituted for the
403 401 second
404 402 fp_format: string to use for formatting filepaths, must contain
405 403 exactly two "%s" and the dirname will be substituted for the first
406 404 and fname will be substituted for the second
407 405 """
408 406 def f(dirname, fnames, included_suffixes=None):
409 407 result = []
410 408 # begin by figuring out which filenames, if any,
411 409 # are going to be displayed
412 410 display_fnames = []
413 411 for fname in fnames:
414 412 if (isfile(join(dirname,fname)) and
415 413 (included_suffixes == None or
416 414 splitext(fname)[1] in included_suffixes)):
417 415 display_fnames.append(fname)
418 416
419 417 if len(display_fnames) == 0:
420 418 # if there are no filenames to display, don't print anything
421 419 # (not even the directory name)
422 420 pass
423 421 else:
424 422 # otherwise print the formatted directory name followed by
425 423 # the formatted filenames
426 424 dirname_output_line = dirname_output_format % dirname
427 425 result.append(dirname_output_line)
428 426 for fname in display_fnames:
429 427 fp = fp_format % (dirname,fname)
430 428 if fp_cleaner is not None:
431 429 fp = fp_cleaner(fp)
432 430 try:
433 431 # output can include both a filepath and a filename...
434 432 fname_output_line = fname_output_format % (fp, fname)
435 433 except TypeError:
436 434 # ... or just a single filepath
437 435 fname_output_line = fname_output_format % fname
438 436 result.append(fname_output_line)
439 437 return result
440 438 return f
441 439
442 440 def _get_notebook_display_formatter(self,
443 441 spacer="&nbsp;&nbsp;"):
444 442 """ generate function to use for notebook formatting
445 443 """
446 444 dirname_output_format = \
447 445 self.result_html_prefix + "%s/" + self.result_html_suffix
448 446 fname_output_format = \
449 447 self.result_html_prefix + spacer + self.html_link_str + self.result_html_suffix
450 448 fp_format = self.url_prefix + '%s/%s'
451 449 if sep == "\\":
452 450 # Working on a platform where the path separator is "\", so
453 451 # must convert these to "/" for generating a URI
454 452 def fp_cleaner(fp):
455 453 # Replace all occurrences of backslash ("\") with a forward
456 454 # slash ("/") - this is necessary on windows when a path is
457 455 # provided as input, but we must link to a URI
458 456 return fp.replace('\\','/')
459 457 else:
460 458 fp_cleaner = None
461 459
462 460 return self._get_display_formatter(dirname_output_format,
463 461 fname_output_format,
464 462 fp_format,
465 463 fp_cleaner)
466 464
467 465 def _get_terminal_display_formatter(self,
468 466 spacer=" "):
469 467 """ generate function to use for terminal formatting
470 468 """
471 469 dirname_output_format = "%s/"
472 470 fname_output_format = spacer + "%s"
473 471 fp_format = '%s/%s'
474 472
475 473 return self._get_display_formatter(dirname_output_format,
476 474 fname_output_format,
477 475 fp_format)
478 476
479 477 def _format_path(self):
480 478 result_lines = []
481 479 walked_dir = list(walk(self.path))
482 480 walked_dir.sort()
483 481 for dirname, subdirs, fnames in walked_dir:
484 482 result_lines += self.notebook_display_formatter(dirname, fnames, self.included_suffixes)
485 483 return '\n'.join(result_lines)
486 484
487 485 def __repr__(self):
488 486 """return newline-separated absolute paths
489 487 """
490 488 result_lines = []
491 489 walked_dir = list(walk(self.path))
492 490 walked_dir.sort()
493 491 for dirname, subdirs, fnames in walked_dir:
494 492 result_lines += self.terminal_display_formatter(dirname, fnames, self.included_suffixes)
495 493 return '\n'.join(result_lines)
@@ -1,439 +1,439 b''
1 1 """Patched version of standard library tokenize, to deal with various bugs.
2 2
3 3 Patches
4 4
5 5 - Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
6 6 manually applied.
7 7 - Newlines in comments and blank lines should be either NL or NEWLINE, depending
8 8 on whether they are in a multi-line statement. Filed as Python issue #17061.
9 9
10 10 -------------------------------------------------------------------------------
11 11 Tokenization help for Python programs.
12 12
13 13 generate_tokens(readline) is a generator that breaks a stream of
14 14 text into Python tokens. It accepts a readline-like method which is called
15 15 repeatedly to get the next line of input (or "" for EOF). It generates
16 16 5-tuples with these members:
17 17
18 18 the token type (see token.py)
19 19 the token (a string)
20 20 the starting (row, column) indices of the token (a 2-tuple of ints)
21 21 the ending (row, column) indices of the token (a 2-tuple of ints)
22 22 the original line (string)
23 23
24 24 It is designed to match the working of the Python tokenizer exactly, except
25 25 that it produces COMMENT tokens for comments and gives type OP for all
26 26 operators
27 27
28 28 Older entry points
29 29 tokenize_loop(readline, tokeneater)
30 30 tokenize(readline, tokeneater=printtoken)
31 31 are the same, except instead of generating tokens, tokeneater is a callback
32 32 function to which the 5 fields described above are passed as 5 arguments,
33 33 each time a new token is found."""
34 34 from __future__ import print_function
35 35
36 36 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
37 37 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
38 38 'Skip Montanaro, Raymond Hettinger')
39 39
40 40 import string, re
41 41 from token import *
42 42
43 43 import token
44 44 __all__ = [x for x in dir(token) if not x.startswith("_")]
45 45 __all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
46 46 del x
47 47 del token
48 48
49 49 __all__ += ["TokenError"]
50 50
51 51 COMMENT = N_TOKENS
52 52 tok_name[COMMENT] = 'COMMENT'
53 53 NL = N_TOKENS + 1
54 54 tok_name[NL] = 'NL'
55 55 N_TOKENS += 2
56 56
57 57 def group(*choices): return '(' + '|'.join(choices) + ')'
58 58 def any(*choices): return group(*choices) + '*'
59 59 def maybe(*choices): return group(*choices) + '?'
60 60
61 61 Whitespace = r'[ \f\t]*'
62 62 Comment = r'#[^\r\n]*'
63 63 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
64 64 Name = r'[a-zA-Z_]\w*'
65 65
66 66 Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
67 67 Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
68 68 Binnumber = r'0[bB][01]+[lL]?'
69 69 Decnumber = r'[1-9]\d*[lL]?'
70 70 Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
71 71 Exponent = r'[eE][-+]?\d+'
72 72 Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
73 73 Expfloat = r'\d+' + Exponent
74 74 Floatnumber = group(Pointfloat, Expfloat)
75 75 Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
76 76 Number = group(Imagnumber, Floatnumber, Intnumber)
77 77
78 78 # Tail end of ' string.
79 79 Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
80 80 # Tail end of " string.
81 81 Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
82 82 # Tail end of ''' string.
83 83 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
84 84 # Tail end of """ string.
85 85 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
86 86 Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
87 87 # Single-line ' or " string.
88 88 String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
89 89 r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
90 90
91 91 # Because of leftmost-then-longest match semantics, be sure to put the
92 92 # longest operators first (e.g., if = came before ==, == would get
93 93 # recognized as two instances of =).
94 94 Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
95 95 r"//=?",
96 96 r"[+\-*/%&|^=<>]=?",
97 97 r"~")
98 98
99 99 Bracket = '[][(){}]'
100 100 Special = group(r'\r?\n', r'[:;.,`@]')
101 101 Funny = group(Operator, Bracket, Special)
102 102
103 103 PlainToken = group(Number, Funny, String, Name)
104 104 Token = Ignore + PlainToken
105 105
106 106 # First (or only) line of ' or " string.
107 107 ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
108 108 group("'", r'\\\r?\n'),
109 109 r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
110 110 group('"', r'\\\r?\n'))
111 111 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
112 112 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
113 113
114 114 tokenprog, pseudoprog, single3prog, double3prog = map(
115 115 re.compile, (Token, PseudoToken, Single3, Double3))
116 116 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
117 117 "'''": single3prog, '"""': double3prog,
118 118 "r'''": single3prog, 'r"""': double3prog,
119 119 "u'''": single3prog, 'u"""': double3prog,
120 120 "ur'''": single3prog, 'ur"""': double3prog,
121 121 "R'''": single3prog, 'R"""': double3prog,
122 122 "U'''": single3prog, 'U"""': double3prog,
123 123 "uR'''": single3prog, 'uR"""': double3prog,
124 124 "Ur'''": single3prog, 'Ur"""': double3prog,
125 125 "UR'''": single3prog, 'UR"""': double3prog,
126 126 "b'''": single3prog, 'b"""': double3prog,
127 127 "br'''": single3prog, 'br"""': double3prog,
128 128 "B'''": single3prog, 'B"""': double3prog,
129 129 "bR'''": single3prog, 'bR"""': double3prog,
130 130 "Br'''": single3prog, 'Br"""': double3prog,
131 131 "BR'''": single3prog, 'BR"""': double3prog,
132 132 'r': None, 'R': None, 'u': None, 'U': None,
133 133 'b': None, 'B': None}
134 134
135 135 triple_quoted = {}
136 136 for t in ("'''", '"""',
137 137 "r'''", 'r"""', "R'''", 'R"""',
138 138 "u'''", 'u"""', "U'''", 'U"""',
139 139 "ur'''", 'ur"""', "Ur'''", 'Ur"""',
140 140 "uR'''", 'uR"""', "UR'''", 'UR"""',
141 141 "b'''", 'b"""', "B'''", 'B"""',
142 142 "br'''", 'br"""', "Br'''", 'Br"""',
143 143 "bR'''", 'bR"""', "BR'''", 'BR"""'):
144 144 triple_quoted[t] = t
145 145 single_quoted = {}
146 146 for t in ("'", '"',
147 147 "r'", 'r"', "R'", 'R"',
148 148 "u'", 'u"', "U'", 'U"',
149 149 "ur'", 'ur"', "Ur'", 'Ur"',
150 150 "uR'", 'uR"', "UR'", 'UR"',
151 151 "b'", 'b"', "B'", 'B"',
152 152 "br'", 'br"', "Br'", 'Br"',
153 153 "bR'", 'bR"', "BR'", 'BR"' ):
154 154 single_quoted[t] = t
155 155
156 156 tabsize = 8
157 157
158 158 class TokenError(Exception): pass
159 159
160 160 class StopTokenizing(Exception): pass
161 161
162 162 def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
163 163 srow, scol = srow_scol
164 164 erow, ecol = erow_ecol
165 165 print("%d,%d-%d,%d:\t%s\t%s" % \
166 166 (srow, scol, erow, ecol, tok_name[type], repr(token)))
167 167
168 168 def tokenize(readline, tokeneater=printtoken):
169 169 """
170 170 The tokenize() function accepts two parameters: one representing the
171 171 input stream, and one providing an output mechanism for tokenize().
172 172
173 173 The first parameter, readline, must be a callable object which provides
174 174 the same interface as the readline() method of built-in file objects.
175 175 Each call to the function should return one line of input as a string.
176 176
177 177 The second parameter, tokeneater, must also be a callable object. It is
178 178 called once for each token, with five arguments, corresponding to the
179 179 tuples generated by generate_tokens().
180 180 """
181 181 try:
182 182 tokenize_loop(readline, tokeneater)
183 183 except StopTokenizing:
184 184 pass
185 185
186 186 # backwards compatible interface
187 187 def tokenize_loop(readline, tokeneater):
188 188 for token_info in generate_tokens(readline):
189 189 tokeneater(*token_info)
190 190
191 191 class Untokenizer:
192 192
193 193 def __init__(self):
194 194 self.tokens = []
195 195 self.prev_row = 1
196 196 self.prev_col = 0
197 197
198 198 def add_whitespace(self, start):
199 199 row, col = start
200 200 assert row >= self.prev_row
201 201 col_offset = col - self.prev_col
202 202 if col_offset > 0:
203 203 self.tokens.append(" " * col_offset)
204 204 elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
205 205 # Line was backslash-continued
206 206 self.tokens.append(" ")
207 207
208 208 def untokenize(self, tokens):
209 209 iterable = iter(tokens)
210 210 for t in iterable:
211 211 if len(t) == 2:
212 212 self.compat(t, iterable)
213 213 break
214 214 tok_type, token, start, end = t[:4]
215 215 self.add_whitespace(start)
216 216 self.tokens.append(token)
217 217 self.prev_row, self.prev_col = end
218 218 if tok_type in (NEWLINE, NL):
219 219 self.prev_row += 1
220 220 self.prev_col = 0
221 221 return "".join(self.tokens)
222 222
223 223 def compat(self, token, iterable):
224 224 # This import is here to avoid problems when the itertools
225 225 # module is not built yet and tokenize is imported.
226 226 from itertools import chain
227 227 startline = False
228 228 prevstring = False
229 229 indents = []
230 230 toks_append = self.tokens.append
231 231 for tok in chain([token], iterable):
232 232 toknum, tokval = tok[:2]
233 233
234 234 if toknum in (NAME, NUMBER):
235 235 tokval += ' '
236 236
237 237 # Insert a space between two consecutive strings
238 238 if toknum == STRING:
239 239 if prevstring:
240 240 tokval = ' ' + tokval
241 241 prevstring = True
242 242 else:
243 243 prevstring = False
244 244
245 245 if toknum == INDENT:
246 246 indents.append(tokval)
247 247 continue
248 248 elif toknum == DEDENT:
249 249 indents.pop()
250 250 continue
251 251 elif toknum in (NEWLINE, NL):
252 252 startline = True
253 253 elif startline and indents:
254 254 toks_append(indents[-1])
255 255 startline = False
256 256 toks_append(tokval)
257 257
258 258 def untokenize(iterable):
259 259 """Transform tokens back into Python source code.
260 260
261 261 Each element returned by the iterable must be a token sequence
262 262 with at least two elements, a token number and token value. If
263 263 only two tokens are passed, the resulting output is poor.
264 264
265 265 Round-trip invariant for full input:
266 266 Untokenized source will match input source exactly
267 267
268 268 Round-trip invariant for limited input:
269 269 # Output text will tokenize back to the input
270 270 t1 = [tok[:2] for tok in generate_tokens(f.readline)]
271 271 newcode = untokenize(t1)
272 272 readline = iter(newcode.splitlines(1)).next
273 273 t2 = [tok[:2] for tok in generate_tokens(readline)]
274 274 assert t1 == t2
275 275 """
276 276 ut = Untokenizer()
277 277 return ut.untokenize(iterable)
278 278
279 279 def generate_tokens(readline):
280 280 """
281 281 The generate_tokens() generator requires one argument, readline, which
282 282 must be a callable object which provides the same interface as the
283 283 readline() method of built-in file objects. Each call to the function
284 284 should return one line of input as a string. Alternately, readline
285 285 can be a callable function terminating with StopIteration:
286 286 readline = open(myfile).next # Example of alternate readline
287 287
288 288 The generator produces 5-tuples with these members: the token type; the
289 289 token string; a 2-tuple (srow, scol) of ints specifying the row and
290 290 column where the token begins in the source; a 2-tuple (erow, ecol) of
291 291 ints specifying the row and column where the token ends in the source;
292 292 and the line on which the token was found. The line passed is the
293 293 logical line; continuation lines are included.
294 294 """
295 295 lnum = parenlev = continued = 0
296 296 namechars, numchars = string.ascii_letters + '_', '0123456789'
297 297 contstr, needcont = '', 0
298 298 contline = None
299 299 indents = [0]
300 300
301 301 while 1: # loop over lines in stream
302 302 try:
303 303 line = readline()
304 304 except StopIteration:
305 305 line = ''
306 306 lnum += 1
307 307 pos, max = 0, len(line)
308 308
309 309 if contstr: # continued string
310 310 if not line:
311 raise TokenError, ("EOF in multi-line string", strstart)
311 raise TokenError("EOF in multi-line string", strstart)
312 312 endmatch = endprog.match(line)
313 313 if endmatch:
314 314 pos = end = endmatch.end(0)
315 315 yield (STRING, contstr + line[:end],
316 316 strstart, (lnum, end), contline + line)
317 317 contstr, needcont = '', 0
318 318 contline = None
319 319 elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
320 320 yield (ERRORTOKEN, contstr + line,
321 321 strstart, (lnum, len(line)), contline)
322 322 contstr = ''
323 323 contline = None
324 324 continue
325 325 else:
326 326 contstr = contstr + line
327 327 contline = contline + line
328 328 continue
329 329
330 330 elif parenlev == 0 and not continued: # new statement
331 331 if not line: break
332 332 column = 0
333 333 while pos < max: # measure leading whitespace
334 334 if line[pos] == ' ':
335 335 column += 1
336 336 elif line[pos] == '\t':
337 337 column = (column//tabsize + 1)*tabsize
338 338 elif line[pos] == '\f':
339 339 column = 0
340 340 else:
341 341 break
342 342 pos += 1
343 343 if pos == max:
344 344 break
345 345
346 346 if line[pos] in '#\r\n': # skip comments or blank lines
347 347 if line[pos] == '#':
348 348 comment_token = line[pos:].rstrip('\r\n')
349 349 nl_pos = pos + len(comment_token)
350 350 yield (COMMENT, comment_token,
351 351 (lnum, pos), (lnum, pos + len(comment_token)), line)
352 352 yield (NEWLINE, line[nl_pos:],
353 353 (lnum, nl_pos), (lnum, len(line)), line)
354 354 else:
355 355 yield (NEWLINE, line[pos:],
356 356 (lnum, pos), (lnum, len(line)), line)
357 357 continue
358 358
359 359 if column > indents[-1]: # count indents or dedents
360 360 indents.append(column)
361 361 yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
362 362 while column < indents[-1]:
363 363 if column not in indents:
364 364 raise IndentationError(
365 365 "unindent does not match any outer indentation level",
366 366 ("<tokenize>", lnum, pos, line))
367 367 indents = indents[:-1]
368 368 yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
369 369
370 370 else: # continued statement
371 371 if not line:
372 raise TokenError, ("EOF in multi-line statement", (lnum, 0))
372 raise TokenError("EOF in multi-line statement", (lnum, 0))
373 373 continued = 0
374 374
375 375 while pos < max:
376 376 pseudomatch = pseudoprog.match(line, pos)
377 377 if pseudomatch: # scan for tokens
378 378 start, end = pseudomatch.span(1)
379 379 spos, epos, pos = (lnum, start), (lnum, end), end
380 380 token, initial = line[start:end], line[start]
381 381
382 382 if initial in numchars or \
383 383 (initial == '.' and token != '.'): # ordinary number
384 384 yield (NUMBER, token, spos, epos, line)
385 385 elif initial in '\r\n':
386 386 yield (NL if parenlev > 0 else NEWLINE,
387 387 token, spos, epos, line)
388 388 elif initial == '#':
389 389 assert not token.endswith("\n")
390 390 yield (COMMENT, token, spos, epos, line)
391 391 elif token in triple_quoted:
392 392 endprog = endprogs[token]
393 393 endmatch = endprog.match(line, pos)
394 394 if endmatch: # all on one line
395 395 pos = endmatch.end(0)
396 396 token = line[start:pos]
397 397 yield (STRING, token, spos, (lnum, pos), line)
398 398 else:
399 399 strstart = (lnum, start) # multiple lines
400 400 contstr = line[start:]
401 401 contline = line
402 402 break
403 403 elif initial in single_quoted or \
404 404 token[:2] in single_quoted or \
405 405 token[:3] in single_quoted:
406 406 if token[-1] == '\n': # continued string
407 407 strstart = (lnum, start)
408 408 endprog = (endprogs[initial] or endprogs[token[1]] or
409 409 endprogs[token[2]])
410 410 contstr, needcont = line[start:], 1
411 411 contline = line
412 412 break
413 413 else: # ordinary string
414 414 yield (STRING, token, spos, epos, line)
415 415 elif initial in namechars: # ordinary name
416 416 yield (NAME, token, spos, epos, line)
417 417 elif initial == '\\': # continued stmt
418 418 continued = 1
419 419 else:
420 420 if initial in '([{':
421 421 parenlev += 1
422 422 elif initial in ')]}':
423 423 parenlev -= 1
424 424 yield (OP, token, spos, epos, line)
425 425 else:
426 426 yield (ERRORTOKEN, line[pos],
427 427 (lnum, pos), (lnum, pos+1), line)
428 428 pos += 1
429 429
430 430 for indent in indents[1:]: # pop remaining indent levels
431 431 yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
432 432 yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
433 433
434 434 if __name__ == '__main__': # testing
435 435 import sys
436 436 if len(sys.argv) > 1:
437 437 tokenize(open(sys.argv[1]).readline)
438 438 else:
439 439 tokenize(sys.stdin.readline)
General Comments 0
You need to be logged in to leave comments. Login now