##// END OF EJS Templates
Make raise statements Python 3 compatible....
Thomas Kluyver -
Show More
@@ -1,495 +1,493 b''
1 """Various display related classes.
1 """Various display related classes.
2
2
3 Authors : MinRK, gregcaporaso, dannystaple
3 Authors : MinRK, gregcaporaso, dannystaple
4 """
4 """
5 from os.path import exists, isfile, splitext, abspath, join, isdir
5 from os.path import exists, isfile, splitext, abspath, join, isdir
6 from os import walk, sep
6 from os import walk, sep
7
7
8 from IPython.core.display import DisplayObject
8 from IPython.core.display import DisplayObject
9
9
10
10
class Audio(DisplayObject):
    """Create an audio object.

    When this object is returned by an input cell or passed to the
    display function, it will result in Audio controls being displayed
    in the frontend (only works in the notebook).

    Parameters
    ----------
    data : numpy array, list, unicode, str or bytes
        Can be a
        * Numpy 1d array containing the desired waveform (mono)
        * List of float or integer representing the waveform (mono)
        * String containing the filename
        * Bytestring containing raw PCM data or
        * URL pointing to a file on the web.

        If the array option is used the waveform will be normalized.

        If a filename or url is used the format support will be browser
        dependent.
    url : unicode
        A URL to download the data from.
    filename : unicode
        Path to a local file to load the data from.
    embed : boolean
        Should the audio data be embedded using a data URI (True) or should
        the original source be referenced. Set this to True if you want the
        audio to be playable later with no internet connection in the
        notebook.

        Default is `True`, unless the keyword argument `url` is set, then
        default value is `False`.
    rate : integer
        The sampling rate of the raw data.
        Only required when data parameter is being used as an array
    autoplay : bool
        Set to True if the audio should immediately start playing.
        Default is `False`.

    Examples
    --------

    # Generate a sound
    import numpy as np
    framerate = 44100
    t = np.linspace(0,5,framerate*5)
    data = np.sin(2*np.pi*220*t) + np.sin(2*np.pi*224*t)
    Audio(data,rate=framerate)

    Audio("http://www.nch.com.au/acm/8k16bitpcm.wav")
    Audio(url="http://www.w3schools.com/html/horse.ogg")

    Audio('/path/to/sound.wav')
    Audio(filename='/path/to/sound.ogg')

    Audio(b'RAW_WAV_DATA..')
    Audio(data=b'RAW_WAV_DATA..')

    """

    def __init__(self, data=None, filename=None, url=None, embed=None, rate=None, autoplay=False):
        """Validate the arguments, load the data and encode waveforms as WAV.

        Raises
        ------
        ValueError
            If none of `data`, `filename` or `url` is given, or if
            `embed=False` is requested without a `url`.
        """
        if filename is None and url is None and data is None:
            raise ValueError("No audio data found. Expecting filename, url, or data.")
        if embed is False and url is None:
            raise ValueError("No url found. Expecting url when embed=False")

        # Must be set before the base-class constructor runs: reload()
        # reads self.embed.
        if url is not None and embed is not True:
            self.embed = False
        else:
            self.embed = True
        self.autoplay = autoplay
        super(Audio, self).__init__(data=data, url=url, filename=filename)

        # Anything that is not already a bytestring is treated as a waveform
        # and encoded as WAV data.
        if self.data is not None and not isinstance(self.data, bytes):
            self.data = self._make_wav(data, rate)

    def reload(self):
        """Reload the raw data from file or URL."""
        import mimetypes
        if self.embed:
            super(Audio, self).reload()

        # Guess the mimetype from the source name; waveform data is
        # always encoded as WAV.
        if self.filename is not None:
            self.mimetype = mimetypes.guess_type(self.filename)[0]
        elif self.url is not None:
            self.mimetype = mimetypes.guess_type(self.url)[0]
        else:
            self.mimetype = "audio/wav"

    def _make_wav(self, data, rate):
        """Transform a mono waveform (numpy array or list) to a WAV bytestring.

        The waveform is normalized to the full 16 bit range. An all-zero
        waveform is encoded as silence instead of dividing by zero.

        Raises
        ------
        ValueError
            If `rate` is None or if `data` has more than one dimension.
        """
        import struct
        from io import BytesIO
        import wave

        if rate is None:
            raise ValueError("rate must be specified when data is a numpy "
                             "array or list of audio samples.")

        try:
            import numpy as np
        except ImportError:
            np = None

        if np is not None:
            data = np.array(data, dtype=float)
            if len(data.shape) > 1:
                raise ValueError("encoding of stereo PCM signals are unsupported")
            peak = np.max(np.abs(data))
            if peak == 0:
                # silent signal: nothing to normalize
                scaled = [0] * data.size
            else:
                scaled = np.int16(data / peak * 32767).tolist()
        else:
            # pure Python fallback when numpy is not installed
            peak = float(max([abs(x) for x in data]))
            if peak == 0:
                scaled = [0 for _ in data]
            else:
                scaled = [int(x / peak * 32767) for x in data]

        fp = BytesIO()
        waveobj = wave.open(fp, mode='wb')
        waveobj.setnchannels(1)
        waveobj.setframerate(rate)
        waveobj.setsampwidth(2)
        waveobj.setcomptype('NONE', 'NONE')
        # little-endian signed 16 bit samples
        waveobj.writeframes(b''.join([struct.pack('<h', x) for x in scaled]))
        val = fp.getvalue()
        waveobj.close()
        return val

    def _data_and_metadata(self):
        """shortcut for returning metadata with url information, if defined"""
        md = {}
        if self.url:
            md['url'] = self.url
        if md:
            return self.data, md
        else:
            return self.data

    def _repr_html_(self):
        """Return the HTML ``<audio>`` element for this object."""
        src = """
                <audio controls="controls" {autoplay}>
                    <source src="{src}" type="{type}" />
                    Your browser does not support the audio element.
                </audio>
              """
        return src.format(src=self.src_attr(), type=self.mimetype, autoplay=self.autoplay_attr())

    def src_attr(self):
        """Return the value of the ``src`` attribute.

        This is a base64 data URI when embedding, the url when only
        referencing, and an empty string otherwise.
        """
        import base64
        if self.embed and (self.data is not None):
            encoded = base64.b64encode(self.data).decode('ascii')
            return """data:{type};base64,{base64}""".format(type=self.mimetype,
                                                            base64=encoded)
        elif self.url is not None:
            return self.url
        else:
            return ""

    def autoplay_attr(self):
        """Return the ``autoplay`` attribute text, or '' when disabled."""
        if self.autoplay:
            return 'autoplay="autoplay"'
        else:
            return ''
160
160
class IFrame(object):
    """
    Generic class to embed an iframe in an IPython notebook

    Parameters
    ----------
    src : str
        URL the iframe points to.
    width, height : int or str
        Values of the iframe's ``width`` and ``height`` attributes.
    **kwargs
        Extra parameters, url-encoded and appended to `src` as a query
        string.
    """

    # Every attribute value is quoted; the height attribute used to lack
    # its opening quote, which produced malformed HTML.
    iframe = """
        <iframe
            width="{width}"
            height="{height}"
            src="{src}{params}"
            frameborder="0"
            allowfullscreen
        ></iframe>
        """

    def __init__(self, src, width, height, **kwargs):
        self.src = src
        self.width = width
        self.height = height
        self.params = kwargs

    def _repr_html_(self):
        """return the embed iframe"""
        if self.params:
            # urlencode lives in urllib.parse on Python 3 and in urllib
            # on Python 2
            try:
                from urllib.parse import urlencode
            except ImportError:
                from urllib import urlencode
            params = "?" + urlencode(self.params)
        else:
            params = ""
        return self.iframe.format(src=self.src,
                                  width=self.width,
                                  height=self.height,
                                  params=params)
193
193
class YouTubeVideo(IFrame):
    """Embed a YouTube video in an IPython session, given its video id.

    For the video shown on this page:

        http://www.youtube.com/watch?v=foo

    the id is "foo", so you would write:

        vid = YouTubeVideo("foo")
        display(vid)

    Playback can be started 30 seconds in with:

        vid = YouTubeVideo("abc", start=30)
        display(vid)

    A start time given as hours, minutes and seconds can be converted with:
        start=int(timedelta(hours=1, minutes=46, seconds=40).total_seconds())

    Any other keyword argument is passed on as a player parameter, see
    https://developers.google.com/youtube/player_parameters#parameter-subheader
    """

    def __init__(self, id, width=400, height=300, **kwargs):
        embed_url = "http://www.youtube.com/embed/{video_id}".format(video_id=id)
        super(YouTubeVideo, self).__init__(embed_url, width, height, **kwargs)
221
221
class VimeoVideo(IFrame):
    """
    Embed a Vimeo video in an IPython session, given its video id.

    Extra keyword arguments are forwarded to the IFrame constructor.
    """

    def __init__(self, id, width=400, height=300, **kwargs):
        player_url = "http://player.vimeo.com/video/{video_id}".format(video_id=id)
        super(VimeoVideo, self).__init__(player_url, width, height, **kwargs)
230
230
class ScribdDocument(IFrame):
    """
    Embed a Scribd document in an IPython session, given its document id.

    Use the start_page param to choose the page the document opens on.
    Use the view_mode param to choose the display type, one of
    scroll | slideshow | book

    e.g. to display Wes' foundational paper about PANDAS in book mode,
    starting from page 3:

    ScribdDocument(71048089, width=800, height=400, start_page=3, view_mode="book")
    """

    def __init__(self, id, width=400, height=300, **kwargs):
        embed_url = "http://www.scribd.com/embeds/{doc_id}/content".format(doc_id=id)
        super(ScribdDocument, self).__init__(embed_url, width, height, **kwargs)
246
246
class FileLink(object):
    """Class for embedding a local file link in an IPython session, based on path

    e.g. to embed a link that was generated in the IPython notebook as my/data.txt

    you would do::

        local_file = FileLink("my/data.txt")
        display(local_file)

    or in the HTML notebook, just::

        FileLink("my/data.txt")
    """

    # filled with (link target, link text)
    html_link_str = "<a href='%s' target='_blank'>%s</a>"

    def __init__(self,
                 path,
                 url_prefix='files/',
                 result_html_prefix='',
                 result_html_suffix='<br>'):
        """
        Parameters
        ----------
        path : str
            path to the file that should be formatted
        url_prefix : str
            prefix to be prepended to the path to form a working link
            [default: 'files/']
        result_html_prefix : str
            text to prepend to the link [default: none]
        result_html_suffix : str
            text to append at the end of link [default: '<br>']

        Raises
        ------
        ValueError
            If `path` is a directory.
        """
        if isdir(path):
            # call syntax (not ``raise E, msg``) so this parses on Python 3
            raise ValueError("Cannot display a directory using FileLink. "
                             "Use FileLinks to display '%s'." % path)
        self.path = path
        self.url_prefix = url_prefix
        self.result_html_prefix = result_html_prefix
        self.result_html_suffix = result_html_suffix

    def _format_path(self):
        """Return the HTML link wrapped in the configured prefix and suffix."""
        fp = ''.join([self.url_prefix, self.path])
        return ''.join([self.result_html_prefix,
                        self.html_link_str % (fp, self.path),
                        self.result_html_suffix])

    def _repr_html_(self):
        """return html link to file
        """
        if not exists(self.path):
            return ("Path (<tt>%s</tt>) doesn't exist. "
                    "It may still be in the process of "
                    "being generated, or you may have the "
                    "incorrect path." % self.path)

        return self._format_path()

    def __repr__(self):
        """return absolute path to file
        """
        return abspath(self.path)
312
311
class FileLinks(FileLink):
    """Class for embedding local file links in an IPython session, based on path

    e.g. to embed links to files that were generated in the IPython notebook under my/data

    you would do:

        local_files = FileLinks("my/data")
        display(local_files)

    or in the HTML notebook, just

        FileLinks("my/data")

    """
    def __init__(self,
                 path,
                 url_prefix='files/',
                 included_suffixes=None,
                 result_html_prefix='',
                 result_html_suffix='<br>',
                 notebook_display_formatter=None,
                 terminal_display_formatter=None):
        """
        included_suffixes : list of filename suffixes to include when
         formatting output [default: include all files]

        See the FileLink (base class of FileLinks) docstring for
         information on additional parameters.

        notebook_display_formatter : func used to format links for display
         in the notebook. See discussion of formatter function below.

        terminal_display_formatter : func used to format links for display
         in the terminal. See discussion of formatter function below.


        Passing custom formatter functions
        ----------------------------------
         Formatter functions must be of the form:
          f(dirname, fnames, included_suffixes)
           dirname : the name of a directory (a string),
           fnames : a list of the files in that directory
           included_suffixes : a list of the file suffixes that should be
                               included in the output (passing None means
                               to include all suffixes in the output in
                               the built-in formatters)

           returns a list of lines that will be printed in the
           notebook (if passing notebook_display_formatter) or the terminal
           (if passing terminal_display_formatter). This function is iterated
           over for each directory in self.path. Default formatters are in
           place, can be passed here to support alternative formatting.

        Raises
        ------
        ValueError
            If `path` is a regular file.
        """
        if isfile(path):
            # call syntax (not ``raise E, msg``) so this parses on Python 3
            raise ValueError("Cannot display a file using FileLinks. "
                             "Use FileLink to display '%s'." % path)
        self.included_suffixes = included_suffixes
        # remove trailing slashes for more consistent output formatting
        path = path.rstrip('/')

        self.path = path
        self.url_prefix = url_prefix
        self.result_html_prefix = result_html_prefix
        self.result_html_suffix = result_html_suffix

        # fall back to the built-in formatters when none are supplied
        self.notebook_display_formatter = \
             notebook_display_formatter or self._get_notebook_display_formatter()
        self.terminal_display_formatter = \
             terminal_display_formatter or self._get_terminal_display_formatter()

    def _get_display_formatter(self,
                               dirname_output_format,
                               fname_output_format,
                               fp_format,
                               fp_cleaner=None):
        """ generate built-in formatter function

           this is used to define both the notebook and terminal built-in
            formatters as they only differ by some wrapper text for each entry

           dirname_output_format: string to use for formatting directory
            names, dirname will be substituted for a single "%s" which
            must appear in this string
           fname_output_format: string to use for formatting file names,
            if a single "%s" appears in the string, fname will be substituted
            if two "%s" appear in the string, the path to fname will be
             substituted for the first and fname will be substituted for the
             second
           fp_format: string to use for formatting filepaths, must contain
            exactly two "%s" and the dirname will be substituted for the first
            and fname will be substituted for the second
           fp_cleaner: optional function applied to each formatted filepath
            before it is inserted into the output
        """
        def f(dirname, fnames, included_suffixes=None):
            result = []
            # begin by figuring out which filenames, if any,
            # are going to be displayed
            display_fnames = []
            for fname in fnames:
                if (isfile(join(dirname, fname)) and
                       (included_suffixes is None or
                        splitext(fname)[1] in included_suffixes)):
                    display_fnames.append(fname)

            # if there are no filenames to display, don't print anything
            # (not even the directory name)
            if display_fnames:
                # otherwise print the formatted directory name followed by
                # the formatted filenames
                dirname_output_line = dirname_output_format % dirname
                result.append(dirname_output_line)
                for fname in display_fnames:
                    fp = fp_format % (dirname, fname)
                    if fp_cleaner is not None:
                        fp = fp_cleaner(fp)
                    try:
                        # output can include both a filepath and a filename...
                        fname_output_line = fname_output_format % (fp, fname)
                    except TypeError:
                        # ... or just a single filepath
                        fname_output_line = fname_output_format % fname
                    result.append(fname_output_line)
            return result
        return f

    def _get_notebook_display_formatter(self,
                                        spacer="&nbsp;&nbsp;"):
        """ generate function to use for notebook formatting
        """
        dirname_output_format = \
         self.result_html_prefix + "%s/" + self.result_html_suffix
        fname_output_format = \
         self.result_html_prefix + spacer + self.html_link_str + self.result_html_suffix
        fp_format = self.url_prefix + '%s/%s'
        if sep == "\\":
            # Working on a platform where the path separator is "\", so
            # must convert these to "/" for generating a URI
            def fp_cleaner(fp):
                # Replace all occurrences of backslash ("\") with a forward
                # slash ("/") - this is necessary on windows when a path is
                # provided as input, but we must link to a URI
                return fp.replace('\\', '/')
        else:
            fp_cleaner = None

        return self._get_display_formatter(dirname_output_format,
                                           fname_output_format,
                                           fp_format,
                                           fp_cleaner)

    def _get_terminal_display_formatter(self,
                                        spacer="  "):
        """ generate function to use for terminal formatting
        """
        dirname_output_format = "%s/"
        fname_output_format = spacer + "%s"
        fp_format = '%s/%s'

        return self._get_display_formatter(dirname_output_format,
                                           fname_output_format,
                                           fp_format)

    def _format_path(self):
        """return newline-separated html lines for every directory under path
        """
        result_lines = []
        # sorted so that the output order is deterministic
        walked_dir = list(walk(self.path))
        walked_dir.sort()
        for dirname, subdirs, fnames in walked_dir:
            result_lines += self.notebook_display_formatter(dirname, fnames, self.included_suffixes)
        return '\n'.join(result_lines)

    def __repr__(self):
        """return newline-separated terminal listing of the files under path
        """
        result_lines = []
        # sorted so that the output order is deterministic
        walked_dir = list(walk(self.path))
        walked_dir.sort()
        for dirname, subdirs, fnames in walked_dir:
            result_lines += self.terminal_display_formatter(dirname, fnames, self.included_suffixes)
        return '\n'.join(result_lines)
@@ -1,439 +1,439 b''
1 """Patched version of standard library tokenize, to deal with various bugs.
1 """Patched version of standard library tokenize, to deal with various bugs.
2
2
3 Patches
3 Patches
4
4
5 - Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
5 - Relevant parts of Gareth Rees' patch for Python issue #12691 (untokenizing),
6 manually applied.
6 manually applied.
7 - Newlines in comments and blank lines should be either NL or NEWLINE, depending
7 - Newlines in comments and blank lines should be either NL or NEWLINE, depending
8 on whether they are in a multi-line statement. Filed as Python issue #17061.
8 on whether they are in a multi-line statement. Filed as Python issue #17061.
9
9
10 -------------------------------------------------------------------------------
10 -------------------------------------------------------------------------------
11 Tokenization help for Python programs.
11 Tokenization help for Python programs.
12
12
13 generate_tokens(readline) is a generator that breaks a stream of
13 generate_tokens(readline) is a generator that breaks a stream of
14 text into Python tokens. It accepts a readline-like method which is called
14 text into Python tokens. It accepts a readline-like method which is called
15 repeatedly to get the next line of input (or "" for EOF). It generates
15 repeatedly to get the next line of input (or "" for EOF). It generates
16 5-tuples with these members:
16 5-tuples with these members:
17
17
18 the token type (see token.py)
18 the token type (see token.py)
19 the token (a string)
19 the token (a string)
20 the starting (row, column) indices of the token (a 2-tuple of ints)
20 the starting (row, column) indices of the token (a 2-tuple of ints)
21 the ending (row, column) indices of the token (a 2-tuple of ints)
21 the ending (row, column) indices of the token (a 2-tuple of ints)
22 the original line (string)
22 the original line (string)
23
23
24 It is designed to match the working of the Python tokenizer exactly, except
24 It is designed to match the working of the Python tokenizer exactly, except
25 that it produces COMMENT tokens for comments and gives type OP for all
25 that it produces COMMENT tokens for comments and gives type OP for all
26 operators
26 operators
27
27
28 Older entry points
28 Older entry points
29 tokenize_loop(readline, tokeneater)
29 tokenize_loop(readline, tokeneater)
30 tokenize(readline, tokeneater=printtoken)
30 tokenize(readline, tokeneater=printtoken)
31 are the same, except instead of generating tokens, tokeneater is a callback
31 are the same, except instead of generating tokens, tokeneater is a callback
32 function to which the 5 fields described above are passed as 5 arguments,
32 function to which the 5 fields described above are passed as 5 arguments,
33 each time a new token is found."""
33 each time a new token is found."""
34 from __future__ import print_function
34 from __future__ import print_function
35
35
36 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
36 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
37 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
37 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
38 'Skip Montanaro, Raymond Hettinger')
38 'Skip Montanaro, Raymond Hettinger')
39
39
40 import string, re
40 import string, re
41 from token import *
41 from token import *
42
42
43 import token
43 import token
# Re-export every public name of the ``token`` module, plus this module's own
# public entry points.
__all__ = [x for x in dir(token) if not x.startswith("_")]
__all__ += ["COMMENT", "tokenize", "generate_tokens", "NL", "untokenize"]
# NOTE(review): ``del x`` only works where the list-comprehension variable
# leaks into module scope (Python 2 semantics) -- confirm the target version.
del x
del token

__all__ += ["TokenError"]

# Two extra token types that the ``token`` module does not define here:
# they take the next two free numbers and are registered in ``tok_name``.
COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
NL = N_TOKENS + 1
tok_name[NL] = 'NL'
N_TOKENS += 2
56
56
def group(*choices):
    """Return a parenthesised regex alternation of *choices*."""
    alternation = '|'.join(choices)
    return '(' + alternation + ')'


def any(*choices):
    """Return a regex matching zero or more repetitions of one of *choices*.

    Note that this name shadows the builtin ``any`` inside this module.
    """
    repeated = group(*choices)
    return repeated + '*'


def maybe(*choices):
    """Return a regex matching at most one occurrence of one of *choices*."""
    optional = group(*choices)
    return optional + '?'
60
60
# Regular-expression fragments, combined below into the Token and PseudoToken
# patterns.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
# Leading whitespace, any number of backslash-newline continuations, and an
# optional comment.
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'

# Integer literals; each form accepts an optional trailing ``l``/``L``.
Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?'
Binnumber = r'0[bB][01]+[lL]?'
Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
# Floating point and imaginary literals.
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
# Opening of a triple-quoted string, with optional u/b and r prefixes.
Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                 r"//=?",
                 r"[+\-*/%&|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
# Group 1 of PseudoToken is the token text without its leading whitespace.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
113
113
# Compile the patterns once at import time.
tokenprog, pseudoprog, single3prog, double3prog = map(
    re.compile, (Token, PseudoToken, Single3, Double3))
# Maps a string opener (quote, or prefix + triple quote) to the compiled
# pattern matching the remainder of that string.  Bare prefix letters map to
# None so that generate_tokens() can fall through with ``or`` to the entry
# for the following character.
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
            "'''": single3prog, '"""': double3prog,
            "r'''": single3prog, 'r"""': double3prog,
            "u'''": single3prog, 'u"""': double3prog,
            "ur'''": single3prog, 'ur"""': double3prog,
            "R'''": single3prog, 'R"""': double3prog,
            "U'''": single3prog, 'U"""': double3prog,
            "uR'''": single3prog, 'uR"""': double3prog,
            "Ur'''": single3prog, 'Ur"""': double3prog,
            "UR'''": single3prog, 'UR"""': double3prog,
            "b'''": single3prog, 'b"""': double3prog,
            "br'''": single3prog, 'br"""': double3prog,
            "B'''": single3prog, 'B"""': double3prog,
            "bR'''": single3prog, 'bR"""': double3prog,
            "Br'''": single3prog, 'Br"""': double3prog,
            "BR'''": single3prog, 'BR"""': double3prog,
            'r': None, 'R': None, 'u': None, 'U': None,
            'b': None, 'B': None}

# Every spelling of a triple-quote opener; each key maps to itself and the
# dict is used for membership tests.
triple_quoted = {}
for t in ("'''", '"""',
          "r'''", 'r"""', "R'''", 'R"""',
          "u'''", 'u"""', "U'''", 'U"""',
          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
          "uR'''", 'uR"""', "UR'''", 'UR"""',
          "b'''", 'b"""', "B'''", 'B"""',
          "br'''", 'br"""', "Br'''", 'Br"""',
          "bR'''", 'bR"""', "BR'''", 'BR"""'):
    triple_quoted[t] = t
# Every spelling of a single-quote opener; same self-mapping layout.
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "u'", 'u"', "U'", 'U"',
          "ur'", 'ur"', "Ur'", 'Ur"',
          "uR'", 'uR"', "UR'", 'UR"',
          "b'", 'b"', "B'", 'B"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"' ):
    single_quoted[t] = t

# Tab stop width used when measuring indentation columns.
tabsize = 8
157
157
class TokenError(Exception):
    """Raised by generate_tokens() when the input ends inside a multi-line
    string or statement."""
159
159
class StopTokenizing(Exception):
    """Exception that tokenize() catches and discards, ending tokenization."""
161
161
def printtoken(type, token, srow_scol, erow_ecol, line): # for testing
    """Print one token as ``srow,scol-erow,ecol:<TAB>TYPE<TAB>repr(token)``.

    *srow_scol* and *erow_ecol* are (row, column) pairs; *line* is accepted
    so the function can serve as a tokeneater callback but is not printed.
    """
    start_row, start_col = srow_scol
    end_row, end_col = erow_ecol
    fields = (start_row, start_col, end_row, end_col,
              tok_name[type], repr(token))
    print("%d,%d-%d,%d:\t%s\t%s" % fields)
167
167
def tokenize(readline, tokeneater=printtoken):
    """Tokenize an input stream, passing every token to a callback.

    readline
        A callable with the same interface as the ``readline()`` method of
        built-in file objects; every call should return one line of input
        as a string.

    tokeneater
        A callable invoked once per token with five arguments, matching the
        tuples produced by generate_tokens().  Defaults to printtoken.

    A StopTokenizing exception escaping from the loop ends tokenization
    quietly.
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        return
185
185
186 # backwards compatible interface
186 # backwards compatible interface
def tokenize_loop(readline, tokeneater):
    """Call *tokeneater* with the unpacked fields of every token that
    generate_tokens(readline) produces."""
    for fields in generate_tokens(readline):
        tokeneater(*fields)
190
190
class Untokenizer:
    """Rebuild source text from a token stream.

    ``tokens`` accumulates the output fragments; ``prev_row`` and
    ``prev_col`` record where the previously emitted token ended so that
    the gap before the next token can be filled with spaces.
    """

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0

    def add_whitespace(self, start, tok_type=None):
        """Emit the whitespace that separates the previous token from *start*.

        start
            (row, col) position at which the next token begins.
        tok_type
            Type of the next token.  It is only consulted when the token
            begins on a later row than the previous one ended; NEWLINE, NL
            and ENDMARKER tokens get no separating space in that case.
            The parameter is optional so that existing one-argument callers
            keep working.
        """
        row, col = start
        assert row >= self.prev_row
        col_offset = col - self.prev_col
        if col_offset > 0:
            self.tokens.append(" " * col_offset)
        elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
            # Line was backslash-continued
            self.tokens.append(" ")

    def untokenize(self, tokens):
        """Return the source text for *tokens*.

        Items with at least four fields (type, string, start, end) are laid
        out using their positions.  As soon as a 2-field item is met, the
        rest of the stream is handed to compat(), which spaces tokens
        heuristically.
        """
        iterable = iter(tokens)
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            tok_type, token, start, end = t[:4]
            # Pass the token type along: add_whitespace needs it to decide
            # whether a row change calls for a separating space.
            self.add_whitespace(start, tok_type)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                # The next token starts on a fresh line.
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        """Append *token* and the rest of *iterable* without position data.

        Only the first two fields (type, string) of each item are used.
        Names and numbers get a trailing space, adjacent strings are
        separated by a space, and the innermost INDENT string is re-emitted
        at the start of each line.
        """
        # This import is here to avoid problems when the itertools
        # module is not built yet and tokenize is imported.
        from itertools import chain
        startline = False
        prevstring = False
        indents = []
        toks_append = self.tokens.append
        for tok in chain([token], iterable):
            toknum, tokval = tok[:2]

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)
257
257
def untokenize(iterable):
    """Rebuild Python source code from a stream of tokens.

    Every item produced by *iterable* must be a token sequence with at
    least two elements: a token number and a token value.  When items
    carry only those two elements, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output text will tokenize back to the input
        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
        newcode = untokenize(t1)
        readline = iter(newcode.splitlines(1)).next
        t2 = [tok[:2] for tok in generate_tokens(readline)]
        assert t1 == t2
    """
    return Untokenizer().untokenize(iterable)
278
278
def generate_tokens(readline):
    """
    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string.  Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.

    Raises TokenError when the input ends inside a multi-line string or
    statement, and IndentationError when a dedent matches no enclosing
    indentation level.
    """
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            # strstart and endprog were set by the token scan below on the
            # line where this string started.
            if not line:
                # Call form of raise: valid on both Python 2 and Python 3.
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield (ERRORTOKEN, contstr + line,
                       strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NEWLINE, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield (NEWLINE, line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                # Call form of raise: valid on both Python 2 and Python 3.
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    # Inside brackets a line end does not end the statement.
                    yield (NL if parenlev > 0 else NEWLINE,
                           token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Prefix letters map to None in endprogs, so fall
                        # through to the entry for the quote character.
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield (STRING, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield (OP, token, spos, epos, line)
            else:
                yield (ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
                pos += 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
433
433
if __name__ == '__main__':                     # testing
    # Print the tokens of the file named on the command line, or of
    # standard input when no file name is given.
    import sys
    if len(sys.argv) > 1:
        # Use a context manager so the input file is closed once
        # tokenization finishes or fails.
        with open(sys.argv[1]) as source:
            tokenize(source.readline)
    else:
        tokenize(sys.stdin.readline)
General Comments 0
You need to be logged in to leave comments. Login now