##// END OF EJS Templates
FileLink: escape HTML unsafe characters from path...
Cristian Ciupitu -
Show More
@@ -1,557 +1,559 b''
1 1 """Various display related classes.
2 2
3 3 Authors : MinRK, gregcaporaso, dannystaple
4 4 """
5 from html import escape as html_escape
5 6 from os.path import exists, isfile, splitext, abspath, join, isdir
6 7 from os import walk, sep, fsdecode
7 8
8 9 from IPython.core.display import DisplayObject
9 10
10 11 __all__ = ['Audio', 'IFrame', 'YouTubeVideo', 'VimeoVideo', 'ScribdDocument',
11 12 'FileLink', 'FileLinks']
12 13
13 14
class Audio(DisplayObject):
    """Create an audio object.

    When this object is returned by an input cell or passed to the
    display function, it will result in Audio controls being displayed
    in the frontend (only works in the notebook).

    Parameters
    ----------
    data : numpy array, list, unicode, str or bytes
        Can be one of

          * Numpy 1d array containing the desired waveform (mono)
          * Numpy 2d array containing waveforms for each channel.
            Shape=(NCHAN, NSAMPLES). For the standard channel order, see
            http://msdn.microsoft.com/en-us/library/windows/hardware/dn653308(v=vs.85).aspx
          * List of float or integer representing the waveform (mono)
          * String containing the filename
          * Bytestring containing raw PCM data or
          * URL pointing to a file on the web.

        If the array option is used the waveform will be normalized.

        If a filename or url is used the format support will be browser
        dependent.
    url : unicode
        A URL to download the data from.
    filename : unicode
        Path to a local file to load the data from.
    embed : boolean
        Should the audio data be embedded using a data URI (True) or should
        the original source be referenced. Set this to True if you want the
        audio to be playable later with no internet connection in the notebook.

        Default is `True`, unless the keyword argument `url` is set, then
        default value is `False`.
    rate : integer
        The sampling rate of the raw data.
        Only required when data parameter is being used as an array
    autoplay : bool
        Set to True if the audio should immediately start playing.
        Default is `False`.

    Examples
    --------
    ::

        # Generate a sound
        import numpy as np
        framerate = 44100
        t = np.linspace(0,5,framerate*5)
        data = np.sin(2*np.pi*220*t) + np.sin(2*np.pi*224*t)
        Audio(data,rate=framerate)

        # Can also do stereo or more channels
        dataleft = np.sin(2*np.pi*220*t)
        dataright = np.sin(2*np.pi*224*t)
        Audio([dataleft, dataright],rate=framerate)

        Audio("http://www.nch.com.au/acm/8k16bitpcm.wav")  # From URL
        Audio(url="http://www.w3schools.com/html/horse.ogg")

        Audio('/path/to/sound.wav')  # From file
        Audio(filename='/path/to/sound.ogg')

        Audio(b'RAW_WAV_DATA..')  # From bytes
        Audio(data=b'RAW_WAV_DATA..')

    """
    # Files are read in binary mode: audio payloads are bytes, not text.
    _read_flags = 'rb'

    def __init__(self, data=None, filename=None, url=None, embed=None, rate=None, autoplay=False):
        """Validate the arguments and prepare the audio payload.

        See the class docstring for the meaning of each parameter.

        Raises
        ------
        ValueError
            If none of ``data``, ``filename`` or ``url`` is given, if
            ``embed=False`` is requested without a ``url``, or if sample
            data is given without a ``rate``.
        """
        if filename is None and url is None and data is None:
            raise ValueError("No audio data found. Expecting filename, url, or data.")
        if embed is False and url is None:
            raise ValueError("No url found. Expecting url when embed=False")

        # A URL is referenced rather than embedded unless the caller
        # explicitly asked for embedding; everything else is embedded.
        self.embed = not (url is not None and embed is not True)
        self.autoplay = autoplay
        super(Audio, self).__init__(data=data, url=url, filename=filename)

        # Anything that is not already a bytestring is treated as raw
        # samples and encoded as WAV.
        if self.data is not None and not isinstance(self.data, bytes):
            self.data = self._make_wav(data, rate)

    def reload(self):
        """Reload the raw data from file or URL and guess the MIME type."""
        import mimetypes
        if self.embed:
            # Only fetch the payload when it is going to be embedded.
            super(Audio, self).reload()

        if self.filename is not None:
            self.mimetype = mimetypes.guess_type(self.filename)[0]
        elif self.url is not None:
            self.mimetype = mimetypes.guess_type(self.url)[0]
        else:
            # In-memory sample data is always encoded as WAV by _make_wav.
            self.mimetype = "audio/wav"

    def _make_wav(self, data, rate):
        """Transform a numpy array or list of samples into WAV bytes.

        Parameters
        ----------
        data : numpy array or list
            1D (mono) or 2D (NCHAN, NSAMPLES) samples. Without numpy only
            flat (mono) sequences are supported.
        rate : integer
            Sampling rate written to the WAV header.

        Returns
        -------
        bytes
            A complete 16-bit PCM WAV file.

        Raises
        ------
        ValueError
            If the array is not 1D or 2D, or if ``rate`` is None.
        TypeError
            If numpy is unavailable and ``data`` is a nested sequence.
        """
        import struct
        from io import BytesIO
        import wave

        try:
            import numpy as np
        except ImportError:
            np = None

        if np is not None:
            data = np.array(data, dtype=float)
            if len(data.shape) == 1:
                nchan = 1
            elif len(data.shape) == 2:
                # In wave files,channels are interleaved. E.g.,
                # "L1R1L2R2..." for stereo.  See
                # http://msdn.microsoft.com/en-us/library/windows/hardware/dn653308(v=vs.85).aspx
                # for channel ordering
                nchan = data.shape[0]
                data = data.T.ravel()
            else:
                raise ValueError('Array audio input must be a 1D or 2D array')
            peak = np.max(np.abs(data))
            if peak == 0:
                # Pure silence: avoid dividing by zero, samples stay at 0.
                peak = 1.0
            scaled = np.int16(data / peak * 32767).tolist()
        else:
            # Materialise once so generators are not partially consumed
            # by the nesting check below; fails if not an iterable.
            samples = list(data)
            if samples:
                try:
                    iter(samples[0])
                except TypeError:
                    # this means it's not a nested list, which is what we want
                    pass
                else:
                    raise TypeError('Only lists of mono audio are '
                                    'supported if numpy is not installed')
            # ``or 1.0`` guards against dividing by zero for pure silence.
            peak = float(max([abs(x) for x in samples])) or 1.0
            scaled = [int(x / peak * 32767) for x in samples]
            nchan = 1

        if rate is None:
            raise ValueError("rate must be specified when data is a numpy "
                             "array or list of audio samples.")

        fp = BytesIO()
        # The context manager finalises the header and releases the writer
        # even if encoding fails; it does not close ``fp`` because the wave
        # module did not open it.
        with wave.open(fp, mode='wb') as waveobj:
            waveobj.setnchannels(nchan)
            waveobj.setframerate(rate)
            waveobj.setsampwidth(2)  # 2 bytes per sample == 16-bit PCM
            waveobj.setcomptype('NONE', 'NONE')
            waveobj.writeframes(b''.join([struct.pack('<h', x) for x in scaled]))
        return fp.getvalue()

    def _data_and_metadata(self):
        """shortcut for returning metadata with url information, if defined"""
        md = {}
        if self.url:
            md['url'] = self.url
        if md:
            return self.data, md
        else:
            return self.data

    def _repr_html_(self):
        """Return the HTML ``<audio>`` element for this object."""
        src = """
                <audio controls="controls" {autoplay}>
                    <source src="{src}" type="{type}" />
                    Your browser does not support the audio element.
                </audio>
              """
        return src.format(src=self.src_attr(), type=self.mimetype, autoplay=self.autoplay_attr())

    def src_attr(self):
        """Return the value for the ``src`` attribute.

        This is a base64 ``data:`` URI when embedding, the URL when one is
        set, and an empty string otherwise.
        """
        import base64
        if self.embed and (self.data is not None):
            encoded = base64.b64encode(self.data).decode('ascii')
            return """data:{type};base64,{base64}""".format(type=self.mimetype,
                                                            base64=encoded)
        elif self.url is not None:
            return self.url
        else:
            return ""

    def autoplay_attr(self):
        """Return the ``autoplay`` HTML attribute, or '' when disabled."""
        if self.autoplay:
            return 'autoplay="autoplay"'
        else:
            return ''
197 198
class IFrame(object):
    """
    Generic class to embed an iframe in an IPython notebook

    Parameters
    ----------
    src : str
        URL the iframe should load.
    width, height
        Values substituted into the ``width`` and ``height`` attributes.
    **kwargs
        Extra parameters, url-encoded and appended to ``src`` as a query
        string.
    """

    # Template filled in by _repr_html_; ``params`` is either empty or a
    # complete "?key=value&..." query string.
    iframe = """
        <iframe
            width="{width}"
            height="{height}"
            src="{src}{params}"
            frameborder="0"
            allowfullscreen
        ></iframe>
        """

    def __init__(self, src, width, height, **kwargs):
        """Store the iframe source, dimensions and query parameters."""
        self.src = src
        self.width = width
        self.height = height
        self.params = kwargs

    def _repr_html_(self):
        """return the embed iframe"""
        if self.params:
            # Deferred import: only needed when query parameters are given.
            from urllib.parse import urlencode
            params = "?" + urlencode(self.params)
        else:
            params = ""
        return self.iframe.format(src=self.src,
                                  width=self.width,
                                  height=self.height,
                                  params=params)
233 234
class YouTubeVideo(IFrame):
    """Class for embedding a YouTube Video in an IPython session, based on its video id.

    e.g. to embed the video from https://www.youtube.com/watch?v=foo , you would
    do::

        vid = YouTubeVideo("foo")
        display(vid)

    To start from 30 seconds::

        vid = YouTubeVideo("abc", start=30)
        display(vid)

    To calculate seconds from time as hours, minutes, seconds use
    :class:`datetime.timedelta`::

        start=int(timedelta(hours=1, minutes=46, seconds=40).total_seconds())

    Other parameters can be provided as documented at
    https://developers.google.com/youtube/player_parameters#Parameters

    When converting the notebook using nbconvert, a jpeg representation of the video
    will be inserted in the document.
    """

    def __init__(self, id, width=400, height=300, **kwargs):
        """Build the embed URL for video ``id`` and initialise the iframe.

        The id is also kept on the instance because ``_repr_jpeg_`` needs
        it to locate the thumbnail.
        """
        self.id = id
        src = "https://www.youtube.com/embed/{0}".format(id)
        super(YouTubeVideo, self).__init__(src, width, height, **kwargs)

    def _repr_jpeg_(self):
        """Return the video's thumbnail as JPEG bytes, or None on I/O failure."""
        # Deferred import
        from urllib.request import urlopen

        thumbnail_url = "https://img.youtube.com/vi/{id}/hqdefault.jpg".format(id=self.id)
        try:
            # The context manager closes the HTTP response once it is read.
            with urlopen(thumbnail_url) as response:
                return response.read()
        except IOError:
            return None
273 274
class VimeoVideo(IFrame):
    """
    Embed a Vimeo video in an IPython session, addressed by its video id.
    """

    def __init__(self, id, width=400, height=300, **kwargs):
        """Point the iframe at the Vimeo player page for ``id``.

        ``width``, ``height`` and any extra keyword arguments are passed
        through unchanged to :class:`IFrame`.
        """
        super().__init__(
            "https://player.vimeo.com/video/{0}".format(id),
            width,
            height,
            **kwargs
        )
282 283
class ScribdDocument(IFrame):
    """
    Embed a Scribd document in an IPython session, addressed by its id.

    Pass ``start_page`` to choose the page the document opens on.
    Pass ``view_mode`` to choose the display type, one of
    scroll | slideshow | book

    e.g. to display Wes' foundational paper about PANDAS in book mode from page 3

    ScribdDocument(71048089, width=800, height=400, start_page=3, view_mode="book")
    """

    def __init__(self, id, width=400, height=300, **kwargs):
        """Point the iframe at the Scribd embed page for ``id``.

        ``width``, ``height`` and any extra keyword arguments are passed
        through unchanged to :class:`IFrame`.
        """
        super().__init__(
            "https://www.scribd.com/embeds/{0}/content".format(id),
            width,
            height,
            **kwargs
        )
298 299
class FileLink(object):
    """Class for embedding a local file link in an IPython session, based on path

    e.g. to embed a link that was generated in the IPython notebook as my/data.txt

    you would do::

        local_file = FileLink("my/data.txt")
        display(local_file)

    or in the HTML notebook, just::

        FileLink("my/data.txt")
    """

    # Filled with (href, link text). Both values must already be
    # HTML-escaped; the href is delimited by single quotes.
    html_link_str = "<a href='%s' target='_blank'>%s</a>"

    def __init__(self,
                 path,
                 url_prefix='',
                 result_html_prefix='',
                 result_html_suffix='<br>'):
        """
        Parameters
        ----------
        path : str
            path to the file or directory that should be formatted
        url_prefix : str
            prefix to be prepended to all files to form a working link [default:
            '']
        result_html_prefix : str
            text to append to beginning to link [default: '']
        result_html_suffix : str
            text to append at the end of link [default: '<br>']

        Raises
        ------
        ValueError
            If ``path`` is an existing directory.
        """
        if isdir(path):
            raise ValueError("Cannot display a directory using FileLink. "
                             "Use FileLinks to display '%s'." % path)
        self.path = fsdecode(path)
        self.url_prefix = url_prefix
        self.result_html_prefix = result_html_prefix
        self.result_html_suffix = result_html_suffix

    def _format_path(self):
        """Return the HTML anchor for ``self.path``, wrapped in prefix/suffix."""
        # The href is an attribute value, so quotes are escaped as well;
        # the link text only needs &, < and > escaped.
        fp = ''.join([self.url_prefix, html_escape(self.path)])
        return ''.join([self.result_html_prefix,
                        self.html_link_str %
                        (fp, html_escape(self.path, quote=False)),
                        self.result_html_suffix])

    def _repr_html_(self):
        """return html link to file
        """
        if not exists(self.path):
            # The path is caller-supplied text placed into HTML, so it is
            # escaped here just like in _format_path.
            return ("Path (<tt>%s</tt>) doesn't exist. "
                    "It may still be in the process of "
                    "being generated, or you may have the "
                    "incorrect path." % html_escape(self.path, quote=False))

        return self._format_path()

    def __repr__(self):
        """return absolute path to file
        """
        return abspath(self.path)
363 365
class FileLinks(FileLink):
    """Class for embedding local file links in an IPython session, based on path

    e.g. to embed links to files that were generated in the IPython notebook
    under ``my/data``, you would do::

        local_files = FileLinks("my/data")
        display(local_files)

    or in the HTML notebook, just::

        FileLinks("my/data")
    """
    def __init__(self,
                 path,
                 url_prefix='',
                 included_suffixes=None,
                 result_html_prefix='',
                 result_html_suffix='<br>',
                 notebook_display_formatter=None,
                 terminal_display_formatter=None,
                 recursive=True):
        """
        See :class:`FileLink` for the ``path``, ``url_prefix``,
        ``result_html_prefix`` and ``result_html_suffix`` parameters.

        included_suffixes : list
          Filename suffixes to include when formatting output [default: include
          all files]

        notebook_display_formatter : function
          Used to format links for display in the notebook. See discussion of
          formatter functions below.

        terminal_display_formatter : function
          Used to format links for display in the terminal. See discussion of
          formatter functions below.

        recursive : boolean
          Whether to recurse into subdirectories. Default is True.

        Formatter functions must be of the form::

            f(dirname, fnames, included_suffixes)

        dirname : str
          The name of a directory
        fnames : list
          The files in that directory
        included_suffixes : list
          The file suffixes that should be included in the output (passing None
          means to include all suffixes in the output in the built-in formatters)

        The function should return a list of lines that will be printed in the
        notebook (if passing notebook_display_formatter) or the terminal (if
        passing terminal_display_formatter). This function is iterated over for
        each directory in self.path. Default formatters are in place, can be
        passed here to support alternative formatting.

        Raises ValueError if ``path`` is an existing regular file.
        """
        if isfile(path):
            raise ValueError("Cannot display a file using FileLinks. "
                             "Use FileLink to display '%s'." % path)
        self.included_suffixes = included_suffixes
        # Normalise to str, as FileLink does, so bytes paths work too.
        path = fsdecode(path)
        # remove trailing slashes for more consistent output formatting;
        # keep the original when stripping would empty it (e.g. "/").
        path = path.rstrip('/') or path

        self.path = path
        self.url_prefix = url_prefix
        self.result_html_prefix = result_html_prefix
        self.result_html_suffix = result_html_suffix

        # The default formatters read the prefix/suffix attributes set
        # above, so they must be built after those assignments.
        self.notebook_display_formatter = \
            notebook_display_formatter or self._get_notebook_display_formatter()
        self.terminal_display_formatter = \
            terminal_display_formatter or self._get_terminal_display_formatter()

        self.recursive = recursive

    def _get_display_formatter(self,
                               dirname_output_format,
                               fname_output_format,
                               fp_format,
                               fp_cleaner=None,
                               escape_html=False):
        """ generate built-in formatter function

           this is used to define both the notebook and terminal built-in
            formatters as they only differ by some wrapper text for each entry

           dirname_output_format: string to use for formatting directory
            names, dirname will be substituted for a single "%s" which
            must appear in this string
           fname_output_format: string to use for formatting file names,
            if a single "%s" appears in the string, fname will be substituted
            if two "%s" appear in the string, the path to fname will be
             substituted for the first and fname will be substituted for the
             second
           fp_format: string to use for formatting filepaths, must contain
            exactly two "%s" and the dirname will be substituted for the first
            and fname will be substituted for the second
           fp_cleaner: optional function applied to each formatted filepath
           escape_html: when true, directory and file names are HTML-escaped
            before substitution (the format strings themselves are not)
        """
        if escape_html:
            def attr_text(text):
                # Used inside the quoted href attribute: escape quotes too.
                return html_escape(text)

            def body_text(text):
                # Used as element text: only &, < and > need escaping.
                return html_escape(text, quote=False)
        else:
            def attr_text(text):
                return text
            body_text = attr_text

        def f(dirname, fnames, included_suffixes=None):
            # begin by figuring out which filenames, if any,
            # are going to be displayed
            display_fnames = [
                fname for fname in fnames
                if (isfile(join(dirname, fname)) and
                    (included_suffixes is None or
                     splitext(fname)[1] in included_suffixes))
            ]

            if not display_fnames:
                # if there are no filenames to display, don't print anything
                # (not even the directory name)
                return []

            # otherwise print the formatted directory name followed by
            # the formatted filenames
            result = [dirname_output_format % body_text(dirname)]
            for fname in display_fnames:
                fp = fp_format % (attr_text(dirname), attr_text(fname))
                if fp_cleaner is not None:
                    fp = fp_cleaner(fp)
                shown_name = body_text(fname)
                try:
                    # output can include both a filepath and a filename...
                    fname_output_line = fname_output_format % (fp, shown_name)
                except TypeError:
                    # ... or just a single filepath
                    fname_output_line = fname_output_format % shown_name
                result.append(fname_output_line)
            return result
        return f

    def _get_notebook_display_formatter(self,
                                        spacer="&nbsp;&nbsp;"):
        """ generate function to use for notebook formatting
        """
        dirname_output_format = \
            self.result_html_prefix + "%s/" + self.result_html_suffix
        fname_output_format = \
            self.result_html_prefix + spacer + self.html_link_str + self.result_html_suffix
        fp_format = self.url_prefix + '%s/%s'
        if sep == "\\":
            # Working on a platform where the path separator is "\", so
            # must convert these to "/" for generating a URI
            def fp_cleaner(fp):
                # Replace all occurrences of backslash ("\") with a forward
                # slash ("/") - this is necessary on windows when a path is
                # provided as input, but we must link to a URI
                return fp.replace('\\', '/')
        else:
            fp_cleaner = None

        # Names end up inside HTML here, so they are escaped.
        return self._get_display_formatter(dirname_output_format,
                                           fname_output_format,
                                           fp_format,
                                           fp_cleaner,
                                           escape_html=True)

    def _get_terminal_display_formatter(self,
                                        spacer="  "):
        """ generate function to use for terminal formatting
        """
        dirname_output_format = "%s/"
        fname_output_format = spacer + "%s"
        fp_format = '%s/%s'

        return self._get_display_formatter(dirname_output_format,
                                           fname_output_format,
                                           fp_format)

    def _walked_dirs(self):
        """Return the sorted ``os.walk`` entries to display for self.path.

        All entries when ``self.recursive`` is true, otherwise only the
        first one. The list is empty when the walk yields nothing.
        """
        walker = walk(self.path)
        if self.recursive:
            walked_dir = list(walker)
        else:
            # A default avoids StopIteration when walk() yields nothing.
            first = next(walker, None)
            walked_dir = [] if first is None else [first]
        walked_dir.sort()
        return walked_dir

    def _format_path(self):
        """Return the newline-separated HTML lines for the notebook."""
        result_lines = []
        for dirname, subdirs, fnames in self._walked_dirs():
            result_lines += self.notebook_display_formatter(dirname, fnames, self.included_suffixes)
        return '\n'.join(result_lines)

    def __repr__(self):
        """return newline-separated absolute paths
        """
        result_lines = []
        for dirname, subdirs, fnames in self._walked_dirs():
            result_lines += self.terminal_display_formatter(dirname, fnames, self.included_suffixes)
        return '\n'.join(result_lines)
General Comments 0
You need to be logged in to leave comments. Login now