nbxml.py
189 lines
| 6.7 KiB
| text/x-python
|
PythonLexer
Brian E. Granger
|
"""Read and write notebook files as XML.

Authors:

* Brian Granger
"""
#-----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
Brian E. Granger
|
r4392 | |||
Brian E. Granger
|
r4406 | from base64 import encodestring, decodestring | ||
Brian E. Granger
|
r4633 | import warnings | ||
Brian E. Granger
|
r4392 | from xml.etree import ElementTree as ET | ||
Thomas Kluyver
|
r13353 | from IPython.utils.py3compat import unicode_type | ||
Brian E. Granger
|
r4401 | from .rwbase import NotebookReader, NotebookWriter | ||
Brian E. Granger
|
r4402 | from .nbbase import ( | ||
Brian E. Granger
|
r4649 | new_code_cell, new_text_cell, new_worksheet, new_notebook, new_output, | ||
new_metadata | ||||
Brian E. Granger
|
r4402 | ) | ||
Brian E. Granger
|
r4401 | |||
Brian E. Granger
|
#-----------------------------------------------------------------------------
# Code
#-----------------------------------------------------------------------------
Brian E. Granger
|
def indent(elem, level=0):
    """Insert newline/indent whitespace into an element tree, in place.

    Only ``text`` and ``tail`` values that are missing or whitespace-only
    are overwritten, so real character data is left untouched.

    Parameters
    ----------
    elem : xml.etree.ElementTree.Element
        Root of the (sub)tree to re-indent; it is modified in place.
    level : int
        Nesting depth of ``elem``; the top-level call uses the default 0.
    """
    pad = "\n" + level * " "

    def is_blank(value):
        return not value or not value.strip()

    if not len(elem):
        # Leaf element: only its tail is touched, and never for the root.
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        elem.text = pad + " "
    if is_blank(elem.tail):
        elem.tail = pad

    last_child = None
    for child in elem:
        indent(child, level + 1)
        last_child = child

    # The last child's tail sits right before this element's closing tag,
    # so it is pulled back to this element's own indentation.
    if is_blank(last_child.tail):
        last_child.tail = pad


def _get_text(e, tag):
    """Return the text of the first ``tag`` child of ``e``, or None if absent."""
    child = e.find(tag)
    return child.text if child is not None else None
Brian E. Granger
|
r4392 | |||
Brian E. Granger
|
def _set_text(nbnode, attr, parent, tag):
    """Append a ``tag`` child of ``parent`` whose text is ``nbnode[attr]``.

    Nothing is added when ``attr`` is not present in ``nbnode``.
    """
    if attr not in nbnode:
        return
    child = ET.SubElement(parent, tag)
    child.text = nbnode[attr]


def _get_int(e, tag):
    """Return the first ``tag`` child's text passed through ``int``.

    Returns None when ``e`` has no such child.
    """
    child = e.find(tag)
    if child is not None:
        return int(child.text)
    return None


def _set_int(nbnode, attr, parent, tag):
    """Append a ``tag`` child of ``parent`` holding ``nbnode[attr]`` as text.

    The value is converted with ``unicode_type``. Nothing is added when
    ``attr`` is not present in ``nbnode``.
    """
    if attr not in nbnode:
        return
    child = ET.SubElement(parent, tag)
    child.text = unicode_type(nbnode[attr])
Brian E. Granger
|
r4402 | |||
Brian E. Granger
|
def _get_bool(e, tag):
    """Return the first ``tag`` child's integer text as a bool.

    Returns None when ``e`` has no such child.
    """
    child = e.find(tag)
    if child is not None:
        return bool(int(child.text))
    return None


def _set_bool(nbnode, attr, parent, tag):
    """Append a ``tag`` child of ``parent`` encoding ``nbnode[attr]`` as 1/0.

    Nothing is added when ``attr`` is not present in ``nbnode``.
    """
    if attr not in nbnode:
        return
    child = ET.SubElement(parent, tag)
    child.text = u'1' if nbnode[attr] else u'0'
Brian E. Granger
|
def _get_binary(e, tag):
    """Return the base64-decoded bytes of the first ``tag`` child of ``e``.

    Returns None when ``e`` has no such child, and empty bytes when the
    child exists but carries no text.
    """
    # Imported locally so this helper does not depend on the removed
    # ``encodestring``/``decodestring`` names being importable.
    import base64

    child = e.find(tag)
    if child is None:
        return None

    payload = child.text or ''
    if not isinstance(payload, bytes):
        # ElementTree returns text (unicode) on Python 3, but the base64
        # decoder only accepts bytes there.
        payload = payload.encode('ascii')

    # ``decodestring`` was removed in Python 3.9; ``decodebytes`` is its
    # replacement. Fall back to the old name where the new one is missing.
    decode = getattr(base64, 'decodebytes', None) or base64.decodestring
    return decode(payload)


def _set_binary(nbnode, attr, parent, tag):
    """Append a ``tag`` child of ``parent`` holding ``nbnode[attr]`` as base64.

    ``nbnode[attr]`` must be a bytes-like payload. The element text is stored
    as ASCII text rather than bytes so that ElementTree can serialize it.
    Nothing is added when ``attr`` is not present in ``nbnode``.
    """
    # Imported locally so this helper does not depend on the removed
    # ``encodestring``/``decodestring`` names being importable.
    import base64

    if attr not in nbnode:
        return

    # ``encodestring`` was removed in Python 3.9; ``encodebytes`` is its
    # replacement. Fall back to the old name where the new one is missing.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring

    child = ET.SubElement(parent, tag)
    child.text = encode(nbnode[attr]).decode('ascii')
Brian E. Granger
|
class XMLReader(NotebookReader):
    """Reader for the deprecated XML notebook format."""

    def reads(self, s, **kwargs):
        """Parse the XML document ``s`` and return it as a notebook.

        ``kwargs`` are forwarded unchanged to :meth:`to_notebook`.
        """
        root = ET.fromstring(s)
        return self.to_notebook(root, **kwargs)

    def to_notebook(self, root, **kwargs):
        """Convert a parsed XML notebook tree into a notebook object.

        Parameters
        ----------
        root : xml.etree.ElementTree.Element
            Root element of the XML notebook.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        The value returned by ``new_notebook`` for the collected worksheets,
        with metadata built by ``new_metadata`` from the ``name`` element.

        Notes
        -----
        A ``DeprecationWarning`` is issued on every call. Of the top-level
        elements, only ``name`` is carried into the metadata. A missing
        ``worksheets``, ``cells``, ``outputs`` or ``traceback`` container is
        treated as empty.
        """
        warnings.warn('The XML notebook format is no longer supported, '
                      'please convert your notebooks to JSON.', DeprecationWarning)
        nbname = _get_text(root, u'name')

        worksheets = []
        for ws_e in self._iter_container(root, u'worksheets', u'worksheet'):
            wsname = _get_text(ws_e, u'name')
            cells = []
            # No tag filter here: every element below ``cells`` is visited
            # and dispatched on its tag, other tags are ignored.
            for cell_e in self._iter_container(ws_e, u'cells'):
                if cell_e.tag == u'codecell':
                    cells.append(self._read_code_cell(cell_e))
                elif cell_e.tag == u'htmlcell':
                    cells.append(self._read_text_cell(cell_e, u'html'))
                elif cell_e.tag == u'markdowncell':
                    cells.append(self._read_text_cell(cell_e, u'markdown'))
            worksheets.append(new_worksheet(name=wsname, cells=cells))

        md = new_metadata(name=nbname)
        return new_notebook(metadata=md, worksheets=worksheets)

    @staticmethod
    def _iter_container(parent, container_tag, tag=None):
        """Iterate the elements under ``parent``'s ``container_tag`` child.

        Uses ``Element.iter`` (``getiterator`` was removed in Python 3.9), so
        the container itself and all of its descendants are candidates,
        filtered by ``tag`` when one is given. Yields nothing when the
        container element is absent.
        """
        container = parent.find(container_tag)
        if container is None:
            return iter(())
        return container.iter(tag)

    def _read_text_cell(self, cell_e, cell_type):
        """Build a text cell of ``cell_type`` from ``cell_e``."""
        source = _get_text(cell_e, u'source')
        rendered = _get_text(cell_e, u'rendered')
        return new_text_cell(cell_type, source=source, rendered=rendered)

    def _read_code_cell(self, cell_e):
        """Build a code cell, including its outputs, from ``cell_e``."""
        code = _get_text(cell_e, u'input')
        prompt_number = _get_int(cell_e, u'prompt_number')
        collapsed = _get_bool(cell_e, u'collapsed')
        language = _get_text(cell_e, u'language')
        outputs = [
            self._read_output(output_e)
            for output_e in self._iter_container(cell_e, u'outputs', u'output')
        ]
        return new_code_cell(input=code, prompt_number=prompt_number,
                             language=language, outputs=outputs,
                             collapsed=collapsed)

    def _read_output(self, output_e):
        """Build a single cell output from ``output_e``."""
        frames = [
            frame_e.text
            for frame_e in self._iter_container(output_e, u'traceback', u'frame')
        ]
        return new_output(
            output_type=_get_text(output_e, u'output_type'),
            output_png=_get_binary(output_e, u'png'),
            output_text=_get_text(output_e, u'text'),
            output_svg=_get_text(output_e, u'svg'),
            output_html=_get_text(output_e, u'html'),
            output_latex=_get_text(output_e, u'latex'),
            output_json=_get_text(output_e, u'json'),
            output_javascript=_get_text(output_e, u'javascript'),
            output_jpeg=_get_binary(output_e, u'jpeg'),
            prompt_number=_get_int(output_e, u'prompt_number'),
            etype=_get_text(output_e, u'etype'),
            evalue=_get_text(output_e, u'evalue'),
            # An absent or empty traceback is passed on as None.
            traceback=frames or None,
        )
Brian E. Granger
|
r4392 | |||
Brian E. Granger
|
r4637 | |||
Brian E. Granger
|
# Module-level convenience API: a single shared XMLReader instance whose
# bound methods are exposed as plain module functions.
_reader = XMLReader()
reads = _reader.reads
# ``read`` is not defined in XMLReader itself; presumably it is inherited
# from NotebookReader -- confirm in rwbase.
read = _reader.read
to_notebook = _reader.to_notebook