nbxml.py
228 lines
| 9.0 KiB
| text/x-python
|
PythonLexer
Brian E. Granger
|
r4392 | """Read and write notebook files as XML.""" | ||
Brian E. Granger
|
try:
    # Python 3 spellings; the ``*string`` aliases were removed in Python 3.9.
    from base64 import encodebytes as encodestring, decodebytes as decodestring
except ImportError:
    # Python 2 only provides the original names.
    from base64 import encodestring, decodestring
from xml.etree import ElementTree as ET

from .rwbase import NotebookReader, NotebookWriter
from .nbbase import (
    new_code_cell, new_text_cell, new_worksheet, new_notebook, new_output
)
Brian E. Granger
|
r4401 | |||
def indent(elem, level=0):
    """Pretty-print an element tree in place by filling in whitespace.

    Whitespace-only (or missing) ``text``/``tail`` values are replaced with a
    newline followed by padding proportional to the nesting depth; text or
    tails that contain non-whitespace characters are left untouched.

    Parameters
    ----------
    elem : xml.etree.ElementTree.Element
        Root of the (sub)tree to indent.  Modified in place.
    level : int
        Nesting depth of ``elem``; callers normally leave this at 0.
    """
    # NOTE(review): the padding unit is a single space as it appears in this
    # copy of the file; confirm it was not meant to be wider.
    pad = " "
    i = "\n" + level*pad
    if len(elem):
        if not elem.text or not elem.text.strip():
            # Text before the first child: indent one level deeper.
            elem.text = i + pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for child in elem:
            indent(child, level+1)
        # ``child`` is now the last child (len(elem) > 0 guarantees one):
        # its tail precedes the closing tag of ``elem``, so it is dedented
        # back to this element's own level.
        if not child.tail or not child.tail.strip():
            child.tail = i
    else:
        # Leaf element: only the tail needs fixing, and never for the root.
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i
def _get_text(e, tag): | ||||
sub_e = e.find(tag) | ||||
if sub_e is None: | ||||
return None | ||||
else: | ||||
return sub_e.text | ||||
Brian E. Granger
|
r4392 | |||
Brian E. Granger
|
r4402 | def _set_text(nbnode, attr, parent, tag): | ||
if attr in nbnode: | ||||
e = ET.SubElement(parent, tag) | ||||
e.text = nbnode[attr] | ||||
def _get_int(e, tag): | ||||
sub_e = e.find(tag) | ||||
if sub_e is None: | ||||
return None | ||||
else: | ||||
return int(sub_e.text) | ||||
def _set_int(nbnode, attr, parent, tag): | ||||
if attr in nbnode: | ||||
e = ET.SubElement(parent, tag) | ||||
e.text = unicode(nbnode[attr]) | ||||
Brian E. Granger
|
r4533 | def _get_bool(e, tag): | ||
sub_e = e.find(tag) | ||||
if sub_e is None: | ||||
return None | ||||
else: | ||||
return bool(int(sub_e.text)) | ||||
def _set_bool(nbnode, attr, parent, tag): | ||||
if attr in nbnode: | ||||
e = ET.SubElement(parent, tag) | ||||
if nbnode[attr]: | ||||
e.text = u'1' | ||||
else: | ||||
e.text = u'0' | ||||
Brian E. Granger
|
r4406 | def _get_binary(e, tag): | ||
sub_e = e.find(tag) | ||||
if sub_e is None: | ||||
return None | ||||
else: | ||||
return decodestring(sub_e.text) | ||||
def _set_binary(nbnode, attr, parent, tag): | ||||
if attr in nbnode: | ||||
e = ET.SubElement(parent, tag) | ||||
e.text = encodestring(nbnode[attr]) | ||||
Brian E. Granger
|
class XMLReader(NotebookReader):
    """Reader that rebuilds a notebook from its XML serialization."""

    def reads(self, s, **kwargs):
        """Parse the XML string ``s`` and return the notebook it describes.

        Extra keyword arguments are forwarded to :meth:`to_notebook`.
        """
        root = ET.fromstring(s)
        return self.to_notebook(root, **kwargs)

    def to_notebook(self, root, **kwargs):
        """Convert a parsed ``notebook`` element into a notebook object.

        Parameters
        ----------
        root : xml.etree.ElementTree.Element
            The top-level element of the document.
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        The object produced by ``new_notebook`` from the metadata children
        of ``root`` and the worksheets found under its ``worksheets`` child.
        """
        worksheets = []
        for ws_e in self._descendants(root, u'worksheets', u'worksheet'):
            cells = []
            # Every descendant of <cells> is inspected; elements whose tag is
            # not a known cell type are simply skipped.
            for cell_e in self._descendants(ws_e, u'cells', u'*'):
                cell = self._read_cell(cell_e)
                if cell is not None:
                    cells.append(cell)
            ws = new_worksheet(name=_get_text(ws_e, u'name'), cells=cells)
            worksheets.append(ws)

        return new_notebook(
            name=_get_text(root, u'name'),
            id=_get_text(root, u'id'),
            worksheets=worksheets,
            author=_get_text(root, u'author'),
            email=_get_text(root, u'email'),
            license=_get_text(root, u'license'),
            saved=_get_text(root, u'saved'),
            created=_get_text(root, u'created'),
        )

    @staticmethod
    def _descendants(parent, container_tag, tag):
        """List all ``tag`` descendants of ``parent``'s ``container_tag`` child.

        Returns an empty list when the container child is missing, so a
        document without e.g. an ``outputs`` element reads as having none.
        ``findall('.//tag')`` is used because ``Element.getiterator`` was
        removed in Python 3.9.
        """
        container = parent.find(container_tag)
        if container is None:
            return []
        return container.findall(u'.//' + tag)

    def _read_cell(self, cell_e):
        """Build a cell from ``cell_e``; return None for non-cell elements."""
        tag = cell_e.tag
        if tag == u'codecell':
            outputs = [
                self._read_output(output_e)
                for output_e in self._descendants(cell_e, u'outputs', u'output')
            ]
            return new_code_cell(
                input=_get_text(cell_e, u'input'),
                prompt_number=_get_int(cell_e, u'prompt_number'),
                language=_get_text(cell_e, u'language'),
                outputs=outputs,
                collapsed=_get_bool(cell_e, u'collapsed'),
            )
        if tag == u'htmlcell':
            return new_text_cell(
                u'html',
                source=_get_text(cell_e, u'source'),
                rendered=_get_text(cell_e, u'rendered'),
            )
        if tag == u'markdowncell':
            return new_text_cell(
                u'markdown',
                source=_get_text(cell_e, u'source'),
                rendered=_get_text(cell_e, u'rendered'),
            )
        return None

    def _read_output(self, output_e):
        """Build one code-cell output from an ``output`` element."""
        traceback = None
        traceback_e = output_e.find(u'traceback')
        if traceback_e is not None:
            frames = [
                frame_e.text for frame_e in traceback_e.findall(u'.//frame')
            ]
            # A traceback element without frames is reported as None.
            traceback = frames or None

        return new_output(
            output_type=_get_text(output_e, u'output_type'),
            output_png=_get_binary(output_e, u'png'),
            output_text=_get_text(output_e, u'text'),
            output_svg=_get_text(output_e, u'svg'),
            output_html=_get_text(output_e, u'html'),
            output_latex=_get_text(output_e, u'latex'),
            output_json=_get_text(output_e, u'json'),
            output_javascript=_get_text(output_e, u'javascript'),
            output_jpeg=_get_binary(output_e, u'jpeg'),
            prompt_number=_get_int(output_e, u'prompt_number'),
            etype=_get_text(output_e, u'etype'),
            evalue=_get_text(output_e, u'evalue'),
            traceback=traceback,
        )
Brian E. Granger
|
r4392 | |||
class XMLWriter(NotebookWriter):
    """Writer that serializes a notebook to indented XML."""

    def writes(self, nb, **kwargs):
        """Return ``nb`` serialized as a UTF-8 encoded XML document.

        Parameters
        ----------
        nb
            Notebook node; it must support ``in``/item access for its
            metadata fields and expose ``worksheets``, whose items expose
            ``cells``.
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        A byte string starting with an XML declaration, followed by the
        indented ``notebook`` element.
        """
        nb_e = ET.Element(u'notebook')
        _set_text(nb, u'name', nb_e, u'name')
        _set_text(nb, u'id', nb_e, u'id')
        _set_text(nb, u'author', nb_e, u'author')
        _set_text(nb, u'email', nb_e, u'email')
        _set_text(nb, u'license', nb_e, u'license')
        _set_text(nb, u'created', nb_e, u'created')
        _set_text(nb, u'saved', nb_e, u'saved')
        _set_int(nb, u'nbformat', nb_e, u'nbformat')

        wss_e = ET.SubElement(nb_e, u'worksheets')
        for ws in nb.worksheets:
            ws_e = ET.SubElement(wss_e, u'worksheet')
            _set_text(ws, u'name', ws_e, u'name')
            cells_e = ET.SubElement(ws_e, u'cells')
            for cell in ws.cells:
                self._write_cell(cell, cells_e)

        indent(nb_e)
        txt = ET.tostring(nb_e, encoding="utf-8")
        # ET.tostring returns an encoded byte string and omits the XML
        # declaration for utf-8, so prepend it as bytes (a text literal
        # cannot be concatenated with bytes on Python 3).
        return b'<?xml version="1.0" encoding="utf-8"?>\n' + txt

    def _write_cell(self, cell, cells_e):
        """Append the element for ``cell`` to ``cells_e``.

        Cells whose ``cell_type`` is not code, html or markdown are skipped.
        """
        cell_type = cell.cell_type
        if cell_type == u'code':
            cell_e = ET.SubElement(cells_e, u'codecell')
            _set_text(cell, u'input', cell_e, u'input')
            _set_text(cell, u'language', cell_e, u'language')
            _set_int(cell, u'prompt_number', cell_e, u'prompt_number')
            _set_bool(cell, u'collapsed', cell_e, u'collapsed')
            # The <outputs> container is always written, even when empty.
            outputs_e = ET.SubElement(cell_e, u'outputs')
            for output in cell.outputs:
                self._write_output(output, outputs_e)
        elif cell_type == u'html':
            cell_e = ET.SubElement(cells_e, u'htmlcell')
            _set_text(cell, u'source', cell_e, u'source')
            _set_text(cell, u'rendered', cell_e, u'rendered')
        elif cell_type == u'markdown':
            cell_e = ET.SubElement(cells_e, u'markdowncell')
            _set_text(cell, u'source', cell_e, u'source')
            _set_text(cell, u'rendered', cell_e, u'rendered')

    def _write_output(self, output, outputs_e):
        """Append an ``output`` element describing ``output`` to ``outputs_e``."""
        output_e = ET.SubElement(outputs_e, u'output')
        _set_text(output, u'output_type', output_e, u'output_type')
        _set_text(output, u'text', output_e, u'text')
        _set_binary(output, u'png', output_e, u'png')
        _set_binary(output, u'jpeg', output_e, u'jpeg')
        _set_text(output, u'html', output_e, u'html')
        _set_text(output, u'svg', output_e, u'svg')
        _set_text(output, u'latex', output_e, u'latex')
        _set_text(output, u'json', output_e, u'json')
        _set_text(output, u'javascript', output_e, u'javascript')
        _set_int(output, u'prompt_number', output_e, u'prompt_number')
        _set_text(output, u'etype', output_e, u'etype')
        _set_text(output, u'evalue', output_e, u'evalue')
        if u'traceback' in output:
            tb_e = ET.SubElement(output_e, u'traceback')
            for frame in output.traceback:
                frame_e = ET.SubElement(tb_e, u'frame')
                frame_e.text = frame
Brian E. Granger
|
# Module-level convenience API: the functions below are bound methods of one
# shared reader instance and one shared writer instance.
_reader = XMLReader()
_writer = XMLWriter()

reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook
write = _writer.write
writes = _writer.writes
Brian E. Granger
|
r4406 | |||