nbxml.py
178 lines
| 6.7 KiB
| text/x-python
|
PythonLexer
Brian E. Granger
|
r4392 | """Read and write notebook files as XML.""" | ||
Brian E. Granger
|
r4406 | from base64 import encodestring, decodestring | ||
Brian E. Granger
|
r4392 | from xml.etree import ElementTree as ET | ||
Brian E. Granger
|
r4401 | from .rwbase import NotebookReader, NotebookWriter | ||
Brian E. Granger
|
r4402 | from .nbbase import ( | ||
Brian E. Granger
|
r4511 | new_code_cell, new_text_cell, new_worksheet, new_notebook, new_output | ||
Brian E. Granger
|
r4402 | ) | ||
Brian E. Granger
|
r4401 | |||
def indent(elem, level=0):
    """Insert pretty-printing whitespace into an element tree, in place.

    Walks ``elem`` recursively and rewrites ``text``/``tail`` values that
    are missing or whitespace-only so that the serialized XML puts every
    element on its own line, indented by one space per nesting level.
    Text or tails that contain non-whitespace characters are left alone.

    Parameters
    ----------
    elem : Element
        Root of the (sub)tree to format.
    level : int
        Nesting depth of ``elem``; callers normally leave this at 0.

    Returns
    -------
    None
        The tree is modified in place.
    """
    pad = "\n" + level*" "
    children = list(elem)
    if children:
        # Whitespace before the first child: one level deeper than elem.
        if not elem.text or not elem.text.strip():
            elem.text = pad + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
        for child in children:
            indent(child, level+1)
        # The last child's tail precedes elem's closing tag, so it has to
        # drop back to elem's own indentation level.
        last = children[-1]
        if not last.tail or not last.tail.strip():
            last.tail = pad
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf element; a top-level leaf (level 0) gets no tail.
        elem.tail = pad
def _get_text(e, tag): | ||||
sub_e = e.find(tag) | ||||
if sub_e is None: | ||||
return None | ||||
else: | ||||
return sub_e.text | ||||
Brian E. Granger
|
r4392 | |||
Brian E. Granger
|
r4402 | def _set_text(nbnode, attr, parent, tag): | ||
if attr in nbnode: | ||||
e = ET.SubElement(parent, tag) | ||||
e.text = nbnode[attr] | ||||
def _get_int(e, tag): | ||||
sub_e = e.find(tag) | ||||
if sub_e is None: | ||||
return None | ||||
else: | ||||
return int(sub_e.text) | ||||
def _set_int(nbnode, attr, parent, tag): | ||||
if attr in nbnode: | ||||
e = ET.SubElement(parent, tag) | ||||
e.text = unicode(nbnode[attr]) | ||||
Brian E. Granger
|
r4406 | def _get_binary(e, tag): | ||
sub_e = e.find(tag) | ||||
if sub_e is None: | ||||
return None | ||||
else: | ||||
return decodestring(sub_e.text) | ||||
def _set_binary(nbnode, attr, parent, tag): | ||||
if attr in nbnode: | ||||
e = ET.SubElement(parent, tag) | ||||
e.text = encodestring(nbnode[attr]) | ||||
Brian E. Granger
|
class XMLReader(NotebookReader):
    """Build notebook objects from their XML serialization."""

    def reads(self, s, **kwargs):
        """Parse the XML string ``s`` and return the notebook it describes.

        Extra keyword arguments are forwarded to :meth:`to_notebook`.
        """
        root = ET.fromstring(s)
        return self.to_notebook(root, **kwargs)

    def to_notebook(self, root, **kwargs):
        """Convert a parsed ``<notebook>`` element into a notebook object.

        Reads the notebook ``name`` and ``id``, then every ``worksheet``
        under ``worksheets`` and, inside each, the ``codecell``,
        ``htmlcell`` and ``markdowncell`` elements under ``cells``.
        Elements with any other tag are ignored. ``kwargs`` is accepted
        but not used here.

        Traversal uses ``Element.iter`` (available since Python 2.7)
        because the older ``getiterator`` spelling was removed in
        Python 3.9.
        """
        nbname = _get_text(root, 'name')
        nbid = _get_text(root, 'id')
        worksheets = []
        for ws_e in root.find('worksheets').iter('worksheet'):
            wsname = _get_text(ws_e, 'name')
            cells = []
            # iter() walks the whole subtree; only the three known cell
            # tags are acted on.
            for cell_e in ws_e.find('cells').iter():
                if cell_e.tag == 'codecell':
                    # Named cell_input to avoid shadowing the builtin.
                    cell_input = _get_text(cell_e, 'input')
                    prompt_number = _get_int(cell_e, 'prompt_number')
                    language = _get_text(cell_e, 'language')
                    outputs = []
                    for output_e in cell_e.find('outputs').iter('output'):
                        out_prompt_number = _get_int(output_e, 'prompt_number')
                        output_type = _get_text(output_e, 'output_type')
                        output_text = _get_text(output_e, 'text')
                        # PNG data is stored base64-encoded.
                        output_png = _get_binary(output_e, 'png')
                        output_svg = _get_text(output_e, 'svg')
                        output_html = _get_text(output_e, 'html')
                        output_latex = _get_text(output_e, 'latex')
                        output_json = _get_text(output_e, 'json')
                        output_javascript = _get_text(output_e, 'javascript')
                        output = new_output(
                            output_type=output_type, output_png=output_png,
                            output_text=output_text, output_svg=output_svg,
                            output_html=output_html, output_latex=output_latex,
                            output_json=output_json,
                            output_javascript=output_javascript,
                            prompt_number=out_prompt_number
                        )
                        outputs.append(output)
                    cc = new_code_cell(input=cell_input,
                                       prompt_number=prompt_number,
                                       language=language, outputs=outputs)
                    cells.append(cc)
                elif cell_e.tag == 'htmlcell':
                    source = _get_text(cell_e, 'source')
                    rendered = _get_text(cell_e, 'rendered')
                    cells.append(new_text_cell(u'html', source=source,
                                               rendered=rendered))
                elif cell_e.tag == 'markdowncell':
                    source = _get_text(cell_e, 'source')
                    rendered = _get_text(cell_e, 'rendered')
                    cells.append(new_text_cell(u'markdown', source=source,
                                               rendered=rendered))
            ws = new_worksheet(name=wsname, cells=cells)
            worksheets.append(ws)
        nb = new_notebook(name=nbname, id=nbid, worksheets=worksheets)
        return nb
Brian E. Granger
|
r4392 | |||
class XMLWriter(NotebookWriter):
    """Serialize notebook objects to XML."""

    def writes(self, nb, **kwargs):
        """Return ``nb`` serialized as a UTF-8 encoded XML document.

        The document root is ``<notebook>``. It holds the optional
        ``name``, ``id`` and ``nbformat`` fields, followed by a
        ``<worksheets>`` element with one ``<worksheet>`` per entry of
        ``nb.worksheets``. Cells whose ``cell_type`` is ``'code'``,
        ``'html'`` or ``'markdown'`` are written; any other cell type is
        skipped. ``kwargs`` is accepted but not used here.

        The result is a byte string: the XML declaration followed by the
        indented element tree.
        """
        nb_e = ET.Element('notebook')
        _set_text(nb, 'name', nb_e, 'name')
        _set_text(nb, 'id', nb_e, 'id')
        _set_int(nb, 'nbformat', nb_e, 'nbformat')
        wss_e = ET.SubElement(nb_e, 'worksheets')
        for ws in nb.worksheets:
            ws_e = ET.SubElement(wss_e, 'worksheet')
            _set_text(ws, 'name', ws_e, 'name')
            cells_e = ET.SubElement(ws_e, 'cells')
            for cell in ws.cells:
                cell_type = cell.cell_type
                if cell_type == 'code':
                    cell_e = ET.SubElement(cells_e, 'codecell')
                    _set_text(cell, 'input', cell_e, 'input')
                    _set_text(cell, 'language', cell_e, 'language')
                    _set_int(cell, 'prompt_number', cell_e, 'prompt_number')
                    outputs_e = ET.SubElement(cell_e, 'outputs')
                    for output in cell.outputs:
                        output_e = ET.SubElement(outputs_e, 'output')
                        _set_int(output, 'prompt_number', output_e, 'prompt_number')
                        _set_text(output, 'output_type', output_e, 'output_type')
                        _set_text(output, 'text', output_e, 'text')
                        # PNG data is binary and is stored base64-encoded.
                        _set_binary(output, 'png', output_e, 'png')
                        _set_text(output, 'html', output_e, 'html')
                        _set_text(output, 'svg', output_e, 'svg')
                        _set_text(output, 'latex', output_e, 'latex')
                        _set_text(output, 'json', output_e, 'json')
                        _set_text(output, 'javascript', output_e, 'javascript')
                elif cell_type == 'html':
                    cell_e = ET.SubElement(cells_e, 'htmlcell')
                    _set_text(cell, 'source', cell_e, 'source')
                    _set_text(cell, 'rendered', cell_e, 'rendered')
                elif cell_type == 'markdown':
                    cell_e = ET.SubElement(cells_e, 'markdowncell')
                    _set_text(cell, 'source', cell_e, 'source')
                    _set_text(cell, 'rendered', cell_e, 'rendered')

        indent(nb_e)
        txt = ET.tostring(nb_e, encoding="utf-8")
        # tostring() with an explicit encoding returns a byte string, so the
        # declaration must be bytes too. On Python 2 a b'' literal is a plain
        # str, which keeps the output unchanged there; on Python 3 it avoids
        # the TypeError from concatenating str and bytes.
        txt = b'<?xml version="1.0" encoding="utf-8"?>\n' + txt
        return txt
Brian E. Granger
|
# Module-level convenience API: one shared reader and one shared writer,
# with their bound methods re-exported as plain functions.
_reader = XMLReader()
_writer = XMLWriter()

# Reading side. NOTE(review): ``read`` is not defined on XMLReader in this
# file; presumably it is inherited from NotebookReader -- confirm.
read = _reader.read
reads = _reader.reads
to_notebook = _reader.to_notebook

# Writing side. NOTE(review): ``write`` is presumably inherited from
# NotebookWriter -- confirm.
write = _writer.write
writes = _writer.writes
Brian E. Granger
|
r4406 | |||