From f3a5072fc60b4502521f27f1bdf0a2ec67cf0668 2011-07-28 20:51:20 From: Brian E. Granger Date: 2011-07-28 20:51:20 Subject: [PATCH] Initial draft of more formal notebook format. * Basic Python notebook uses a simple Struct subclass, NotebookNode for representing the notebook components. * XML and JSON readers/writers work in full round trip manner. * Python reader/writer works for code cells. * Everything is tested. --- diff --git a/IPython/nbformat/nbdict.py b/IPython/nbformat/nbbase.py similarity index 53% rename from IPython/nbformat/nbdict.py rename to IPython/nbformat/nbbase.py index fa2900d..5bda71a 100644 --- a/IPython/nbformat/nbdict.py +++ b/IPython/nbformat/nbbase.py @@ -1,76 +1,79 @@ """The basic dict based notebook format.""" +import pprint import uuid +from IPython.utils.ipstruct import Struct + + +class NotebookNode(Struct): + pass + def new_code_cell(input=None, prompt_number=None, output_text=None, output_png=None, output_html=None, output_svg=None, output_latex=None, output_json=None, output_javascript=None): """Create a new code cell with input and output""" - cell = {} - cell['cell_type'] = 'code' + cell = NotebookNode() + cell.cell_type = 'code' if input is not None: - cell['input'] = unicode(input) + cell.input = unicode(input) if prompt_number is not None: - cell['prompt_number'] = int(prompt_number) + cell.prompt_number = int(prompt_number) - output = {} + output = NotebookNode() if output_text is not None: - output['text/plain'] = unicode(output_text) + output.text = unicode(output_text) if output_png is not None: - output['image/png'] = bytes(output_png) + output.png = bytes(output_png) if output_html is not None: - output['text/html'] = unicode(output_html) + output.html = unicode(output_html) if output_svg is not None: - output['image/svg+xml'] = unicode(output_svg) + output.svg = unicode(output_svg) if output_latex is not None: - output['text/latex'] = unicode(output_latex) + output.latex = unicode(output_latex) if output_json is not 
None: - output['application/json'] = unicode(output_json) + output.json = unicode(output_json) if output_javascript is not None: - output['application/javascript'] = unicode(output_javascript) + output.javascript = unicode(output_javascript) - cell['output'] = output + cell.output = output return cell def new_text_cell(text=None): """Create a new text cell.""" - cell = {} + cell = NotebookNode() if text is not None: - cell['text'] = unicode(text) - cell['cell_type'] = 'text' + cell.text = unicode(text) + cell.cell_type = 'text' return cell def new_worksheet(name=None, cells=None): """Create a worksheet by name with with a list of cells.""" - ws = {} + ws = NotebookNode() if name is not None: - ws['name'] = unicode(name) - else: - ws['name'] = u'' + ws.name = unicode(name) if cells is None: - ws['cells'] = [] + ws.cells = [] else: - ws['cells'] = list(cells) + ws.cells = list(cells) return ws def new_notebook(name=None, id=None, worksheets=None): """Create a notebook by name, id and a list of worksheets.""" - nb = {} + nb = NotebookNode() if name is not None: - nb['name'] = unicode(name) - else: - nb['name'] = u'' + nb.name = unicode(name) if id is None: - nb['id'] = unicode(uuid.uuid4()) + nb.id = unicode(uuid.uuid4()) else: - nb['id'] = unicode(id) + nb.id = unicode(id) if worksheets is None: - nb['worksheets'] = [] + nb.worksheets = [] else: - nb['worksheets'] = list(worksheets) + nb.worksheets = list(worksheets) return nb diff --git a/IPython/nbformat/nbjson.py b/IPython/nbformat/nbjson.py index 00b8582..e98578d 100644 --- a/IPython/nbformat/nbjson.py +++ b/IPython/nbformat/nbjson.py @@ -1,7 +1,7 @@ """Read and write notebooks in JSON format.""" from base64 import encodestring -from .base import NotebookReader, NotebookWriter, base64_decode +from .rwbase import NotebookReader, NotebookWriter, base64_decode import json @@ -14,7 +14,7 @@ class BytesEncoder(json.JSONEncoder): class JSONReader(NotebookReader): - def reads(s, **kwargs): + def reads(self, s, 
**kwargs): nb = json.loads(s, **kwargs) nb = base64_decode(nb) return nb @@ -22,7 +22,7 @@ class JSONReader(NotebookReader): class JSONWriter(NotebookWriter): - def writes(nb, **kwargs): + def writes(self, nb, **kwargs): kwargs['cls'] = BytesEncoder kwargs['indent'] = 4 return json.dumps(nb, **kwargs) diff --git a/IPython/nbformat/nbpy.py b/IPython/nbformat/nbpy.py index a5328e7..bd07820 100644 --- a/IPython/nbformat/nbpy.py +++ b/IPython/nbformat/nbpy.py @@ -1,19 +1,19 @@ """Read and write notebooks as regular .py files.""" -from .base import NotebookReader, NotebookWriter -from .nbdict import new_code_cell, new_worksheet, new_notebook +from .rwbase import NotebookReader, NotebookWriter +from .nbbase import new_code_cell, new_worksheet, new_notebook class PyReader(NotebookReader): - def reads(s, **kwargs): + def reads(self, s, **kwargs): lines = s.splitlines() cells = [] cell_lines = [] for line in lines: - if line.startswith('# '): - code = '\n'.join(cell_lines) - code = code.strip('\n') + if line.startswith(u'# '): + code = u'\n'.join(cell_lines) + code = code.strip(u'\n') if code: cells.append(new_code_cell(input=code)) cell_lines = [] @@ -26,15 +26,16 @@ class PyReader(NotebookReader): class PyWriter(NotebookWriter): - def writes(nb, **kwargs): + def writes(self, nb, **kwargs): lines = [] - for ws in nb['worksheets']: - for cell in ws['cells']: - if cell['cell_type'] == 'code': - input = cell['input'] + for ws in nb.worksheets: + for cell in ws.cells: + if cell.cell_type == 'code': + input = cell.input + lines.extend([u'# ',u'']) lines.extend(input.splitlines()) - lines.extend(['','# ','']) - return ''.join(lines) + lines.append(u'') + return unicode('\n'.join(lines)) _reader = PyReader() diff --git a/IPython/nbformat/nbxml.py b/IPython/nbformat/nbxml.py index aa22f3f..5c854fd 100644 --- a/IPython/nbformat/nbxml.py +++ b/IPython/nbformat/nbxml.py @@ -2,43 +2,136 @@ from xml.etree import ElementTree as ET -from .base import NotebookReader, NotebookWriter -from 
.nbdict import new_code_cell, new_worksheet, new_notebook +from .rwbase import NotebookReader, NotebookWriter +from .nbbase import new_code_cell, new_text_cell, new_worksheet, new_notebook + + +def indent(elem, level=0): + i = "\n" + level*" " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for elem in elem: + indent(elem, level+1) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + +def _get_text(e, tag): + sub_e = e.find(tag) + if sub_e is None: + return None + else: + return sub_e.text class XMLReader(NotebookReader): - def reads(s, **kwargs): - pass + def reads(self, s, **kwargs): + root = ET.fromstring(s) + + nbname = _get_text(root,'name') + nbid = _get_text(root,'id') + + worksheets = [] + for ws_e in root.getiterator('worksheet'): + wsname = _get_text(ws_e,'name') + cells = [] + for cell_e in ws_e.getiterator(): + if cell_e.tag == 'codecell': + input = _get_text(cell_e,'input') + output_e = cell_e.find('output') + if output_e is not None: + output_text = _get_text(output_e,'text') + output_png = _get_text(output_e,'png') + output_svg = _get_text(output_e,'svg') + output_html = _get_text(output_e,'html') + output_latex = _get_text(output_e,'latex') + output_json = _get_text(output_e,'json') + output_javascript = _get_text(output_e,'javascript') + cc = new_code_cell(input=input,output_png=output_png, + output_text=output_text,output_svg=output_svg, + output_html=output_html,output_latex=output_latex, + output_json=output_json,output_javascript=output_javascript + ) + cells.append(cc) + if cell_e.tag == 'textcell': + text = _get_text(cell_e,'text') + cells.append(new_text_cell(text=text)) + ws = new_worksheet(name=wsname,cells=cells) + worksheets.append(ws) + + nb = new_notebook(name=nbname,id=nbid,worksheets=worksheets) + return nb class XMLWriter(NotebookWriter): - def 
writes(nb, **kwargs): + def writes(self, nb, **kwargs): nb_e = ET.Element('notebook') - name_e = ET.SubElement(nb_e, 'name') - name_e.text = nb.get('name','') - id_e = ET.SubElement(nb_e, 'id') - id_e.text = nb.get('id','') - for ws in nb['worksheets']: + if 'name' in nb: + name_e = ET.SubElement(nb_e, 'name') + name_e.text = nb.name + if 'id' in nb: + id_e = ET.SubElement(nb_e, 'id') + id_e.text = nb.id + for ws in nb.worksheets: ws_e = ET.SubElement(nb_e, 'worksheet') - ws_name_e = ET.SubElement(ws_e, 'name') - ws_name_e.text = ws.get('name','') - for cell in ws['cells']: - cell_type = cell['cell_type'] + if 'name' in ws: + ws_name_e = ET.SubElement(ws_e, 'name') + ws_name_e.text = ws.name + for cell in ws.cells: + cell_type = cell.cell_type if cell_type == 'code': - output = cell['output'] - cell_e = ET.SubElement(ws_e, 'cell') - input_e = ET.SubElement(cell_e, 'input') - input_e.text = cell.get('input','') + output = cell.output + cell_e = ET.SubElement(ws_e, 'codecell') output_e = ET.SubElement(cell_e, 'output') - text_e = ET.SubElement(output_e, 'text') - text_e.text = cell.output - elif cell_type == 'text': - pass - + if 'input' in cell: + input_e = ET.SubElement(cell_e, 'input') + input_e.text = cell.input + if 'prompt_number' in cell: + prompt_number_e = ET.SubElement(cell_e, 'prompt_number') + input_e.text = cell.prompt_number + if 'text' in output: + text_e = ET.SubElement(output_e, 'text') + text_e.text = output.text + if 'png' in output: + png_e = ET.SubElement(output_e, 'png') + png_e.text = output.png + if 'html' in output: + html_e = ET.SubElement(output_e, 'html') + html_e.text = output.html + if 'svg' in output: + svg_e = ET.SubElement(output_e, 'svg') + svg_e.text = output.svg + if 'latex' in output: + latex_e = ET.SubElement(output_e, 'latex') + latex_e.text = output.latex + if 'json' in output: + json_e = ET.SubElement(output_e, 'json') + json_e.text = output.json + if 'javascript' in output: + javascript_e = ET.SubElement(output_e, 
'javascript') + javascript_e.text = output.javascript + elif cell_type == 'text': + cell_e = ET.SubElement(ws_e, 'textcell') + if 'text' in cell: + cell_text_e = ET.SubElement(cell_e, 'text') + cell_text_e.text = cell.text + + indent(nb_e) + txt = ET.tostring(nb_e, encoding="utf-8") + txt = '\n' + txt + return txt + + _reader = XMLReader() _writer = XMLWriter() diff --git a/IPython/nbformat/base.py b/IPython/nbformat/rwbase.py similarity index 93% rename from IPython/nbformat/base.py rename to IPython/nbformat/rwbase.py index 833a5ec..17747c9 100644 --- a/IPython/nbformat/base.py +++ b/IPython/nbformat/rwbase.py @@ -1,7 +1,7 @@ from base64 import encodestring, decodestring -def base64_decode(self, nb): +def base64_decode(nb): """Base64 encode all bytes objects in the notebook.""" for ws in nb['worksheets']: for cell in ws['cells']: @@ -11,7 +11,7 @@ def base64_decode(self, nb): return nb -def base64_encode(self, nb): +def base64_encode(nb): """Base64 decode all binary objects in the notebook.""" for ws in nb['worksheets']: for cell in ws['cells']: @@ -43,3 +43,4 @@ class NotebookWriter(object): return fp.write(self.dumps(nb,**kwargs)) + diff --git a/IPython/nbformat/tests/__init__.py b/IPython/nbformat/tests/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/IPython/nbformat/tests/__init__.py diff --git a/IPython/nbformat/tests/nbexamples.py b/IPython/nbformat/tests/nbexamples.py new file mode 100644 index 0000000..9173701 --- /dev/null +++ b/IPython/nbformat/tests/nbexamples.py @@ -0,0 +1,52 @@ +from IPython.nbformat.nbbase import ( + NotebookNode, + new_code_cell, new_text_cell, new_worksheet, new_notebook +) + + + +ws = new_worksheet(name='worksheet1') + +ws.cells.append(new_text_cell( + text='Some NumPy Examples' +)) + + +ws.cells.append(new_code_cell( + input='import numpy' +)) + +ws.cells.append(new_code_cell( + input='a = numpy.random.rand(100)' +)) + +ws.cells.append(new_code_cell( + input='print a', + output_text='', + 
output_html='The HTML rep', + output_latex='$a$', + output_png=b'data', + output_svg='', + output_json='json data', + output_javascript='var i=0;' +)) + +nb0 = new_notebook( + name='nb0', + worksheets=[ws, new_worksheet(name='worksheet2')] +) + +nb0_py = """# + +import numpy + +# + +a = numpy.random.rand(100) + +# + +print a +""" + + diff --git a/IPython/nbformat/tests/test_json.py b/IPython/nbformat/tests/test_json.py new file mode 100644 index 0000000..7b7430e --- /dev/null +++ b/IPython/nbformat/tests/test_json.py @@ -0,0 +1,14 @@ +from unittest import TestCase + +from IPython.nbformat.nbjson import reads, writes +from IPython.nbformat.tests.nbexamples import nb0 + + +class TestJSON(TestCase): + + def test_roundtrip(self): + s = writes(nb0) + self.assertEquals(reads(s),nb0) + + + diff --git a/IPython/nbformat/tests/test_nbbase.py b/IPython/nbformat/tests/test_nbbase.py new file mode 100644 index 0000000..c0df3e9 --- /dev/null +++ b/IPython/nbformat/tests/test_nbbase.py @@ -0,0 +1,60 @@ +from unittest import TestCase + +from IPython.nbformat.nbbase import ( + NotebookNode, + new_code_cell, new_text_cell, new_worksheet, new_notebook +) + +class TestCell(TestCase): + + def test_empty_code_cell(self): + cc = new_code_cell() + self.assertEquals(cc.cell_type,'code') + self.assertEquals('input' not in cc, True) + self.assertEquals('prompt_number' not in cc, True) + self.assertEquals(cc.output, NotebookNode()) + + def test_code_cell(self): + cc = new_code_cell(input='a=10', prompt_number=0, output_svg='foo', output_text='10') + self.assertEquals(cc.input, u'a=10') + self.assertEquals(cc.prompt_number, 0) + self.assertEquals(cc.output.svg, u'foo') + self.assertEquals(cc.output.text, u'10') + + def test_empty_text_cell(self): + tc = new_text_cell() + self.assertEquals(tc.cell_type, 'text') + self.assertEquals('text' not in tc, True) + + def test_text_cell(self): + tc = new_text_cell('hi') + self.assertEquals(tc.text, u'hi') + + +class TestWorksheet(TestCase): + + def 
test_empty_worksheet(self): + ws = new_worksheet() + self.assertEquals(ws.cells,[]) + self.assertEquals('name' not in ws, True) + + def test_worksheet(self): + cells = [new_code_cell(), new_text_cell()] + ws = new_worksheet(cells=cells,name='foo') + self.assertEquals(ws.cells,cells) + self.assertEquals(ws.name,u'foo') + +class TestNotebook(TestCase): + + def test_empty_notebook(self): + nb = new_notebook() + self.assertEquals('id' in nb, True) + self.assertEquals(nb.worksheets, []) + self.assertEquals('name' not in nb, True) + + def test_notebooke(self): + worksheets = [new_worksheet(),new_worksheet()] + nb = new_notebook(name='foo',worksheets=worksheets) + self.assertEquals(nb.name,u'foo') + self.assertEquals(nb.worksheets,worksheets) + diff --git a/IPython/nbformat/tests/test_nbpy.py b/IPython/nbformat/tests/test_nbpy.py new file mode 100644 index 0000000..9fa685b --- /dev/null +++ b/IPython/nbformat/tests/test_nbpy.py @@ -0,0 +1,18 @@ +from unittest import TestCase + +from IPython.nbformat.nbbase import ( + NotebookNode, + new_code_cell, new_text_cell, new_worksheet, new_notebook +) + +from IPython.nbformat.nbpy import reads, writes +from IPython.nbformat.tests.nbexamples import nb0, nb0_py + + +class TestPy(TestCase): + + def test_write(self): + s = writes(nb0) + self.assertEquals(s,nb0_py) + + diff --git a/IPython/nbformat/tests/test_xml.py b/IPython/nbformat/tests/test_xml.py new file mode 100644 index 0000000..04afba4 --- /dev/null +++ b/IPython/nbformat/tests/test_xml.py @@ -0,0 +1,12 @@ +from unittest import TestCase + +from IPython.nbformat.nbxml import reads, writes +from IPython.nbformat.tests.nbexamples import nb0 + + +class TestXML(TestCase): + + def test_roundtrip(self): + s = writes(nb0) + self.assertEquals(reads(s),nb0) +