From f3a5072fc60b4502521f27f1bdf0a2ec67cf0668 2011-07-28 20:51:20
From: Brian E. Granger <ellisonbg@gmail.com>
Date: 2011-07-28 20:51:20
Subject: [PATCH] Initial draft of more formal notebook format.

* Basic Python notebook uses a simple Struct subclass,
  NotebookNode for representing the notebook components.
* XML and JSON readers/writers work in full round trip manner.
* Python reader/writer works for code cells.
* Everything is tested.

---

diff --git a/IPython/nbformat/nbdict.py b/IPython/nbformat/nbbase.py
similarity index 53%
rename from IPython/nbformat/nbdict.py
rename to IPython/nbformat/nbbase.py
index fa2900d..5bda71a 100644
--- a/IPython/nbformat/nbdict.py
+++ b/IPython/nbformat/nbbase.py
@@ -1,76 +1,79 @@
 """The basic dict based notebook format."""
 
+import pprint
 import uuid
 
+from IPython.utils.ipstruct import Struct
+
+
+class NotebookNode(Struct):
+    """Struct subclass used as the node type for notebooks, worksheets, cells and outputs."""
+
 
 def new_code_cell(input=None, prompt_number=None, output_text=None, output_png=None,
     output_html=None, output_svg=None, output_latex=None, output_json=None, 
     output_javascript=None):
     """Create a new code cell with input and output"""
-    cell = {}
-    cell['cell_type'] = 'code'
+    cell = NotebookNode()
+    cell.cell_type = 'code'
     if input is not None:
-        cell['input'] = unicode(input)
+        cell.input = unicode(input)
     if prompt_number is not None:
-        cell['prompt_number'] = int(prompt_number)
+        cell.prompt_number = int(prompt_number)
 
-    output = {}
+    output = NotebookNode()
     if output_text is not None:
-        output['text/plain'] = unicode(output_text)
+        output.text = unicode(output_text)
     if output_png is not None:
-        output['image/png'] = bytes(output_png)
+        output.png = bytes(output_png)
     if output_html is not None:
-        output['text/html'] = unicode(output_html)
+        output.html = unicode(output_html)
     if output_svg is not None:
-        output['image/svg+xml'] = unicode(output_svg)
+        output.svg = unicode(output_svg)
     if output_latex is not None:
-        output['text/latex'] = unicode(output_latex)
+        output.latex = unicode(output_latex)
     if output_json is not None:
-        output['application/json'] = unicode(output_json)
+        output.json = unicode(output_json)
     if output_javascript is not None:
-        output['application/javascript'] = unicode(output_javascript)
+        output.javascript = unicode(output_javascript)
 
-    cell['output'] = output
+    cell.output = output
     return cell
 
 
 def new_text_cell(text=None):
     """Create a new text cell."""
-    cell = {}
+    cell = NotebookNode()
     if text is not None:
-        cell['text'] = unicode(text)
-    cell['cell_type'] = 'text'
+        cell.text = unicode(text)
+    cell.cell_type = 'text'
     return cell
 
 
 def new_worksheet(name=None, cells=None):
     """Create a worksheet by name with with a list of cells."""
-    ws = {}
+    ws = NotebookNode()
     if name is not None:
-        ws['name'] = unicode(name)
-    else:
-        ws['name'] = u''
+        ws.name = unicode(name)
     if cells is None:
-        ws['cells'] = []
+        ws.cells = []
     else:
-        ws['cells'] = list(cells)
+        ws.cells = list(cells)
     return ws
 
 
 def new_notebook(name=None, id=None, worksheets=None):
     """Create a notebook by name, id and a list of worksheets."""
-    nb = {}
+    nb = NotebookNode()
     if name is not None:
-        nb['name'] = unicode(name)
-    else:
-        nb['name'] = u''
+        nb.name = unicode(name)
     if id is None:
-        nb['id'] = unicode(uuid.uuid4())
+        nb.id = unicode(uuid.uuid4())
     else:
-        nb['id'] = unicode(id)
+        nb.id = unicode(id)
     if worksheets is None:
-        nb['worksheets'] = []
+        nb.worksheets = []
     else:
-        nb['worksheets'] = list(worksheets)
+        nb.worksheets = list(worksheets)
     return nb
 
diff --git a/IPython/nbformat/nbjson.py b/IPython/nbformat/nbjson.py
index 00b8582..e98578d 100644
--- a/IPython/nbformat/nbjson.py
+++ b/IPython/nbformat/nbjson.py
@@ -1,7 +1,7 @@
 """Read and write notebooks in JSON format."""
 
 from base64 import encodestring
-from .base import NotebookReader, NotebookWriter, base64_decode
+from .rwbase import NotebookReader, NotebookWriter, base64_decode
 import json
 
 
@@ -14,7 +14,7 @@ class BytesEncoder(json.JSONEncoder):
 
 class JSONReader(NotebookReader):
 
-    def reads(s, **kwargs):
+    def reads(self, s, **kwargs):
         nb = json.loads(s, **kwargs)
         nb = base64_decode(nb)
         return nb
@@ -22,7 +22,7 @@ class JSONReader(NotebookReader):
 
 class JSONWriter(NotebookWriter):
 
-    def writes(nb, **kwargs):
+    def writes(self, nb, **kwargs):
         kwargs['cls'] = BytesEncoder
         kwargs['indent'] = 4
         return json.dumps(nb, **kwargs)
diff --git a/IPython/nbformat/nbpy.py b/IPython/nbformat/nbpy.py
index a5328e7..bd07820 100644
--- a/IPython/nbformat/nbpy.py
+++ b/IPython/nbformat/nbpy.py
@@ -1,19 +1,19 @@
 """Read and write notebooks as regular .py files."""
 
-from .base import NotebookReader, NotebookWriter
-from .nbdict import new_code_cell, new_worksheet, new_notebook
+from .rwbase import NotebookReader, NotebookWriter
+from .nbbase import new_code_cell, new_worksheet, new_notebook
 
 
 class PyReader(NotebookReader):
 
-    def reads(s, **kwargs):
+    def reads(self, s, **kwargs):
         lines = s.splitlines()
         cells = []
         cell_lines = []
         for line in lines:
-            if line.startswith('# <codecell>'):
-                code = '\n'.join(cell_lines)
-                code = code.strip('\n')
+            if line.startswith(u'# <codecell>'):
+                code = u'\n'.join(cell_lines)
+                code = code.strip(u'\n')
                 if code:
                     cells.append(new_code_cell(input=code))
                     cell_lines = []
@@ -26,15 +26,16 @@ class PyReader(NotebookReader):
 
 class PyWriter(NotebookWriter):
 
-    def writes(nb, **kwargs):
+    def writes(self, nb, **kwargs):
         lines = []
-        for ws in nb['worksheets']:
-            for cell in ws['cells']:
-                if cell['cell_type'] == 'code':
-                    input = cell['input']
+        for ws in nb.worksheets:
+            for cell in ws.cells:
+                if cell.cell_type == 'code':
+                    input = cell.get('input', u'')  # new_code_cell() without input sets no 'input' key
+                    lines.extend([u'# <codecell>',u''])
                     lines.extend(input.splitlines())
-                    lines.extend(['','# <codecell>',''])
-        return ''.join(lines)
+                    lines.append(u'')
+        return unicode('\n'.join(lines))
 
 
 _reader = PyReader()
diff --git a/IPython/nbformat/nbxml.py b/IPython/nbformat/nbxml.py
index aa22f3f..5c854fd 100644
--- a/IPython/nbformat/nbxml.py
+++ b/IPython/nbformat/nbxml.py
@@ -2,43 +2,136 @@
 
 from xml.etree import ElementTree as ET
 
-from .base import NotebookReader, NotebookWriter
-from .nbdict import new_code_cell, new_worksheet, new_notebook
+from .rwbase import NotebookReader, NotebookWriter
+from .nbbase import new_code_cell, new_text_cell, new_worksheet, new_notebook
+
+
+def indent(elem, level=0):
+    """Pretty-print helper: rewrite blank text/tail whitespace of ``elem`` and its subtree in place."""
+    pad = "\n" + level*"  "
+    if len(elem):
+        if not (elem.text and elem.text.strip()):
+            elem.text = pad + "  "
+        if not (elem.tail and elem.tail.strip()):
+            elem.tail = pad
+        for child in elem:
+            indent(child, level+1)
+        if not (child.tail and child.tail.strip()):  # last child: its tail precedes the parent's end tag
+            child.tail = pad
+    elif level and not (elem.tail and elem.tail.strip()):
+        elem.tail = pad
+
+
+def _get_text(e, tag):
+    """Return the text of the sub-element that ``e.find(tag)`` locates, or None if there is none."""
+    found = e.find(tag)
+    if found is not None:
+        return found.text
+    return None
 
 
 class XMLReader(NotebookReader):
 
-    def reads(s, **kwargs):
-        pass
+    def reads(self, s, **kwargs):
+        """Parse the XML notebook string ``s`` and rebuild it with the nbbase constructors."""
+        root = ET.fromstring(s)
+
+        nbname = _get_text(root,'name')
+        nbid = _get_text(root,'id')
+        worksheets = []
+        for ws_e in root.getiterator('worksheet'):
+            wsname = _get_text(ws_e,'name')
+            cells = []
+            for cell_e in ws_e.getiterator():
+                if cell_e.tag == 'codecell':
+                    input = _get_text(cell_e,'input')
+                    prompt_number = _get_text(cell_e,'prompt_number')
+                    output_e = cell_e.find('output')
+                    if output_e is None:
+                        output_e = ET.Element('output')  # empty stand-in so every output lookup yields None
+                    output_text = _get_text(output_e,'text')
+                    output_png = _get_text(output_e,'png')
+                    output_svg = _get_text(output_e,'svg')
+                    output_html = _get_text(output_e,'html')
+                    output_latex = _get_text(output_e,'latex')
+                    output_json = _get_text(output_e,'json')
+                    output_javascript = _get_text(output_e,'javascript')
+                    cells.append(new_code_cell(input=input,prompt_number=prompt_number,
+                        output_text=output_text,output_png=output_png,
+                        output_html=output_html,output_svg=output_svg,
+                        output_latex=output_latex,output_json=output_json,
+                        output_javascript=output_javascript))
+                if cell_e.tag == 'textcell':
+                    text = _get_text(cell_e,'text')
+                    cells.append(new_text_cell(text=text))
+            worksheets.append(new_worksheet(name=wsname,cells=cells))
+
+        nb = new_notebook(name=nbname,id=nbid,worksheets=worksheets)
+        return nb
 
 
 class XMLWriter(NotebookWriter):
 
-    def writes(nb, **kwargs):
+    def writes(self, nb, **kwargs):
         nb_e = ET.Element('notebook')
-        name_e = ET.SubElement(nb_e, 'name')
-        name_e.text = nb.get('name','')
-        id_e = ET.SubElement(nb_e, 'id')
-        id_e.text = nb.get('id','')
-        for ws in nb['worksheets']:
+        if 'name' in nb:
+            name_e = ET.SubElement(nb_e, 'name')
+            name_e.text = nb.name
+        if 'id' in nb:
+            id_e = ET.SubElement(nb_e, 'id')
+            id_e.text = nb.id
+        for ws in nb.worksheets:
             ws_e = ET.SubElement(nb_e, 'worksheet')
-            ws_name_e = ET.SubElement(ws_e, 'name')
-            ws_name_e.text = ws.get('name','')
-            for cell in ws['cells']:
-                cell_type = cell['cell_type']
+            if 'name' in ws:
+                ws_name_e = ET.SubElement(ws_e, 'name')
+                ws_name_e.text = ws.name
+            for cell in ws.cells:
+                cell_type = cell.cell_type
                 if cell_type == 'code':
-                    output = cell['output']
-                    cell_e = ET.SubElement(ws_e, 'cell')
-                    input_e = ET.SubElement(cell_e, 'input')
-                    input_e.text = cell.get('input','')
+                    output = cell.output
+                    cell_e = ET.SubElement(ws_e, 'codecell')
                     output_e = ET.SubElement(cell_e, 'output')
-                    text_e = ET.SubElement(output_e, 'text')
-                    text_e.text = cell.output
-                elif cell_type == 'text':
-                    pass
-                    
 
+                    if 'input' in cell:
+                        input_e = ET.SubElement(cell_e, 'input')
+                        input_e.text = cell.input
+                    if 'prompt_number' in cell:
+                        prompt_number_e = ET.SubElement(cell_e, 'prompt_number')
+                        prompt_number_e.text = unicode(cell.prompt_number)  # own element; text must be a string, not an int
 
+                    if 'text' in output:
+                        text_e = ET.SubElement(output_e, 'text')
+                        text_e.text = output.text
+                    if 'png' in output:
+                        png_e = ET.SubElement(output_e, 'png')
+                        png_e.text = output.png
+                    if 'html' in output:
+                        html_e = ET.SubElement(output_e, 'html')
+                        html_e.text = output.html
+                    if 'svg' in output:
+                        svg_e = ET.SubElement(output_e, 'svg')
+                        svg_e.text = output.svg
+                    if 'latex' in output:
+                        latex_e = ET.SubElement(output_e, 'latex')
+                        latex_e.text = output.latex
+                    if 'json' in output:
+                        json_e = ET.SubElement(output_e, 'json')
+                        json_e.text = output.json
+                    if 'javascript' in output:
+                        javascript_e = ET.SubElement(output_e, 'javascript')
+                        javascript_e.text = output.javascript
+                elif cell_type == 'text':
+                    cell_e = ET.SubElement(ws_e, 'textcell')
+                    if 'text' in cell:
+                        cell_text_e = ET.SubElement(cell_e, 'text')
+                        cell_text_e.text = cell.text
+
+        indent(nb_e)
+        txt = ET.tostring(nb_e, encoding="utf-8")
+        txt = '<?xml version="1.0" encoding="utf-8"?>\n' + txt
+        return txt
+        
+                    
 _reader = XMLReader()
 _writer = XMLWriter()
 
diff --git a/IPython/nbformat/base.py b/IPython/nbformat/rwbase.py
similarity index 93%
rename from IPython/nbformat/base.py
rename to IPython/nbformat/rwbase.py
index 833a5ec..17747c9 100644
--- a/IPython/nbformat/base.py
+++ b/IPython/nbformat/rwbase.py
@@ -1,7 +1,7 @@
 from base64 import encodestring, decodestring
 
 
-def base64_decode(self, nb):
+def base64_decode(nb):
     """Base64 encode all bytes objects in the notebook."""
     for ws in nb['worksheets']:
         for cell in ws['cells']:
@@ -11,7 +11,7 @@ def base64_decode(self, nb):
     return nb
 
 
-def base64_encode(self, nb):
+def base64_encode(nb):
     """Base64 decode all binary objects in the notebook."""
     for ws in nb['worksheets']:
         for cell in ws['cells']:
@@ -43,3 +43,4 @@ class NotebookWriter(object):
         return fp.write(self.dumps(nb,**kwargs))
 
 
+
diff --git a/IPython/nbformat/tests/__init__.py b/IPython/nbformat/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/IPython/nbformat/tests/__init__.py
diff --git a/IPython/nbformat/tests/nbexamples.py b/IPython/nbformat/tests/nbexamples.py
new file mode 100644
index 0000000..9173701
--- /dev/null
+++ b/IPython/nbformat/tests/nbexamples.py
@@ -0,0 +1,52 @@
+from IPython.nbformat.nbbase import (
+    NotebookNode,
+    new_code_cell, new_text_cell, new_worksheet, new_notebook
+)
+
+
+
+ws = new_worksheet(name='worksheet1')
+
+ws.cells.append(new_text_cell(
+    text='Some NumPy Examples'  # text cell: absent from nb0_py, which lists only the code cells
+))
+
+
+ws.cells.append(new_code_cell(
+    input='import numpy'
+))
+
+ws.cells.append(new_code_cell(
+    input='a = numpy.random.rand(100)'
+))
+
+ws.cells.append(new_code_cell(
+    input='print a',
+    output_text='<array a>',
+    output_html='The HTML rep',
+    output_latex='$a$',
+    output_png=b'data',  # the one bytes-valued output; new_code_cell stores the others as unicode
+    output_svg='<svg>',
+    output_json='json data',
+    output_javascript='var i=0;'
+))
+
+nb0 = new_notebook(
+    name='nb0',
+    worksheets=[ws, new_worksheet(name='worksheet2')]  # second worksheet is created with no cells
+)
+
+nb0_py = """# <codecell>
+
+import numpy
+
+# <codecell>
+
+a = numpy.random.rand(100)
+
+# <codecell>
+
+print a
+"""
+
+
diff --git a/IPython/nbformat/tests/test_json.py b/IPython/nbformat/tests/test_json.py
new file mode 100644
index 0000000..7b7430e
--- /dev/null
+++ b/IPython/nbformat/tests/test_json.py
@@ -0,0 +1,14 @@
+from unittest import TestCase
+
+from IPython.nbformat.nbjson import reads, writes
+from IPython.nbformat.tests.nbexamples import nb0
+
+
+class TestJSON(TestCase):
+
+    def test_roundtrip(self):
+        serialized = writes(nb0)
+        self.assertEqual(reads(serialized),nb0)
+
+
+
diff --git a/IPython/nbformat/tests/test_nbbase.py b/IPython/nbformat/tests/test_nbbase.py
new file mode 100644
index 0000000..c0df3e9
--- /dev/null
+++ b/IPython/nbformat/tests/test_nbbase.py
@@ -0,0 +1,60 @@
+from unittest import TestCase
+
+from IPython.nbformat.nbbase import (
+    NotebookNode,
+    new_code_cell, new_text_cell, new_worksheet, new_notebook
+)
+
+class TestCell(TestCase):
+
+    def test_empty_code_cell(self):
+        cell = new_code_cell()
+        self.assertEqual(cell.cell_type,'code')
+        self.assertTrue('input' not in cell)
+        self.assertTrue('prompt_number' not in cell)
+        self.assertEqual(cell.output, NotebookNode())
+
+    def test_code_cell(self):
+        cell = new_code_cell(input='a=10', prompt_number=0, output_svg='foo', output_text='10')
+        self.assertEqual(cell.input, u'a=10')
+        self.assertEqual(cell.prompt_number, 0)
+        self.assertEqual(cell.output.svg, u'foo')
+        self.assertEqual(cell.output.text, u'10')
+
+    def test_empty_text_cell(self):
+        cell = new_text_cell()
+        self.assertEqual(cell.cell_type, 'text')
+        self.assertTrue('text' not in cell)
+
+    def test_text_cell(self):
+        cell = new_text_cell('hi')
+        self.assertEqual(cell.text, u'hi')
+
+
+class TestWorksheet(TestCase):
+
+    def test_empty_worksheet(self):
+        sheet = new_worksheet()
+        self.assertEqual(sheet.cells,[])
+        self.assertTrue('name' not in sheet)
+
+    def test_worksheet(self):
+        members = [new_code_cell(), new_text_cell()]
+        sheet = new_worksheet(cells=members,name='foo')
+        self.assertEqual(sheet.cells,members)
+        self.assertEqual(sheet.name,u'foo')
+
+class TestNotebook(TestCase):
+
+    def test_empty_notebook(self):
+        nb = new_notebook()
+        self.assertTrue('id' in nb)
+        self.assertEqual(nb.worksheets, [])
+        self.assertTrue('name' not in nb)
+
+    def test_notebook(self):
+        worksheets = [new_worksheet(),new_worksheet()]
+        nb = new_notebook(name='foo',worksheets=worksheets)
+        self.assertEqual(nb.name,u'foo')
+        self.assertEqual(nb.worksheets,worksheets)
+
diff --git a/IPython/nbformat/tests/test_nbpy.py b/IPython/nbformat/tests/test_nbpy.py
new file mode 100644
index 0000000..9fa685b
--- /dev/null
+++ b/IPython/nbformat/tests/test_nbpy.py
@@ -0,0 +1,18 @@
+from unittest import TestCase
+
+from IPython.nbformat.nbbase import (
+    NotebookNode,
+    new_code_cell, new_text_cell, new_worksheet, new_notebook
+)
+
+from IPython.nbformat.nbpy import reads, writes
+from IPython.nbformat.tests.nbexamples import nb0, nb0_py
+
+
+class TestPy(TestCase):
+
+    def test_write(self):
+        rendered = writes(nb0)
+        self.assertEqual(rendered,nb0_py)
+
+
diff --git a/IPython/nbformat/tests/test_xml.py b/IPython/nbformat/tests/test_xml.py
new file mode 100644
index 0000000..04afba4
--- /dev/null
+++ b/IPython/nbformat/tests/test_xml.py
@@ -0,0 +1,12 @@
+from unittest import TestCase
+
+from IPython.nbformat.nbxml import reads, writes
+from IPython.nbformat.tests.nbexamples import nb0
+
+
+class TestXML(TestCase):
+
+    def test_roundtrip(self):
+        serialized = writes(nb0)
+        self.assertEqual(reads(serialized),nb0)
+