From 5a27a1372c2644c5fb5a3d513155ae1d2682e6b4 2013-09-06 21:53:30
From: Min RK <benjaminrk@gmail.com>
Date: 2013-09-06 21:53:30
Subject: [PATCH] Merge pull request #4043 from minrk/no-restore-bytes

Don't call `restore_bytes` when reading notebooks from JSON (`JSONReader.to_notebook`).

It makes no sense to coerce base64-encoded unicode strings into base64-encoded byte strings.

closes #4036

---

diff --git a/IPython/nbconvert/preprocessors/tests/base.py b/IPython/nbconvert/preprocessors/tests/base.py
index 8c72e51..d5c1459 100644
--- a/IPython/nbconvert/preprocessors/tests/base.py
+++ b/IPython/nbconvert/preprocessors/tests/base.py
@@ -36,7 +36,7 @@ class PreprocessorTestsBase(TestsBase):
                    nbformat.new_output(output_type="stream", stream="stdout", output_text="d"),
                    nbformat.new_output(output_type="stream", stream="stderr", output_text="e"),
                    nbformat.new_output(output_type="stream", stream="stderr", output_text="f"),
-                   nbformat.new_output(output_type="png", output_png=b'Zw==')] #g
+                   nbformat.new_output(output_type="png", output_png='Zw==')] #g
         
         cells=[nbformat.new_code_cell(input="$ e $", prompt_number=1,outputs=outputs),
                nbformat.new_text_cell('markdown', source="$ e $")]
diff --git a/IPython/nbconvert/preprocessors/tests/test_extractoutput.py b/IPython/nbconvert/preprocessors/tests/test_extractoutput.py
index 4a13ebd..08bd1f2 100644
--- a/IPython/nbconvert/preprocessors/tests/test_extractoutput.py
+++ b/IPython/nbconvert/preprocessors/tests/test_extractoutput.py
@@ -46,12 +46,14 @@ class TestExtractOutput(PreprocessorTestsBase):
         nb, res = preprocessor(nb, res)
 
         # Check if text was extracted.
-        assert 'text_filename' in nb.worksheets[0].cells[0].outputs[1]
-        text_filename = nb.worksheets[0].cells[0].outputs[1]['text_filename']
+        output = nb.worksheets[0].cells[0].outputs[1]
+        assert 'text_filename' in output
+        text_filename = output['text_filename']
 
         # Check if png was extracted.
-        assert 'png_filename' in nb.worksheets[0].cells[0].outputs[6]
-        png_filename = nb.worksheets[0].cells[0].outputs[6]['png_filename']
+        output = nb.worksheets[0].cells[0].outputs[6]
+        assert 'png_filename' in output
+        png_filename = output['png_filename']
 
         # Verify text output
         assert text_filename in res['outputs']
diff --git a/IPython/nbconvert/tests/test_nbconvertapp.py b/IPython/nbconvert/tests/test_nbconvertapp.py
index 6fa82b9..4b21bd3 100644
--- a/IPython/nbconvert/tests/test_nbconvertapp.py
+++ b/IPython/nbconvert/tests/test_nbconvertapp.py
@@ -13,6 +13,7 @@
 
 import os
 import glob
+import sys
 
 from .base import TestsBase
 
@@ -104,6 +105,17 @@ class TestNbConvertApp(TestsBase):
 
 
     @dec.onlyif_cmds_exist('pandoc')
+    def test_png_base64_html_ok(self):
+        """Is embedded png data well formed in HTML?"""
+        with self.create_temp_cwd(['notebook2.ipynb']):
+            self.call('nbconvert --log-level 0 --to HTML '
+                      'notebook2.ipynb --template full')
+            assert os.path.isfile('notebook2.html')
+            with open('notebook2.html') as f:
+                assert "data:image/png;base64,b'" not in f.read()
+
+
+    @dec.onlyif_cmds_exist('pandoc')
     def test_template(self):
         """
         Do export templates work?
diff --git a/IPython/nbformat/v3/nbbase.py b/IPython/nbformat/v3/nbbase.py
index bac60df..a1b697f 100644
--- a/IPython/nbformat/v3/nbbase.py
+++ b/IPython/nbformat/v3/nbbase.py
@@ -25,6 +25,7 @@ import pprint
 import uuid
 
 from IPython.utils.ipstruct import Struct
+from IPython.utils.py3compat import cast_unicode
 
 #-----------------------------------------------------------------------------
 # Code
@@ -67,21 +68,21 @@ def new_output(output_type=None, output_text=None, output_png=None,
 
     if output_type != 'pyerr':
         if output_text is not None:
-            output.text = unicode(output_text)
+            output.text = cast_unicode(output_text)
         if output_png is not None:
-            output.png = bytes(output_png)
+            output.png = cast_unicode(output_png)
         if output_jpeg is not None:
-            output.jpeg = bytes(output_jpeg)
+            output.jpeg = cast_unicode(output_jpeg)
         if output_html is not None:
-            output.html = unicode(output_html)
+            output.html = cast_unicode(output_html)
         if output_svg is not None:
-            output.svg = unicode(output_svg)
+            output.svg = cast_unicode(output_svg)
         if output_latex is not None:
-            output.latex = unicode(output_latex)
+            output.latex = cast_unicode(output_latex)
         if output_json is not None:
-            output.json = unicode(output_json)
+            output.json = cast_unicode(output_json)
         if output_javascript is not None:
-            output.javascript = unicode(output_javascript)
+            output.javascript = cast_unicode(output_javascript)
 
     if output_type == u'pyout':
         if prompt_number is not None:
@@ -89,14 +90,14 @@ def new_output(output_type=None, output_text=None, output_png=None,
 
     if output_type == u'pyerr':
         if ename is not None:
-            output.ename = unicode(ename)
+            output.ename = cast_unicode(ename)
         if evalue is not None:
-            output.evalue = unicode(evalue)
+            output.evalue = cast_unicode(evalue)
         if traceback is not None:
-            output.traceback = [unicode(frame) for frame in list(traceback)]
+            output.traceback = [cast_unicode(frame) for frame in list(traceback)]
 
     if output_type == u'stream':
-        output.stream = 'stdout' if stream is None else unicode(stream)
+        output.stream = 'stdout' if stream is None else cast_unicode(stream)
     
     return output
 
@@ -107,9 +108,9 @@ def new_code_cell(input=None, prompt_number=None, outputs=None,
     cell = NotebookNode()
     cell.cell_type = u'code'
     if language is not None:
-        cell.language = unicode(language)
+        cell.language = cast_unicode(language)
     if input is not None:
-        cell.input = unicode(input)
+        cell.input = cast_unicode(input)
     if prompt_number is not None:
         cell.prompt_number = int(prompt_number)
     if outputs is None:
@@ -130,9 +131,9 @@ def new_text_cell(cell_type, source=None, rendered=None, metadata=None):
     if cell_type == 'plaintext':
         cell_type = 'raw'
     if source is not None:
-        cell.source = unicode(source)
+        cell.source = cast_unicode(source)
     if rendered is not None:
-        cell.rendered = unicode(rendered)
+        cell.rendered = cast_unicode(rendered)
     cell.metadata = NotebookNode(metadata or {})
     cell.cell_type = cell_type
     return cell
@@ -143,9 +144,9 @@ def new_heading_cell(source=None, rendered=None, level=1, metadata=None):
     cell = NotebookNode()
     cell.cell_type = u'heading'
     if source is not None:
-        cell.source = unicode(source)
+        cell.source = cast_unicode(source)
     if rendered is not None:
-        cell.rendered = unicode(rendered)
+        cell.rendered = cast_unicode(rendered)
     cell.level = int(level)
     cell.metadata = NotebookNode(metadata or {})
     return cell
@@ -155,7 +156,7 @@ def new_worksheet(name=None, cells=None, metadata=None):
     """Create a worksheet by name with with a list of cells."""
     ws = NotebookNode()
     if name is not None:
-        ws.name = unicode(name)
+        ws.name = cast_unicode(name)
     if cells is None:
         ws.cells = []
     else:
@@ -178,7 +179,7 @@ def new_notebook(name=None, metadata=None, worksheets=None):
     else:
         nb.metadata = NotebookNode(metadata)
     if name is not None:
-        nb.metadata.name = unicode(name)
+        nb.metadata.name = cast_unicode(name)
     return nb
 
 
@@ -187,29 +188,29 @@ def new_metadata(name=None, authors=None, license=None, created=None,
     """Create a new metadata node."""
     metadata = NotebookNode()
     if name is not None:
-        metadata.name = unicode(name)
+        metadata.name = cast_unicode(name)
     if authors is not None:
         metadata.authors = list(authors)
     if created is not None:
-        metadata.created = unicode(created)
+        metadata.created = cast_unicode(created)
     if modified is not None:
-        metadata.modified = unicode(modified)
+        metadata.modified = cast_unicode(modified)
     if license is not None:
-        metadata.license = unicode(license)
+        metadata.license = cast_unicode(license)
     if gistid is not None:
-        metadata.gistid = unicode(gistid)
+        metadata.gistid = cast_unicode(gistid)
     return metadata
 
 def new_author(name=None, email=None, affiliation=None, url=None):
     """Create a new author."""
     author = NotebookNode()
     if name is not None:
-        author.name = unicode(name)
+        author.name = cast_unicode(name)
     if email is not None:
-        author.email = unicode(email)
+        author.email = cast_unicode(email)
     if affiliation is not None:
-        author.affiliation = unicode(affiliation)
+        author.affiliation = cast_unicode(affiliation)
     if url is not None:
-        author.url = unicode(url)
+        author.url = cast_unicode(url)
     return author
 
diff --git a/IPython/nbformat/v3/nbjson.py b/IPython/nbformat/v3/nbjson.py
index 5c63a4e..be9ee28 100644
--- a/IPython/nbformat/v3/nbjson.py
+++ b/IPython/nbformat/v3/nbjson.py
@@ -46,7 +46,7 @@ class JSONReader(NotebookReader):
         return nb
 
     def to_notebook(self, d, **kwargs):
-        return restore_bytes(rejoin_lines(from_dict(d)))
+        return rejoin_lines(from_dict(d))
 
 
 class JSONWriter(NotebookWriter):
diff --git a/IPython/nbformat/v3/nbpy.py b/IPython/nbformat/v3/nbpy.py
index 23fc2f0..bd4a9ad 100644
--- a/IPython/nbformat/v3/nbpy.py
+++ b/IPython/nbformat/v3/nbpy.py
@@ -190,7 +190,7 @@ class PyWriter(NotebookWriter):
                         lines.extend([u'# ' + line for line in input.splitlines()])
                         lines.append(u'')
         lines.append('')
-        return unicode('\n'.join(lines))
+        return u'\n'.join(lines)
 
 
 _reader = PyReader()
diff --git a/IPython/nbformat/v3/rwbase.py b/IPython/nbformat/v3/rwbase.py
index d8c1f17..a381242 100644
--- a/IPython/nbformat/v3/rwbase.py
+++ b/IPython/nbformat/v3/rwbase.py
@@ -32,6 +32,8 @@ def restore_bytes(nb):
     
     Base64 encoding is handled elsewhere.  Bytes objects in the notebook are
     always b64-encoded. We DO NOT encode/decode around file formats.
+    
+    Note: this is never used
     """
     for ws in nb.worksheets:
         for cell in ws.cells:
diff --git a/IPython/nbformat/v3/tests/nbexamples.py b/IPython/nbformat/v3/tests/nbexamples.py
index 96685ca..713957d 100644
--- a/IPython/nbformat/v3/tests/nbexamples.py
+++ b/IPython/nbformat/v3/tests/nbexamples.py
@@ -9,9 +9,9 @@ from ..nbbase import (
     new_metadata, new_author, new_heading_cell, nbformat, nbformat_minor
 )
 
-# some random base64-encoded *bytes*
-png = encodestring(os.urandom(5))
-jpeg = encodestring(os.urandom(6))
+# some random base64-encoded *text*
+png = encodestring(os.urandom(5)).decode('ascii')
+jpeg = encodestring(os.urandom(6)).decode('ascii')
 
 ws = new_worksheet(name='worksheet1')
 
diff --git a/IPython/nbformat/v3/tests/test_json.py b/IPython/nbformat/v3/tests/test_json.py
index 9897443..fba1585 100644
--- a/IPython/nbformat/v3/tests/test_json.py
+++ b/IPython/nbformat/v3/tests/test_json.py
@@ -1,4 +1,5 @@
 import pprint
+from base64 import decodestring
 from unittest import TestCase
 
 from ..nbjson import reads, writes
@@ -29,5 +30,42 @@ class TestJSON(formattest.NBFormatTest, TestCase):
         s = writes(nb0, split_lines=True)
         self.assertEqual(nbjson.reads(s),nb0)
 
+    def test_read_png(self):
+        """PNG output data is b64 unicode"""
+        s = writes(nb0)
+        nb1 = nbjson.reads(s)
+        found_png = False
+        for cell in nb1.worksheets[0].cells:
+            if not 'outputs' in cell:
+                continue
+            for output in cell.outputs:
+                if 'png' in output:
+                    found_png = True
+                    pngdata = output['png']
+                    self.assertEqual(type(pngdata), unicode)
+                    # test that it is valid b64 data
+                    b64bytes = pngdata.encode('ascii')
+                    raw_bytes = decodestring(b64bytes)
+        assert found_png, "never found png output"
+
+    def test_read_jpeg(self):
+        """JPEG output data is b64 unicode"""
+        s = writes(nb0)
+        nb1 = nbjson.reads(s)
+        found_jpeg = False
+        for cell in nb1.worksheets[0].cells:
+            if not 'outputs' in cell:
+                continue
+            for output in cell.outputs:
+                if 'jpeg' in output:
+                    found_jpeg = True
+                    jpegdata = output['jpeg']
+                    self.assertEqual(type(jpegdata), unicode)
+                    # test that it is valid b64 data
+                    b64bytes = jpegdata.encode('ascii')
+                    raw_bytes = decodestring(b64bytes)
+        assert found_jpeg, "never found jpeg output"
+
+
 
 
diff --git a/IPython/nbformat/v3/tests/test_nbbase.py b/IPython/nbformat/v3/tests/test_nbbase.py
index 67b4b79..2d137b8 100644
--- a/IPython/nbformat/v3/tests/test_nbbase.py
+++ b/IPython/nbformat/v3/tests/test_nbbase.py
@@ -141,3 +141,17 @@ class TestMetadata(TestCase):
         self.assertEqual(md.gistid, u'21341231')
         self.assertEqual(md.authors, authors)
 
+class TestOutputs(TestCase):
+    def test_binary_png(self):
+        out = new_output(output_png=b'\x89PNG\r\n\x1a\n')
+
+    def test_b64b6tes_png(self):
+        out = new_output(output_png=b'iVBORw0KG')
+    
+    def test_binary_jpeg(self):
+        out = new_output(output_jpeg=b'\xff\xd8')
+
+    def test_b64b6tes_jpeg(self):
+        out = new_output(output_jpeg=b'/9')
+        
+