##// END OF EJS Templates
split likely multiline strings when writing to/from JSON
MinRK -
Show More
@@ -1,228 +1,233 b''
1 1 """A notebook manager that uses the local file system for storage.
2 2
3 3 Authors:
4 4
5 5 * Brian Granger
6 6 """
7 7
8 8 #-----------------------------------------------------------------------------
9 9 # Copyright (C) 2008-2011 The IPython Development Team
10 10 #
11 11 # Distributed under the terms of the BSD License. The full license is in
12 12 # the file COPYING, distributed as part of this software.
13 13 #-----------------------------------------------------------------------------
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Imports
17 17 #-----------------------------------------------------------------------------
18 18
19 19 import datetime
20 20 import os
21 21 import uuid
22 22 import glob
23 23
24 24 from tornado import web
25 25
26 26 from IPython.config.configurable import LoggingConfigurable
27 27 from IPython.nbformat import current
28 28 from IPython.utils.traitlets import Unicode, List, Dict
29 29
30 30
31 31 #-----------------------------------------------------------------------------
32 32 # Code
33 33 #-----------------------------------------------------------------------------
34 34
35 35
class NotebookManager(LoggingConfigurable):
    """A notebook manager that keeps notebooks as files in one local directory.

    Notebooks are addressed by an opaque ``notebook_id``. Two in-memory dicts
    translate between ids and notebook names, where a name is the file name
    without ``filename_ext``.
    """

    notebook_dir = Unicode(os.getcwd(), config=True, help="""
        The directory to use for notebooks.
    """)
    # Extension appended to a notebook name to build its file name.
    filename_ext = Unicode(u'.ipynb')
    # Formats accepted by the get/save methods; anything else is a 415.
    allowed_formats = List([u'json',u'py'])

    # Map notebook_ids to notebook names
    mapping = Dict()
    # Map notebook names to notebook_ids
    rev_mapping = Dict()

    def list_notebooks(self):
        """List all notebooks in the notebook dir.

        This returns a list of dicts of the form::

            dict(notebook_id=notebook,name=name)

        The list is sorted by name. A name that has no id yet gets one
        assigned (and stored in the mappings) as a side effect.
        """
        names = glob.glob(os.path.join(self.notebook_dir,
                                       '*' + self.filename_ext))
        names = [os.path.splitext(os.path.basename(name))[0]
                 for name in names]

        data = []
        for name in names:
            if name not in self.rev_mapping:
                notebook_id = self.new_notebook_id(name)
            else:
                notebook_id = self.rev_mapping[name]
            data.append(dict(notebook_id=notebook_id,name=name))
        data = sorted(data, key=lambda item: item['name'])
        return data

    def new_notebook_id(self, name):
        """Generate a new notebook_id for a name and store its mappings."""
        # TODO: the following will give stable urls for notebooks, but unless
        # the notebooks are immediately redirected to their new urls when their
        # filename changes, nasty inconsistencies result.  So for now it's
        # disabled and instead we use a random uuid4() call.  But we leave the
        # logic here so that we can later reactivate it, when the necessary
        # url redirection code is written.
        #notebook_id = unicode(uuid.uuid5(uuid.NAMESPACE_URL,
        #                 'file://'+self.get_path_by_name(name).encode('utf-8')))

        notebook_id = unicode(uuid.uuid4())

        self.mapping[notebook_id] = name
        self.rev_mapping[name] = notebook_id
        return notebook_id

    def delete_notebook_id(self, notebook_id):
        """Delete a notebook's id only. This doesn't delete the actual notebook.

        Raises ``KeyError`` if the id (or its name) is not in the mappings.
        """
        name = self.mapping[notebook_id]
        del self.mapping[notebook_id]
        del self.rev_mapping[name]

    def notebook_exists(self, notebook_id):
        """Does a notebook exist?

        True only if the id is known *and* its file is present on disk.
        """
        if notebook_id not in self.mapping:
            return False
        path = self.get_path_by_name(self.mapping[notebook_id])
        return os.path.isfile(path)

    def find_path(self, notebook_id):
        """Return a full path to a notebook given its notebook_id.

        Raises a 404 ``web.HTTPError`` if the id is unknown.
        """
        try:
            name = self.mapping[notebook_id]
        except KeyError:
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
        return self.get_path_by_name(name)

    def get_path_by_name(self, name):
        """Return a full path to a notebook given its name."""
        filename = name + self.filename_ext
        path = os.path.join(self.notebook_dir, filename)
        return path

    def get_notebook(self, notebook_id, format=u'json'):
        """Get the representation of a notebook in format by notebook_id.

        Returns a ``(last_modified, name, data)`` tuple, where ``data`` is the
        notebook serialized by ``current.writes``. Raises a 415
        ``web.HTTPError`` for a format not in ``allowed_formats``.
        """
        format = unicode(format)
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
        last_modified, nb = self.get_notebook_object(notebook_id)
        kwargs = {}
        if format == 'json':
            # don't split lines for sending over the wire, because it
            # should match the Python in-memory format.
            kwargs['split_lines'] = False
        data = current.writes(nb, format, **kwargs)
        name = nb.get('name','notebook')
        return last_modified, name, data

    def get_notebook_object(self, notebook_id):
        """Get the NotebookNode representation of a notebook by notebook_id.

        Returns ``(last_modified, nb)`` where ``last_modified`` is built from
        the file's mtime with ``utcfromtimestamp``. Raises a 404
        ``web.HTTPError`` if the file is missing and a 500 if it cannot be
        parsed.
        """
        path = self.find_path(notebook_id)
        if not os.path.isfile(path):
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
        info = os.stat(path)
        last_modified = datetime.datetime.utcfromtimestamp(info.st_mtime)
        with open(path,'r') as f:
            s = f.read()
        try:
            # v1 and v2 and json in the .ipynb files.
            nb = current.reads(s, u'json')
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # are not turned into HTTP errors.
            raise web.HTTPError(500, u'Unreadable JSON notebook.')
        if 'name' not in nb:
            nb.name = os.path.split(path)[-1].split(u'.')[0]
        return last_modified, nb

    def save_new_notebook(self, data, name=None, format=u'json'):
        """Save a new notebook and return its notebook_id.

        If a name is passed in, it overrides any values in the notebook data
        and the value in the data is updated to use that value.

        Raises a 415 ``web.HTTPError`` for a disallowed format and a 400 if
        the data cannot be parsed or no name is available.
        """
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)

        try:
            nb = current.reads(data.decode('utf-8'), format)
        except Exception:
            raise web.HTTPError(400, u'Invalid JSON data')

        if name is None:
            try:
                name = nb.metadata.name
            except AttributeError:
                raise web.HTTPError(400, u'Missing notebook name')
        nb.metadata.name = name

        notebook_id = self.new_notebook_id(name)
        self.save_notebook_object(notebook_id, nb)
        return notebook_id

    def save_notebook(self, notebook_id, data, name=None, format=u'json'):
        """Save an existing notebook by notebook_id.

        ``data`` is decoded as UTF-8 and parsed in ``format``; a given ``name``
        replaces the name stored in the notebook metadata.
        """
        if format not in self.allowed_formats:
            raise web.HTTPError(415, u'Invalid notebook format: %s' % format)

        try:
            nb = current.reads(data.decode('utf-8'), format)
        except Exception:
            raise web.HTTPError(400, u'Invalid JSON data')

        if name is not None:
            nb.metadata.name = name
        self.save_notebook_object(notebook_id, nb)

    def save_notebook_object(self, notebook_id, nb):
        """Save an existing notebook object by notebook_id.

        The file is written under the name found in ``nb.metadata.name``. If
        that differs from the name currently mapped to the id, the old file is
        removed and both mappings are updated to the new name.
        """
        if notebook_id not in self.mapping:
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
        old_name = self.mapping[notebook_id]
        try:
            new_name = nb.metadata.name
        except AttributeError:
            raise web.HTTPError(400, u'Missing notebook name')
        path = self.get_path_by_name(new_name)
        try:
            with open(path,'w') as f:
                current.write(nb, f, u'json')
        except Exception:
            raise web.HTTPError(400, u'Unexpected error while saving notebook')
        if old_name != new_name:
            old_path = self.get_path_by_name(old_name)
            if os.path.isfile(old_path):
                os.unlink(old_path)
            self.mapping[notebook_id] = new_name
            self.rev_mapping[new_name] = notebook_id
            # Drop the reverse entry for the old name; otherwise a later file
            # with that name would be handed this notebook's id.
            if self.rev_mapping.get(old_name) == notebook_id:
                del self.rev_mapping[old_name]

    def delete_notebook(self, notebook_id):
        """Delete notebook by notebook_id.

        Removes the file and then the id mappings. Raises a 404
        ``web.HTTPError`` if the id is unknown or the file is missing.
        """
        path = self.find_path(notebook_id)
        if not os.path.isfile(path):
            raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
        os.unlink(path)
        self.delete_notebook_id(notebook_id)

    def new_notebook(self):
        """Create a new notebook and returns its notebook_id.

        The notebook is named ``Untitled<i>`` for the smallest ``i`` (from 0)
        whose file does not exist yet.
        """
        i = 0
        while True:
            name = u'Untitled%i' % i
            path = self.get_path_by_name(name)
            if not os.path.isfile(path):
                break
            else:
                i = i+1
        metadata = current.new_metadata(name=name)
        nb = current.new_notebook(metadata=metadata)
        with open(path,'w') as f:
            current.write(nb, f, u'json')
        # Register the id only after the file was written, so a failed write
        # does not leave a mapping that points at a missing file.
        notebook_id = self.new_notebook_id(name)
        return notebook_id
228 233
@@ -1,63 +1,69 b''
1 1 """Read and write notebooks in JSON format.
2 2
3 3 Authors:
4 4
5 5 * Brian Granger
6 6 """
7 7
8 8 #-----------------------------------------------------------------------------
9 9 # Copyright (C) 2008-2011 The IPython Development Team
10 10 #
11 11 # Distributed under the terms of the BSD License. The full license is in
12 12 # the file COPYING, distributed as part of this software.
13 13 #-----------------------------------------------------------------------------
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Imports
17 17 #-----------------------------------------------------------------------------
18 18
19 from .nbbase import from_dict
20 from .rwbase import NotebookReader, NotebookWriter, restore_bytes
19 import copy
21 20 import json
22 21
22 from .nbbase import from_dict
23 from .rwbase import (
24 NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines
25 )
26
23 27 #-----------------------------------------------------------------------------
24 28 # Code
25 29 #-----------------------------------------------------------------------------
26 30
class BytesEncoder(json.JSONEncoder):
    """JSON encoder that serializes ascii bytestrings (e.g. b64 data) as text."""

    def default(self, obj):
        """Decode ``bytes`` as ascii; defer everything else to the base class."""
        if not isinstance(obj, bytes):
            return super(BytesEncoder, self).default(obj)
        return obj.decode('ascii')
33 37
34 38
class JSONReader(NotebookReader):
    """Reader that builds a notebook from its JSON representation."""

    def reads(self, s, **kwargs):
        """Parse the JSON string ``s`` and convert the result to a notebook.

        ``kwargs`` are passed both to ``json.loads`` and to ``to_notebook``.
        """
        parsed = json.loads(s, **kwargs)
        return self.to_notebook(parsed, **kwargs)

    def to_notebook(self, d, **kwargs):
        """Convert the dict ``d`` to a notebook, rejoining split lines and
        restoring image bytes."""
        notebook = from_dict(d)
        notebook = rejoin_lines(notebook)
        return restore_bytes(notebook)
44 48
45 49
class JSONWriter(NotebookWriter):
    """Writer that serializes a notebook to an indented, key-sorted JSON string."""

    def writes(self, nb, **kwargs):
        """Return ``nb`` as a JSON string.

        ``split_lines`` (default True) is consumed here: when true, the
        notebook is deep-copied and its multiline text is split into lists
        before dumping, so the caller's object is not modified. The remaining
        ``kwargs`` go to ``json.dumps``, with ``cls``, ``indent`` and
        ``sort_keys`` always overridden.
        """
        do_split = kwargs.pop('split_lines', True)
        kwargs.update(cls=BytesEncoder, indent=4, sort_keys=True)
        if do_split:
            nb = split_lines(copy.deepcopy(nb))
        return json.dumps(nb, **kwargs)
53
59
54 60
55 61 _reader = JSONReader()
56 62 _writer = JSONWriter()
57 63
58 64 reads = _reader.reads
59 65 read = _reader.read
60 66 to_notebook = _reader.to_notebook
61 67 write = _writer.write
62 68 writes = _writer.writes
63 69
@@ -1,110 +1,165 b''
1 1 """Base classes and utilities for readers and writers.
2 2
3 3 Authors:
4 4
5 5 * Brian Granger
6 6 """
7 7
8 8 #-----------------------------------------------------------------------------
9 9 # Copyright (C) 2008-2011 The IPython Development Team
10 10 #
11 11 # Distributed under the terms of the BSD License. The full license is in
12 12 # the file COPYING, distributed as part of this software.
13 13 #-----------------------------------------------------------------------------
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Imports
17 17 #-----------------------------------------------------------------------------
18 18
19 19 from base64 import encodestring, decodestring
20 20 import pprint
21 21
22 22 from IPython.utils.py3compat import str_to_bytes
23 23
24 24 #-----------------------------------------------------------------------------
25 25 # Code
26 26 #-----------------------------------------------------------------------------
27 27
def restore_bytes(nb):
    """Restore bytes of image data from unicode-only formats.

    Base64 encoding is handled elsewhere.  Bytes objects in the notebook are
    always b64-encoded. We DO NOT encode/decode around file formats.

    The ``png`` and ``jpeg`` entries of every code-cell output are converted
    in place with ``str_to_bytes(..., 'ascii')``; ``nb`` itself is returned.
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                continue
            for out in cell.outputs:
                for image_key in ('png', 'jpeg'):
                    if image_key in out:
                        as_bytes = str_to_bytes(getattr(out, image_key), 'ascii')
                        setattr(out, image_key, as_bytes)
    return nb
43 43
44 # output keys that are likely to have multiline values
45 _multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
46
def rejoin_lines(nb):
    """rejoin multiline text into strings

    For reversing effects of ``split_lines(nb)``.

    This only rejoins lines that have been split, so if text objects were not split
    they will pass through unchanged.

    Used when reading JSON files that may have been passed through split_lines.

    The notebook is modified in place and returned.
    """
    def _join_listed(container, keys):
        # Turn every list-valued entry among *keys* back into a single string.
        for key in keys:
            value = container.get(key, None)
            if isinstance(value, list):
                container[key] = u'\n'.join(value)

    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type == 'code':
                if 'input' in cell and isinstance(cell.input, list):
                    cell.input = u'\n'.join(cell.input)
                for out in cell.outputs:
                    _join_listed(out, _multiline_outputs)
            else:  # text cell
                _join_listed(cell, ['source', 'rendered'])
    return nb
73
74
def split_lines(nb):
    """split likely multiline text into lists of strings

    For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
    reverse the effects of ``split_lines(nb)``.

    Used when writing JSON files.

    The notebook is modified in place and returned.
    """
    def _split_text(container, keys):
        # Replace every string-valued entry among *keys* by its list of lines.
        for key in keys:
            value = container.get(key, None)
            if isinstance(value, basestring):
                container[key] = value.splitlines()

    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type == 'code':
                if 'input' in cell and isinstance(cell.input, basestring):
                    cell.input = cell.input.splitlines()
                for out in cell.outputs:
                    _split_text(out, _multiline_outputs)
            else:  # text cell
                _split_text(cell, ['source', 'rendered'])
    return nb
44 99
45 100 # b64 encode/decode are never actually used, because all bytes objects in
46 101 # the notebook are already b64-encoded, and we don't need/want to double-encode
47 102
def base64_decode(nb):
    """Restore all bytes objects in the notebook from base64-encoded strings.

    The ``png`` and ``jpeg`` entries of code-cell outputs are decoded in
    place (unicode values are first encoded to ascii); ``nb`` is returned.

    Note: This is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                continue
            for out in cell.outputs:
                for image_key in ('png', 'jpeg'):
                    if image_key not in out:
                        continue
                    payload = getattr(out, image_key)
                    if isinstance(payload, unicode):
                        payload = payload.encode('ascii')
                    setattr(out, image_key, decodestring(payload))
    return nb
66 121
67 122
def base64_encode(nb):
    """Base64 encode all bytes objects in the notebook.

    These will be b64-encoded unicode strings

    The ``png`` and ``jpeg`` entries of code-cell outputs are replaced in
    place; ``nb`` is returned.

    Note: This is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                continue
            for out in cell.outputs:
                for image_key in ('png', 'jpeg'):
                    if image_key in out:
                        encoded = encodestring(getattr(out, image_key))
                        setattr(out, image_key, encoded.decode('ascii'))
    return nb
84 139
85 140
class NotebookReader(object):
    """A class for reading notebooks.

    Subclasses implement ``reads``; ``read`` is built on top of it.
    """

    def reads(self, s, **kwargs):
        """Read a notebook from a string.

        Raises ``NotImplementedError`` unless overridden by a subclass.
        """
        raise NotImplementedError("reads must be implemented in a subclass")

    def read(self, fp, **kwargs):
        """Read a notebook from a file like object.

        The whole content of ``fp`` is read and handed to ``reads`` together
        with ``kwargs``.
        """
        # Delegate to reads(); calling read() again with the string content
        # (as the code used to) fails because a string has no read() method.
        return self.reads(fp.read(), **kwargs)
96 151
97 152
class NotebookWriter(object):
    """A class for writing notebooks.

    Subclasses implement ``writes``; ``write`` is built on top of it.
    """

    def writes(self, nb, **kwargs):
        """Write a notebook to a string.

        Raises ``NotImplementedError`` unless overridden by a subclass.
        """
        raise NotImplementedError("writes must be implemented in a subclass")

    def write(self, nb, fp, **kwargs):
        """Write a notebook to a file like object.

        Returns whatever ``fp.write`` returns for the serialized notebook.
        """
        return fp.write(self.writes(nb, **kwargs))
108 163
109 164
110 165
@@ -1,21 +1,34 b''
1 1 import pprint
2 2 from unittest import TestCase
3 3
4 4 from ..nbjson import reads, writes
5 5 from .nbexamples import nb0
6 6
7 7
class TestJSON(TestCase):
    """Round-trip tests for the JSON notebook reader and writer."""

    def test_roundtrip(self):
        """Writing with default options and reading back gives ``nb0``."""
        s = writes(nb0)
        self.assertEqual(reads(s), nb0)

    def test_roundtrip_nosplit(self):
        """Ensure that multiline blobs are still readable"""
        # ensures that notebooks written prior to splitlines change
        # are still readable.
        s = writes(nb0, split_lines=False)
        self.assertEqual(reads(s), nb0)

    def test_roundtrip_split(self):
        """Ensure that splitting multiline blocks is safe"""
        # This won't differ from test_roundtrip unless the default changes
        s = writes(nb0, split_lines=True)
        self.assertEqual(reads(s), nb0)
19 32
20 33
21 34
General Comments 0
You need to be logged in to leave comments. Login now