upstream/ipython Commit - r18303:5c8096f7

1

"""Base classes and utilities for readers and writers."""

1

"""Base classes and utilities for readers and writers."""

2

3

# Copyright (c) IPython Development Team.

3

# Copyright (c) IPython Development Team.

4

# Distributed under the terms of the Modified BSD License.

4

# Distributed under the terms of the Modified BSD License.

5

6

from base64 import encodestring, decodestring

6

from base64 import encodestring, decodestring

7

8

from IPython.utils import py3compat

8

from IPython.utils import py3compat

9

from IPython.utils.py3compat import str_to_bytes, unicode_type, string_types

9

from IPython.utils.py3compat import str_to_bytes, unicode_type, string_types

10

11

12

def restore_bytes(nb):

12

def restore_bytes(nb):

13

"""Restore bytes of image data from unicode-only formats.

13

"""Restore bytes of image data from unicode-only formats.

14

15

Base64 encoding is handled elsewhere. Bytes objects in the notebook are

15

Base64 encoding is handled elsewhere. Bytes objects in the notebook are

16

always b64-encoded. We DO NOT encode/decode around file formats.

16

always b64-encoded. We DO NOT encode/decode around file formats.

17

18

Note: this is never used

18

Note: this is never used

19

"""

19

"""

20

for ws in nb.worksheets:

20

for ws in nb.worksheets:

21

for cell in ws.cells:

21

for cell in ws.cells:

22

if cell.cell_type == 'code':

22

if cell.cell_type == 'code':

23

for output in cell.outputs:

23

for output in cell.outputs:

24

if 'png' in output:

24

if 'png' in output:

25

output.png = str_to_bytes(output.png, 'ascii')

25

output.png = str_to_bytes(output.png, 'ascii')

26

if 'jpeg' in output:

26

if 'jpeg' in output:

27

output.jpeg = str_to_bytes(output.jpeg, 'ascii')

27

output.jpeg = str_to_bytes(output.jpeg, 'ascii')

28

return nb

28

return nb

29

30

# output keys that are likely to have multiline values

30

# output keys that are likely to have multiline values

31

_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']

31

_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']

32

33

34

# FIXME: workaround for old splitlines()

34

# FIXME: workaround for old splitlines()

35

def _join_lines(lines):

35

def _join_lines(lines):

36

"""join lines that have been written by splitlines()

36

"""join lines that have been written by splitlines()

37

38

Has logic to protect against `splitlines()`, which

38

Has logic to protect against `splitlines()`, which

39

should have been `splitlines(True)`

39

should have been `splitlines(True)`

40

"""

40

"""

41

if lines and lines[0].endswith(('\n', '\r')):

41

if lines and lines[0].endswith(('\n', '\r')):

42

# created by splitlines(True)

42

# created by splitlines(True)

43

return u''.join(lines)

43

return u''.join(lines)

44

else:

44

else:

45

# created by splitlines()

45

# created by splitlines()

46

return u'\n'.join(lines)

46

return u'\n'.join(lines)

47

48

49

def rejoin_lines(nb):

49

def rejoin_lines(nb):

50

"""rejoin multiline text into strings

50

"""rejoin multiline text into strings

51

52

For reversing effects of ``split_lines(nb)``.

52

For reversing effects of ``split_lines(nb)``.

53

54

This only rejoins lines that have been split, so if text objects were not split

54

This only rejoins lines that have been split, so if text objects were not split

55

they will pass through unchanged.

55

they will pass through unchanged.

56

57

Used when reading JSON files that may have been passed through split_lines.

57

Used when reading JSON files that may have been passed through split_lines.

58

"""

58

"""

59

for ws in nb.worksheets:

59

for ws in nb.worksheets:

60

for cell in ws.cells:

60

for cell in ws.cells:

61

if cell.cell_type == 'code':

61

if cell.cell_type == 'code':

62

if 'input' in cell and isinstance(cell.input, list):

62

if 'input' in cell and isinstance(cell.input, list):

63

cell.input = _join_lines(cell.input)

63

cell.input = _join_lines(cell.input)

64

for output in cell.outputs:

64

for output in cell.outputs:

65

for key in _multiline_outputs:

65

for key in _multiline_outputs:

66

item = output.get(key, None)

66

item = output.get(key, None)

67

if isinstance(item, list):

67

if isinstance(item, list):

68

output[key] = _join_lines(item)

68

output[key] = _join_lines(item)

69

else: # text, heading cell

69

else: # text, heading cell

70

for key in ['source', 'rendered']:

70

for key in ['source', 'rendered']:

71

item = cell.get(key, None)

71

item = cell.get(key, None)

72

if isinstance(item, list):

72

if isinstance(item, list):

73

cell[key] = _join_lines(item)

73

cell[key] = _join_lines(item)

74

return nb

74

return nb

75

76

77

def split_lines(nb):

77

def split_lines(nb):

78

"""split likely multiline text into lists of strings

78

"""split likely multiline text into lists of strings

79

80

For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will

80

For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will

81

reverse the effects of ``split_lines(nb)``.

81

reverse the effects of ``split_lines(nb)``.

82

83

Used when writing JSON files.

83

Used when writing JSON files.

84

"""

84

"""

85

for ws in nb.worksheets:

85

for ws in nb.worksheets:

86

for cell in ws.cells:

86

for cell in ws.cells:

87

if cell.cell_type == 'code':

87

if cell.cell_type == 'code':

88

if 'input' in cell and isinstance(cell.input, string_types):

88

if 'input' in cell and isinstance(cell.input, string_types):

89

cell.input = cell.input.splitlines(True)

89

cell.input = cell.input.splitlines(True)

90

for output in cell.outputs:

90

for output in cell.outputs:

91

for key in _multiline_outputs:

91

for key in _multiline_outputs:

92

item = output.get(key, None)

92

item = output.get(key, None)

93

if isinstance(item, string_types):

93

if isinstance(item, string_types):

94

output[key] = item.splitlines(True)

94

output[key] = item.splitlines(True)

95

else: # text, heading cell

95

else: # text, heading cell

96

for key in ['source', 'rendered']:

96

for key in ['source', 'rendered']:

97

item = cell.get(key, None)

97

item = cell.get(key, None)

98

if isinstance(item, string_types):

98

if isinstance(item, string_types):

99

cell[key] = item.splitlines(True)

99

cell[key] = item.splitlines(True)

100

return nb

100

return nb

101

102

# b64 encode/decode are never actually used, because all bytes objects in

102

# b64 encode/decode are never actually used, because all bytes objects in

103

# the notebook are already b64-encoded, and we don't need/want to double-encode

103

# the notebook are already b64-encoded, and we don't need/want to double-encode

104

105

def base64_decode(nb):

105

def base64_decode(nb):

106

"""Restore all bytes objects in the notebook from base64-encoded strings.

106

"""Restore all bytes objects in the notebook from base64-encoded strings.

107

108

Note: This is never used

108

Note: This is never used

109

"""

109

"""

110

for ws in nb.worksheets:

110

for ws in nb.worksheets:

111

for cell in ws.cells:

111

for cell in ws.cells:

112

if cell.cell_type == 'code':

112

if cell.cell_type == 'code':

113

for output in cell.outputs:

113

for output in cell.outputs:

114

if 'png' in output:

114

if 'png' in output:

115

if isinstance(output.png, unicode_type):

115

if isinstance(output.png, unicode_type):

116

output.png = output.png.encode('ascii')

116

output.png = output.png.encode('ascii')

117

output.png = decodestring(output.png)

117

output.png = decodestring(output.png)

118

if 'jpeg' in output:

118

if 'jpeg' in output:

119

if isinstance(output.jpeg, unicode_type):

119

if isinstance(output.jpeg, unicode_type):

120

output.jpeg = output.jpeg.encode('ascii')

120

output.jpeg = output.jpeg.encode('ascii')

121

output.jpeg = decodestring(output.jpeg)

121

output.jpeg = decodestring(output.jpeg)

122

return nb

122

return nb

123

124

125

def base64_encode(nb):

125

def base64_encode(nb):

126

"""Base64 encode all bytes objects in the notebook.

126

"""Base64 encode all bytes objects in the notebook.

127

128

These will be b64-encoded unicode strings

128

These will be b64-encoded unicode strings

129

130

Note: This is never used

130

Note: This is never used

131

"""

131

"""

132

for ws in nb.worksheets:

132

for ws in nb.worksheets:

133

for cell in ws.cells:

133

for cell in ws.cells:

134

if cell.cell_type == 'code':

134

if cell.cell_type == 'code':

135

for output in cell.outputs:

135

for output in cell.outputs:

136

if 'png' in output:

136

if 'png' in output:

137

output.png = encodestring(output.png).decode('ascii')

137

output.png = encodestring(output.png).decode('ascii')

138

if 'jpeg' in output:

138

if 'jpeg' in output:

139

output.jpeg = encodestring(output.jpeg).decode('ascii')

139

output.jpeg = encodestring(output.jpeg).decode('ascii')

140

return nb

140

return nb

141

142

143

def strip_transient(nb):

143

def strip_transient(nb):

144

"""Strip transient values that shouldn't be stored in files.

144

"""Strip transient values that shouldn't be stored in files.

145

146

This should be called in *both* read and write.

146

This should be called in *both* read and write.

147

"""

147

"""

148

nb.pop('orig_nbformat', None)

148

nb.pop('orig_nbformat', None)

149

nb.pop('orig_nbformat_minor', None)

149

nb.pop('orig_nbformat_minor', None)

150

for ws in nb['worksheets']:

150

for ws in nb['worksheets']:

151

for cell in ws['cells']:

151

for cell in ws['cells']:

152

cell.get('metadata', {}).pop('trusted', None)

152

cell.get('metadata', {}).pop('trusted', None)

153

# strip cell.trusted even though it shouldn't be used,

154

# since it's where the transient value used to be stored.

155

cell.pop('trusted', None)

153

return nb

156

return nb

154

157

155

158

156

class NotebookReader(object):

159

class NotebookReader(object):

157

"""A class for reading notebooks."""

160

"""A class for reading notebooks."""

158

161

159

def reads(self, s, **kwargs):

162

def reads(self, s, **kwargs):

160

"""Read a notebook from a string."""

163

"""Read a notebook from a string."""

161

raise NotImplementedError("loads must be implemented in a subclass")

164

raise NotImplementedError("loads must be implemented in a subclass")

162

165

163

def read(self, fp, **kwargs):

166

def read(self, fp, **kwargs):

164

"""Read a notebook from a file like object"""

167

"""Read a notebook from a file like object"""

165

nbs = fp.read()

168

nbs = fp.read()

166

if not py3compat.PY3 and not isinstance(nbs, unicode_type):

169

if not py3compat.PY3 and not isinstance(nbs, unicode_type):

167

nbs = py3compat.str_to_unicode(nbs)

170

nbs = py3compat.str_to_unicode(nbs)

168

return self.reads(nbs, **kwargs)

171

return self.reads(nbs, **kwargs)

169

172

170

173

171

class NotebookWriter(object):

174

class NotebookWriter(object):

172

"""A class for writing notebooks."""

175

"""A class for writing notebooks."""

173

176

174

def writes(self, nb, **kwargs):

177

def writes(self, nb, **kwargs):

175

"""Write a notebook to a string."""

178

"""Write a notebook to a string."""

176

raise NotImplementedError("loads must be implemented in a subclass")

179

raise NotImplementedError("loads must be implemented in a subclass")

177

180

178

def write(self, nb, fp, **kwargs):

181

def write(self, nb, fp, **kwargs):

179

"""Write a notebook to a file like object"""

182

"""Write a notebook to a file like object"""

180

nbs = self.writes(nb,**kwargs)

183

nbs = self.writes(nb,**kwargs)

181

if not py3compat.PY3 and not isinstance(nbs, unicode_type):

184

if not py3compat.PY3 and not isinstance(nbs, unicode_type):

182

# this branch is likely only taken for JSON on Python 2

185

# this branch is likely only taken for JSON on Python 2

183

nbs = py3compat.str_to_unicode(nbs)

186

nbs = py3compat.str_to_unicode(nbs)

184

return fp.write(nbs)

187

return fp.write(nbs)

185

188

186

189

187

190

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             """Base classes and utilities for readers and writers."""
             # Copyright (c) IPython Development Team.
             # Distributed under the terms of the Modified BSD License.
             from base64 import encodestring, decodestring
             from IPython.utils import py3compat
             from IPython.utils.py3compat import str_to_bytes, unicode_type, string_types
             def restore_bytes(nb):
                 """Turn unicode image payloads back into ASCII bytes.

                 For every output of every code cell, a ``png`` or ``jpeg`` entry
                 (when present) is replaced by ``str_to_bytes(value, 'ascii')``.

                 Base64 encoding is handled elsewhere.  Bytes objects in the notebook
                 are always b64-encoded. We DO NOT encode/decode around file formats.

                 Note: this is never used

                 The notebook is modified in place and the same object is returned.
                 """
                 for sheet in nb.worksheets:
                     for cell in sheet.cells:
                         if cell.cell_type != 'code':
                             continue
                         for out in cell.outputs:
                             # png first, then jpeg; attribute access on the node
                             for fmt in ('png', 'jpeg'):
                                 if fmt in out:
                                     converted = str_to_bytes(getattr(out, fmt), 'ascii')
                                     setattr(out, fmt, converted)
                 return nb
             # output keys that are likely to have multiline values;
             # split_lines() and rejoin_lines() only split/join these keys on each
             # code-cell output, every other output key is left untouched.
             _multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
             # FIXME: workaround for old splitlines()
             def _join_lines(lines):
                 """join lines that have been written by splitlines()
                 Has logic to protect against `splitlines()`, which
                 should have been `splitlines(True)`
                 """
                 if lines and lines[0].endswith(('\n', '\r')):
                     # created by splitlines(True)
                     return u''.join(lines)
                 else:
                     # created by splitlines()
                     return u'\n'.join(lines)
             def rejoin_lines(nb):
                 """Collapse line lists back into single strings.

                 This undoes ``split_lines(nb)``.  Only values that are actually
                 lists are joined, so text that was never split passes through
                 unchanged.

                 Code cells: ``input`` and each output key named in
                 ``_multiline_outputs``.  Any other cell type (text, heading):
                 ``source`` and ``rendered``.

                 Used when reading JSON files that may have been passed through
                 split_lines.  The notebook is modified in place and returned.
                 """
                 for sheet in nb.worksheets:
                     for cell in sheet.cells:
                         if cell.cell_type != 'code':
                             # text, heading cell
                             for field in ['source', 'rendered']:
                                 value = cell.get(field, None)
                                 if isinstance(value, list):
                                     cell[field] = _join_lines(value)
                             continue

                         if 'input' in cell and isinstance(cell.input, list):
                             cell.input = _join_lines(cell.input)
                         for out in cell.outputs:
                             for field in _multiline_outputs:
                                 value = out.get(field, None)
                                 if isinstance(value, list):
                                     out[field] = _join_lines(value)
                 return nb
             def split_lines(nb):
                 """Break likely-multiline text into lists of lines.

                 Makes the written file friendlier to line-based VCS;
                 ``rejoin_lines(nb)`` reverses the effect.  Splitting uses
                 ``splitlines(True)``, so each piece keeps its line ending.

                 Code cells: ``input`` and each output key named in
                 ``_multiline_outputs``.  Any other cell type (text, heading):
                 ``source`` and ``rendered``.  Only string values are split.

                 Used when writing JSON files.  The notebook is modified in place
                 and returned.
                 """
                 for sheet in nb.worksheets:
                     for cell in sheet.cells:
                         if cell.cell_type != 'code':
                             # text, heading cell
                             for field in ['source', 'rendered']:
                                 text = cell.get(field, None)
                                 if isinstance(text, string_types):
                                     cell[field] = text.splitlines(True)
                             continue

                         if 'input' in cell and isinstance(cell.input, string_types):
                             cell.input = cell.input.splitlines(True)
                         for out in cell.outputs:
                             for field in _multiline_outputs:
                                 text = out.get(field, None)
                                 if isinstance(text, string_types):
                                     out[field] = text.splitlines(True)
                 return nb
             # b64 encode/decode are never actually used, because all bytes objects in
             # the notebook are already b64-encoded, and we don't need/want to double-encode
             def base64_decode(nb):
                 """Decode base64 image payloads in the notebook back into bytes.

                 For every output of every code cell, a ``png`` or ``jpeg`` entry
                 (when present) is first encoded to ASCII if it is a unicode
                 string, then passed through ``decodestring``.

                 Note: This is never used

                 The notebook is modified in place and returned.
                 """
                 for sheet in nb.worksheets:
                     for cell in sheet.cells:
                         if cell.cell_type != 'code':
                             continue
                         for out in cell.outputs:
                             for fmt in ('png', 'jpeg'):
                                 if fmt not in out:
                                     continue
                                 payload = getattr(out, fmt)
                                 if isinstance(payload, unicode_type):
                                     # store the ASCII bytes before decoding
                                     payload = payload.encode('ascii')
                                     setattr(out, fmt, payload)
                                 setattr(out, fmt, decodestring(payload))
                 return nb
             def base64_encode(nb):
                 """Base64-encode image payloads in the notebook.

                 For every output of every code cell, a ``png`` or ``jpeg`` entry
                 (when present) is run through ``encodestring`` and the result is
                 decoded as ASCII, so the stored value is a b64-encoded unicode
                 string.

                 Note: This is never used

                 The notebook is modified in place and returned.
                 """
                 for sheet in nb.worksheets:
                     for cell in sheet.cells:
                         if cell.cell_type != 'code':
                             continue
                         for out in cell.outputs:
                             for fmt in ('png', 'jpeg'):
                                 if fmt in out:
                                     encoded = encodestring(getattr(out, fmt))
                                     setattr(out, fmt, encoded.decode('ascii'))
                 return nb
             def strip_transient(nb):
                 """Strip transient values that shouldn't be stored in files.

                 This should be called in *both* read and write.

                 Removed when present (absent keys are ignored):

                 - ``orig_nbformat`` and ``orig_nbformat_minor`` on the notebook,
                 - ``trusted`` inside each cell's ``metadata`` mapping,
                 - ``trusted`` directly on each cell.

                 The notebook is accessed by key (``nb['worksheets']``,
                 ``ws['cells']``); a missing ``worksheets`` or ``cells`` key raises
                 ``KeyError``.  The notebook is modified in place and returned.
                 """
                 nb.pop('orig_nbformat', None)
                 nb.pop('orig_nbformat_minor', None)
                 for ws in nb['worksheets']:
                     for cell in ws['cells']:
                         # a cell without metadata gets a throwaway dict, so the
                         # pop is a no-op and no 'metadata' key is created
                         cell.get('metadata', {}).pop('trusted', None)
                         # strip cell.trusted even though it shouldn't be used,
                         # since it's where the transient value used to be stored.
                         cell.pop('trusted', None)
                 return nb
             class NotebookReader(object):
                 """A class for reading notebooks.

                 Subclasses implement ``reads``; ``read`` is built on top of it.
                 """

                 def reads(self, s, **kwargs):
                     """Read a notebook from a string.

                     Must be overridden; the base implementation always raises
                     ``NotImplementedError``.
                     """
                     raise NotImplementedError("reads must be implemented in a subclass")

                 def read(self, fp, **kwargs):
                     """Read a notebook from a file like object.

                     The whole content of ``fp`` is read and handed to ``reads``
                     together with ``kwargs``; the result of ``reads`` is returned.
                     """
                     nbs = fp.read()
                     # on Python 2 make sure reads() always receives unicode
                     if not py3compat.PY3 and not isinstance(nbs, unicode_type):
                         nbs = py3compat.str_to_unicode(nbs)
                     return self.reads(nbs, **kwargs)
             class NotebookWriter(object):
                 """A class for writing notebooks.

                 Subclasses implement ``writes``; ``write`` is built on top of it.
                 """

                 def writes(self, nb, **kwargs):
                     """Write a notebook to a string.

                     Must be overridden; the base implementation always raises
                     ``NotImplementedError``.
                     """
                     raise NotImplementedError("writes must be implemented in a subclass")

                 def write(self, nb, fp, **kwargs):
                     """Write a notebook to a file like object.

                     The notebook is serialized with ``writes`` (``kwargs`` are
                     passed through) and the result is written to ``fp``.  Returns
                     whatever ``fp.write`` returns.
                     """
                     nbs = self.writes(nb, **kwargs)
                     if not py3compat.PY3 and not isinstance(nbs, unicode_type):
                         # this branch is likely only taken for JSON on Python 2
                         nbs = py3compat.str_to_unicode(nbs)
                     return fp.write(nbs)