##// END OF EJS Templates
LaTeX converter: remove problematic characters from filenames
Rick Lupton -
Show More
@@ -1,323 +1,329 b''
1 from __future__ import print_function, absolute_import
1 from __future__ import print_function, absolute_import
2 from converters.utils import remove_fake_files_url
2 from converters.utils import remove_fake_files_url
3
3
4 # Stdlib
4 # Stdlib
5 import codecs
5 import codecs
6 import io
6 import io
7 import logging
7 import logging
8 import os
8 import os
9 import pprint
9 import pprint
10 import re
10 from types import FunctionType
11 from types import FunctionType
11
12
12 # From IPython
13 # From IPython
13 from IPython.nbformat import current as nbformat
14 from IPython.nbformat import current as nbformat
14
15
15 # local
16 # local
16
17
def clean_filename(filename):
    """Remove unusual characters from filename, so it works with LaTeX

    Every character that is not an ASCII letter, an ASCII digit or an
    underscore is replaced by a single underscore; the length of the name is
    therefore unchanged.

    Parameters
    ----------
    filename : string
        Name to sanitise.

    Returns
    -------
    out : string
    """
    return re.sub(r'[^a-zA-Z0-9_]', '_', filename)
22
17 #-----------------------------------------------------------------------------
23 #-----------------------------------------------------------------------------
18 # Class declarations
24 # Class declarations
19 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
20
26
class ConversionException(Exception):
    """Exception type declared by this module for conversion errors."""
23
29
class DocStringInheritor(type):
    """
    This metaclass will walk the list of bases until the desired
    superclass method is found AND if that method has a docstring and only
    THEN does it attach the superdocstring to the derived class method.

    A method that already carries its own docstring is left untouched, so
    only missing docstrings are filled in.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        """Build the class, filling in missing method docstrings.

        Parameters
        ----------
        classname : string
            Name of the class being created.
        bases : tuple
            Direct base classes; searched in order for a documented
            attribute with the same name as each undocumented function.
        classDict : dict
            Namespace of the class body.

        Returns
        -------
        The new class object.
        """
        newClassDict = {}
        for attributeName, attribute in classDict.items():
            # Only plain functions lacking a docstring of their own inherit
            # one; an explicit docstring on the derived method wins.
            if isinstance(attribute, FunctionType) and not attribute.__doc__:
                # look through bases for matching function by name
                for baseclass in bases:
                    if hasattr(baseclass, attributeName):
                        basefn = getattr(baseclass, attributeName)
                        if basefn.__doc__:
                            attribute.__doc__ = basefn.__doc__
                            # first documented base wins
                            break
            newClassDict[attributeName] = attribute
        return type.__new__(meta, classname, bases, newClassDict)
52
58
class Converter(object):
    """Base class that turns a notebook file into another text format.

    Subclasses set `extension` and implement the ``render_*`` /
    ``_*_lines`` hooks; `convert` and `render` drive them by dispatching on
    the cell type and on the display format of each output.
    """
    # Python 2 metaclass hook: undocumented methods of subclasses pick up the
    # docstrings written here.  (Python 3 ignores this attribute.)
    __metaclass__ = DocStringInheritor
    default_encoding = 'utf-8'
    extension = str()
    figures_counter = 0
    infile = str()
    infile_dir = str()
    infile_root = str()
    files_dir = str()
    with_preamble = True
    user_preamble = None
    output = u''
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the paths derived from `infile` and create the files dir.

        Parameters
        ----------
        infile : string
            Path of the notebook file to convert.
        """
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        # Only the auxiliary directory gets the sanitised name; the output
        # file itself (self.outbase) keeps the original root.
        files_dir = os.path.join(self.infile_dir,
                                 clean_filename(infile_root) + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def __del__(self):
        # Remove the files directory again if nothing was written into it.
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to render_unknown when no ``render_<cell_type>`` exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """return display-format dependent render method, for example
        render_display_format_text

        Falls back to render_unknown_display when no
        ``render_display_format_<format>`` exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        r"""
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        the Converter class do not need to re-implement this method, but just
        need implementation for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
          Character or string to join cells with. Default is "\n"

        Returns
        -------
        out : string
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Convert every cell of every worksheet of self.nb.

        Returns the converted cells, joined with `cell_separator`, as a list
        of lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                #print(cell.cell_type)  # dbg
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        # type(u'') is `unicode` on Python 2 and `str` on Python 3
        assert isinstance(self.output, type(u''))
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """write self.output to a file and return its absolute path

        Parameters
        ----------
        outfile : string, optional
          Destination path. Defaults to self.outbase + '.' + self.extension
        encoding : string, optional
          Encoding of the written file. Defaults to self.default_encoding
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named ``<cleaned infile_root>_fig_<NN>.<fmt>`` and is
        written inside self.files_dir; the figure counter is incremented.

        Returns the path of the new file (self.files_dir joined with the
        figure name).
        """
        figname = '%s_fig_%02i.%s' % (clean_filename(self.infile_root),
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            # The base64 codec works on byte strings; text input is encoded
            # as ASCII first, which is what Python 2's str.decode('base64')
            # did implicitly.
            if not isinstance(data, bytes):
                data = data.encode('ascii')
            data = codecs.decode(data, 'base64')
            with open(fullname, 'wb') as f:
                f.write(data)
        else:
            with codecs.open(fullname, 'wb', self.default_encoding) as f:
                f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Image formats are written to figure files and included through
        ``_<fmt>_lines`` (or ``_img_lines``); every other key except
        'output_type' is handed to its display-format render method.

        Returns list.
        """
        lines = []

        for fmt in output.keys():
            if fmt in ['png', 'svg', 'jpg', 'pdf']:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))
            elif fmt != 'output_type':
                conv_fn = self.dispatch_display_format(fmt)
                lines.extend(conv_fn(output))
        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        `type` is unused and kept only for backward compatibility; it is
        optional because render_display_data calls the dispatched method
        with the output as its single argument.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
323
329
General Comments 0
You need to be logged in to leave comments. Login now