##// END OF EJS Templates
LaTeX converter: remove problematic characters from filenames
Rick Lupton -
Show More
@@ -1,323 +1,329 b''
1 1 from __future__ import print_function, absolute_import
2 2 from converters.utils import remove_fake_files_url
3 3
4 4 # Stdlib
5 5 import codecs
6 6 import io
7 7 import logging
8 8 import os
9 9 import pprint
10 import re
10 11 from types import FunctionType
11 12
12 13 # From IPython
13 14 from IPython.nbformat import current as nbformat
14 15
15 16 # local
16 17
def clean_filename(filename):
    """Return ``filename`` with LaTeX-unfriendly characters replaced.

    Every character other than an ASCII letter, an ASCII digit or an
    underscore is replaced by a single underscore, one for one, so the
    result has the same length as the input.
    """
    # Anything outside [a-zA-Z0-9_] is considered unsafe.
    unsafe_chars = re.compile(r'[^a-zA-Z0-9_]')
    return unsafe_chars.sub('_', filename)
22
17 23 #-----------------------------------------------------------------------------
18 24 # Class declarations
19 25 #-----------------------------------------------------------------------------
20 26
class ConversionException(Exception):
    """Exception type declared by this module for conversion errors."""
23 29
class DocStringInheritor(type):
    """
    Metaclass that lets methods inherit docstrings from their base classes.

    For every plain function in the class body that has no docstring of its
    own, the direct bases are searched in order for an attribute of the same
    name; the first one found that carries a docstring donates it to the
    derived method.  Methods that already define a docstring keep it.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        """Build the class, filling in missing method docstrings.

        Parameters
        ----------
        classname : str
            Name of the class being created.
        bases : tuple
            Direct base classes of the new class.
        classDict : dict
            Namespace collected from the class body.

        Returns
        -------
        The newly created class.
        """
        newClassDict = {}
        for attributeName, attribute in classDict.items():
            # Only plain functions are candidates, and only when they do not
            # already document themselves -- an explicit docstring on the
            # derived method must never be clobbered by the base one.
            if isinstance(attribute, FunctionType) and not attribute.__doc__:
                # look through bases for matching function by name
                for baseclass in bases:
                    if hasattr(baseclass, attributeName):
                        basefn = getattr(baseclass, attributeName)
                        if basefn.__doc__:
                            attribute.__doc__ = basefn.__doc__
                            # first documented match wins
                            break
            newClassDict[attributeName] = attribute
        return type.__new__(meta, classname, bases, newClassDict)
52 58
class Converter(object):
    """Base class turning a notebook file into another text format.

    The generic driving logic lives here (reading the notebook, walking
    its cells, dispatching each cell/output to a ``render_*`` method,
    writing figure files and saving the result).  The ``render_*`` methods
    themselves raise NotImplementedError and are meant to be provided by
    subclasses.
    """
    __metaclass__ = DocStringInheritor
    # Encoding used by save() by default and for text figure files
    default_encoding = 'utf-8'
    # Extension (without dot) appended to the output base name by save()
    extension = str()
    # Index used to number the next figure written by _new_figure()
    figures_counter = 0
    # Path of the input file, and its directory / extension-less base name
    infile = str()
    infile_dir = str()
    infile_root = str()
    # Directory in which figure files are written
    files_dir = str()
    # NOTE(review): the next two are not used by this class itself;
    # presumably consumed by subclasses -- confirm.
    with_preamble = True
    user_preamble = None
    # Converted document, filled in by render()
    output = unicode()
    # NOTE(review): not used by this class itself; presumably consumed by
    # subclasses -- confirm.
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the input path and create the figures directory.

        Parameters
        ----------
        infile : string
            Path of the notebook file to convert.
        """
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        # The figures directory name is sanitised so that it can be
        # referenced from LaTeX; the output base name is left untouched.
        files_dir = os.path.join(self.infile_dir,
                                 clean_filename(infile_root) + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def __del__(self):
        """Remove the figures directory if nothing was written into it."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to render_unknown when no matching method exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """return output_type dependent render method, for example
        render_display_format_text

        Falls back to render_unknown_display when no matching method exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        Converter class do not need to re-implement this method, but just
        need implementation for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
          Character or string to join cells with. Default is "\n"

        Returns
        -------
        out : string
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Convert every cell of every worksheet of self.nb.

        Parameters
        ----------
        cell_separator : string
          String inserted between converted cells. Default is "\n"

        Returns
        -------
        cell_lines : list
          The converted cells, joined and then split into single lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                #print(cell.cell_type)  # dbg
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        "read, convert, and save self.infile"
        # Only read the notebook if it has not been loaded already
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert isinstance(self.output, unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """write self.output to a file

        Parameters
        ----------
        outfile : string, optional
          Destination path. Defaults to self.outbase + '.' + self.extension
        encoding : string, optional
          Encoding of the written file. Defaults to self.default_encoding

        Returns
        -------
        The absolute path of the written file.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        Parameters
        ----------
        data : string
          Figure content; base64-encoded for 'png', 'jpg' and 'pdf'.
        fmt : string
          Figure format, also used as the file extension.

        Returns the path of the written file, i.e. self.files_dir joined
        with the generated figure name.
        """
        # The figure name is sanitised so that LaTeX can include it
        figname = '%s_fig_%02i.%s' % (clean_filename(self.infile_root),
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = data.decode('base64')
            figfile = open(fullname, 'wb')
        else:
            figfile = codecs.open(fullname, 'wb', self.default_encoding)

        with figfile as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Image formats are written to figure files and included through
        ``_<fmt>_lines`` (or ``_img_lines`` when no such method exists);
        every other key except 'output_type' is dispatched on its name.

        Returns list.
        """
        lines = []

        for fmt in output.keys():
            if fmt in ['png', 'svg', 'jpg', 'pdf']:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))
            elif fmt != 'output_type':
                conv_fn = self.dispatch_display_format(fmt)
                lines.extend(conv_fn(output))
        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s' % cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        ``type`` is unused and optional: render_display_data calls the
        dispatched function with the output only.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s' % output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
323 329
General Comments 0
You need to be logged in to leave comments. Login now