##// END OF EJS Templates
use unicode for HTML export...
MinRK -
Show More
@@ -1,255 +1,238 b''
1 """ Defines classes and functions for working with Qt's rich text system.
1 """ Defines classes and functions for working with Qt's rich text system.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Imports
4 # Imports
5 #-----------------------------------------------------------------------------
5 #-----------------------------------------------------------------------------
6
6
7 # Standard library imports.
7 # Standard library imports
8 import io
8 import os
9 import os
9 import re
10 import re
10
11
11 # System library imports.
12 # System library imports
12 from IPython.external.qt import QtGui
13 from IPython.external.qt import QtGui
13
14
14 # IPython imports
15 # IPython imports
15 from IPython.utils import py3compat
16 from IPython.utils import py3compat
16
17
17 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
18 # Constants
19 # Constants
19 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
20
21
21 # A regular expression for an HTML paragraph with no content.
22 # A regular expression for an HTML paragraph with no content.
22 EMPTY_P_RE = re.compile(r'<p[^/>]*>\s*</p>')
23 EMPTY_P_RE = re.compile(r'<p[^/>]*>\s*</p>')
23
24
24 # A regular expression for matching images in rich text HTML.
25 # A regular expression for matching images in rich text HTML.
25 # Note that this is overly restrictive, but Qt's output is predictable...
26 # Note that this is overly restrictive, but Qt's output is predictable...
26 IMG_RE = re.compile(r'<img src="(?P<name>[\d]+)" />')
27 IMG_RE = re.compile(r'<img src="(?P<name>[\d]+)" />')
27
28
28 #-----------------------------------------------------------------------------
29 #-----------------------------------------------------------------------------
29 # Classes
30 # Classes
30 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
31
32
32 class HtmlExporter(object):
33 class HtmlExporter(object):
33 """ A stateful HTML exporter for a Q(Plain)TextEdit.
34 """ A stateful HTML exporter for a Q(Plain)TextEdit.
34
35
35 This class is designed for convenient user interaction.
36 This class is designed for convenient user interaction.
36 """
37 """
37
38
38 def __init__(self, control):
39 def __init__(self, control):
39 """ Creates an HtmlExporter for the given Q(Plain)TextEdit.
40 """ Creates an HtmlExporter for the given Q(Plain)TextEdit.
40 """
41 """
41 assert isinstance(control, (QtGui.QPlainTextEdit, QtGui.QTextEdit))
42 assert isinstance(control, (QtGui.QPlainTextEdit, QtGui.QTextEdit))
42 self.control = control
43 self.control = control
43 self.filename = 'ipython.html'
44 self.filename = 'ipython.html'
44 self.image_tag = None
45 self.image_tag = None
45 self.inline_png = None
46 self.inline_png = None
46
47
47 def export(self):
48 def export(self):
48 """ Displays a dialog for exporting HTML generated by Qt's rich text
49 """ Displays a dialog for exporting HTML generated by Qt's rich text
49 system.
50 system.
50
51
51 Returns
52 Returns
52 -------
53 -------
53 The name of the file that was saved, or None if no file was saved.
54 The name of the file that was saved, or None if no file was saved.
54 """
55 """
55 parent = self.control.window()
56 parent = self.control.window()
56 dialog = QtGui.QFileDialog(parent, 'Save as...')
57 dialog = QtGui.QFileDialog(parent, 'Save as...')
57 dialog.setAcceptMode(QtGui.QFileDialog.AcceptSave)
58 dialog.setAcceptMode(QtGui.QFileDialog.AcceptSave)
58 filters = [
59 filters = [
59 'HTML with PNG figures (*.html *.htm)',
60 'HTML with PNG figures (*.html *.htm)',
60 'XHTML with inline SVG figures (*.xhtml *.xml)'
61 'XHTML with inline SVG figures (*.xhtml *.xml)'
61 ]
62 ]
62 dialog.setNameFilters(filters)
63 dialog.setNameFilters(filters)
63 if self.filename:
64 if self.filename:
64 dialog.selectFile(self.filename)
65 dialog.selectFile(self.filename)
65 root,ext = os.path.splitext(self.filename)
66 root,ext = os.path.splitext(self.filename)
66 if ext.lower() in ('.xml', '.xhtml'):
67 if ext.lower() in ('.xml', '.xhtml'):
67 dialog.selectNameFilter(filters[-1])
68 dialog.selectNameFilter(filters[-1])
68
69
69 if dialog.exec_():
70 if dialog.exec_():
70 self.filename = dialog.selectedFiles()[0]
71 self.filename = dialog.selectedFiles()[0]
71 choice = dialog.selectedNameFilter()
72 choice = dialog.selectedNameFilter()
72 html = self.control.document().toHtml().encode('utf-8')
73 html = py3compat.cast_unicode(self.control.document().toHtml())
73
74
74 # Configure the exporter.
75 # Configure the exporter.
75 if choice.startswith('XHTML'):
76 if choice.startswith('XHTML'):
76 exporter = export_xhtml
77 exporter = export_xhtml
77 else:
78 else:
78 # If there are PNGs, decide how to export them.
79 # If there are PNGs, decide how to export them.
79 inline = self.inline_png
80 inline = self.inline_png
80 if inline is None and IMG_RE.search(html):
81 if inline is None and IMG_RE.search(html):
81 dialog = QtGui.QDialog(parent)
82 dialog = QtGui.QDialog(parent)
82 dialog.setWindowTitle('Save as...')
83 dialog.setWindowTitle('Save as...')
83 layout = QtGui.QVBoxLayout(dialog)
84 layout = QtGui.QVBoxLayout(dialog)
84 msg = "Exporting HTML with PNGs"
85 msg = "Exporting HTML with PNGs"
85 info = "Would you like inline PNGs (single large html " \
86 info = "Would you like inline PNGs (single large html " \
86 "file) or external image files?"
87 "file) or external image files?"
87 checkbox = QtGui.QCheckBox("&Don't ask again")
88 checkbox = QtGui.QCheckBox("&Don't ask again")
88 checkbox.setShortcut('D')
89 checkbox.setShortcut('D')
89 ib = QtGui.QPushButton("&Inline")
90 ib = QtGui.QPushButton("&Inline")
90 ib.setShortcut('I')
91 ib.setShortcut('I')
91 eb = QtGui.QPushButton("&External")
92 eb = QtGui.QPushButton("&External")
92 eb.setShortcut('E')
93 eb.setShortcut('E')
93 box = QtGui.QMessageBox(QtGui.QMessageBox.Question,
94 box = QtGui.QMessageBox(QtGui.QMessageBox.Question,
94 dialog.windowTitle(), msg)
95 dialog.windowTitle(), msg)
95 box.setInformativeText(info)
96 box.setInformativeText(info)
96 box.addButton(ib, QtGui.QMessageBox.NoRole)
97 box.addButton(ib, QtGui.QMessageBox.NoRole)
97 box.addButton(eb, QtGui.QMessageBox.YesRole)
98 box.addButton(eb, QtGui.QMessageBox.YesRole)
98 layout.setSpacing(0)
99 layout.setSpacing(0)
99 layout.addWidget(box)
100 layout.addWidget(box)
100 layout.addWidget(checkbox)
101 layout.addWidget(checkbox)
101 dialog.setLayout(layout)
102 dialog.setLayout(layout)
102 dialog.show()
103 dialog.show()
103 reply = box.exec_()
104 reply = box.exec_()
104 dialog.hide()
105 dialog.hide()
105 inline = (reply == 0)
106 inline = (reply == 0)
106 if checkbox.checkState():
107 if checkbox.checkState():
107 # Don't ask anymore; always use this choice.
108 # Don't ask anymore; always use this choice.
108 self.inline_png = inline
109 self.inline_png = inline
109 exporter = lambda h, f, i: export_html(h, f, i, inline)
110 exporter = lambda h, f, i: export_html(h, f, i, inline)
110
111
111 # Perform the export!
112 # Perform the export!
112 try:
113 try:
113 return exporter(html, self.filename, self.image_tag)
114 return exporter(html, self.filename, self.image_tag)
114 except Exception as e:
115 except Exception as e:
115 msg = "Error exporting HTML to %s\n" % self.filename + str(e)
116 msg = "Error exporting HTML to %s\n" % self.filename + str(e)
116 reply = QtGui.QMessageBox.warning(parent, 'Error', msg,
117 reply = QtGui.QMessageBox.warning(parent, 'Error', msg,
117 QtGui.QMessageBox.Ok, QtGui.QMessageBox.Ok)
118 QtGui.QMessageBox.Ok, QtGui.QMessageBox.Ok)
118
119
119 return None
120 return None
120
121
121 #-----------------------------------------------------------------------------
122 #-----------------------------------------------------------------------------
122 # Functions
123 # Functions
123 #-----------------------------------------------------------------------------
124 #-----------------------------------------------------------------------------
124
125
125 def export_html(html, filename, image_tag = None, inline = True):
126 def export_html(html, filename, image_tag = None, inline = True):
126 """ Export the contents of the ConsoleWidget as HTML.
127 """ Export the contents of the ConsoleWidget as HTML.
127
128
128 Parameters:
129 Parameters:
129 -----------
130 -----------
130 html : str,
131 html : unicode,
131 A utf-8 encoded Python string containing the Qt HTML to export.
132 A Python unicode string containing the Qt HTML to export.
132
133
133 filename : str
134 filename : str
134 The file to be saved.
135 The file to be saved.
135
136
136 image_tag : callable, optional (default None)
137 image_tag : callable, optional (default None)
137 Used to convert images. See ``default_image_tag()`` for information.
138 Used to convert images. See ``default_image_tag()`` for information.
138
139
139 inline : bool, optional [default True]
140 inline : bool, optional [default True]
140 If True, include images as inline PNGs. Otherwise, include them as
141 If True, include images as inline PNGs. Otherwise, include them as
141 links to external PNG files, mimicking web browsers' "Web Page,
142 links to external PNG files, mimicking web browsers' "Web Page,
142 Complete" behavior.
143 Complete" behavior.
143 """
144 """
144 if image_tag is None:
145 if image_tag is None:
145 image_tag = default_image_tag
146 image_tag = default_image_tag
146 else:
147 image_tag = ensure_utf8(image_tag)
148
147
149 if inline:
148 if inline:
150 path = None
149 path = None
151 else:
150 else:
152 root,ext = os.path.splitext(filename)
151 root,ext = os.path.splitext(filename)
153 path = root + "_files"
152 path = root + "_files"
154 if os.path.isfile(path):
153 if os.path.isfile(path):
155 raise OSError("%s exists, but is not a directory." % path)
154 raise OSError("%s exists, but is not a directory." % path)
156
155
157 with open(filename, 'w') as f:
156 with io.open(filename, 'w', encoding='utf-8') as f:
158 html = fix_html(html)
157 html = fix_html(html)
159 f.write(IMG_RE.sub(lambda x: image_tag(x, path = path, format = "png"),
158 f.write(IMG_RE.sub(lambda x: image_tag(x, path = path, format = "png"),
160 html))
159 html))
161
160
162
161
163 def export_xhtml(html, filename, image_tag=None):
162 def export_xhtml(html, filename, image_tag=None):
164 """ Export the contents of the ConsoleWidget as XHTML with inline SVGs.
163 """ Export the contents of the ConsoleWidget as XHTML with inline SVGs.
165
164
166 Parameters:
165 Parameters:
167 -----------
166 -----------
168 html : str,
167 html : unicode,
169 A utf-8 encoded Python string containing the Qt HTML to export.
168 A Python unicode string containing the Qt HTML to export.
170
169
171 filename : str
170 filename : str
172 The file to be saved.
171 The file to be saved.
173
172
174 image_tag : callable, optional (default None)
173 image_tag : callable, optional (default None)
175 Used to convert images. See ``default_image_tag()`` for information.
174 Used to convert images. See ``default_image_tag()`` for information.
176 """
175 """
177 if image_tag is None:
176 if image_tag is None:
178 image_tag = default_image_tag
177 image_tag = default_image_tag
179 else:
180 image_tag = ensure_utf8(image_tag)
181
178
182 with open(filename, 'w') as f:
179 with io.open(filename, 'w', encoding='utf-8') as f:
183 # Hack to make xhtml header -- note that we are not doing any check for
180 # Hack to make xhtml header -- note that we are not doing any check for
184 # valid XML.
181 # valid XML.
185 offset = html.find("<html>")
182 offset = html.find("<html>")
186 assert offset > -1, 'Invalid HTML string: no <html> tag.'
183 assert offset > -1, 'Invalid HTML string: no <html> tag.'
187 html = ('<html xmlns="http://www.w3.org/1999/xhtml">\n'+
184 html = (u'<html xmlns="http://www.w3.org/1999/xhtml">\n'+
188 html[offset+6:])
185 html[offset+6:])
189
186
190 html = fix_html(html)
187 html = fix_html(html)
191 f.write(IMG_RE.sub(lambda x: image_tag(x, path = None, format = "svg"),
188 f.write(IMG_RE.sub(lambda x: image_tag(x, path = None, format = "svg"),
192 html))
189 html))
193
190
194
191
195 def default_image_tag(match, path = None, format = "png"):
192 def default_image_tag(match, path = None, format = "png"):
196 """ Return (X)HTML mark-up for the image-tag given by match.
193 """ Return (X)HTML mark-up for the image-tag given by match.
197
194
198 This default implementation merely removes the image, and exists mostly
195 This default implementation merely removes the image, and exists mostly
199 for documentation purposes. More information than is present in the Qt
196 for documentation purposes. More information than is present in the Qt
200 HTML is required to supply the images.
197 HTML is required to supply the images.
201
198
202 Parameters
199 Parameters
203 ----------
200 ----------
204 match : re.SRE_Match
201 match : re.SRE_Match
205 A match to an HTML image tag as exported by Qt, with match.group("name")
202 A match to an HTML image tag as exported by Qt, with match.group("name")
206 containing the matched image ID.
203 containing the matched image ID.
207
204
208 path : string|None, optional [default None]
205 path : string|None, optional [default None]
209 If not None, specifies a path to which supporting files may be written
206 If not None, specifies a path to which supporting files may be written
210 (e.g., for linked images). If None, all images are to be included
207 (e.g., for linked images). If None, all images are to be included
211 inline.
208 inline.
212
209
213 format : "png"|"svg", optional [default "png"]
210 format : "png"|"svg", optional [default "png"]
214 Format for returned or referenced images.
211 Format for returned or referenced images.
215 """
212 """
216 return ''
213 return u''
217
218
219 def ensure_utf8(image_tag):
220 """wrapper for ensuring image_tag returns utf8-encoded str on Python 2"""
221 if py3compat.PY3:
222 # nothing to do on Python 3
223 return image_tag
224
225 def utf8_image_tag(*args, **kwargs):
226 s = image_tag(*args, **kwargs)
227 if isinstance(s, unicode):
228 s = s.encode('utf8')
229 return s
230 return utf8_image_tag
231
214
232
215
233 def fix_html(html):
216 def fix_html(html):
234 """ Transforms a Qt-generated HTML string into a standards-compliant one.
217 """ Transforms a Qt-generated HTML string into a standards-compliant one.
235
218
236 Parameters:
219 Parameters:
237 -----------
220 -----------
238 html : str,
221 html : unicode,
239 A utf-8 encoded Python string containing the Qt HTML.
222 A Python unicode string containing the Qt HTML.
240 """
223 """
241 # A UTF-8 declaration is needed for proper rendering of some characters
224 # A UTF-8 declaration is needed for proper rendering of some characters
242 # (e.g., indented commands) when viewing exported HTML on a local system
225 # (e.g., indented commands) when viewing exported HTML on a local system
243 # (i.e., without seeing an encoding declaration in an HTTP header).
226 # (i.e., without seeing an encoding declaration in an HTTP header).
244 # Cf. http://www.w3.org/International/O-charset for details.
227 # Cf. http://www.w3.org/International/O-charset for details.
245 offset = html.find('<head>')
228 offset = html.find('<head>')
246 if offset > -1:
229 if offset > -1:
247 html = (html[:offset+6]+
230 html = (html[:offset+6]+
248 '\n<meta http-equiv="Content-Type" '+
231 '\n<meta http-equiv="Content-Type" '+
249 'content="text/html; charset=utf-8" />\n'+
232 'content="text/html; charset=utf-8" />\n'+
250 html[offset+6:])
233 html[offset+6:])
251
234
252 # Replace empty paragraphs tags with line breaks.
235 # Replace empty paragraphs tags with line breaks.
253 html = re.sub(EMPTY_P_RE, '<br/>', html)
236 html = re.sub(EMPTY_P_RE, '<br/>', html)
254
237
255 return html
238 return html
General Comments 0
You need to be logged in to leave comments. Login now