##// END OF EJS Templates
use unicode for HTML export...
MinRK -
Show More
@@ -1,255 +1,238 b''
1 1 """ Defines classes and functions for working with Qt's rich text system.
2 2 """
3 3 #-----------------------------------------------------------------------------
4 4 # Imports
5 5 #-----------------------------------------------------------------------------
6 6
7 # Standard library imports.
7 # Standard library imports
8 import io
8 9 import os
9 10 import re
10 11
11 # System library imports.
12 # System library imports
12 13 from IPython.external.qt import QtGui
13 14
14 15 # IPython imports
15 16 from IPython.utils import py3compat
16 17
17 18 #-----------------------------------------------------------------------------
18 19 # Constants
19 20 #-----------------------------------------------------------------------------
20 21
# A regular expression for an HTML paragraph with no content (nothing but
# optional whitespace between the opening and closing tags).
EMPTY_P_RE = re.compile(r'<p[^/>]*>\s*</p>')

# A regular expression for matching images in rich text HTML; the numeric
# image ID is captured in the group called ``name``.
# Note that this is overly restrictive, but Qt's output is predictable...
IMG_RE = re.compile(r'<img src="(?P<name>[\d]+)" />')
27 28
28 29 #-----------------------------------------------------------------------------
29 30 # Classes
30 31 #-----------------------------------------------------------------------------
31 32
class HtmlExporter(object):
    """ A stateful HTML exporter for a Q(Plain)TextEdit.

    This class is designed for convenient user interaction.

    Attributes
    ----------
    control : QPlainTextEdit or QTextEdit
        The widget whose document is exported.

    filename : str
        The most recently chosen file name (initially 'ipython.html'). It is
        pre-selected in the save dialog on the next export.

    image_tag : callable or None
        Passed through to the export function to convert images.

    inline_png : bool or None
        Remembered answer to the "inline or external PNGs?" question. None
        means the user is asked whenever the document contains images.
    """

    def __init__(self, control):
        """ Creates an HtmlExporter for the given Q(Plain)TextEdit.
        """
        assert isinstance(control, (QtGui.QPlainTextEdit, QtGui.QTextEdit))
        self.control = control
        self.filename = 'ipython.html'
        self.image_tag = None
        self.inline_png = None

    def export(self):
        """ Displays a dialog for exporting HTML generated by Qt's rich text
        system.

        Returns
        -------
        The name of the file that was saved, or None if no file was saved.
        """
        parent = self.control.window()
        dialog = QtGui.QFileDialog(parent, 'Save as...')
        dialog.setAcceptMode(QtGui.QFileDialog.AcceptSave)
        filters = [
            'HTML with PNG figures (*.html *.htm)',
            'XHTML with inline SVG figures (*.xhtml *.xml)'
        ]
        dialog.setNameFilters(filters)
        if self.filename:
            dialog.selectFile(self.filename)
            root, ext = os.path.splitext(self.filename)
            if ext.lower() in ('.xml', '.xhtml'):
                dialog.selectNameFilter(filters[-1])

        if not dialog.exec_():
            # The user cancelled the file dialog: nothing was saved.
            return None

        self.filename = dialog.selectedFiles()[0]
        choice = dialog.selectedNameFilter()
        html = py3compat.cast_unicode(self.control.document().toHtml())

        # Configure the exporter.
        if choice.startswith('XHTML'):
            exporter = export_xhtml
        else:
            if not IMG_RE.search(html):
                # No images to place, so the inline/external choice is moot.
                # Force inline so that export_html() does not check (and
                # possibly reject) a "<root>_files" path it would never use.
                inline = True
            else:
                # If there are PNGs, decide how to export them.
                inline = self.inline_png
                if inline is None:
                    inline = self._ask_inline_png(parent)
            exporter = lambda h, f, i: export_html(h, f, i, inline)

        # Perform the export!
        try:
            exporter(html, self.filename, self.image_tag)
        except Exception as e:
            # Best-effort UI boundary: report the failure to the user rather
            # than letting it propagate into the Qt event loop.
            msg = "Error exporting HTML to %s\n" % self.filename + str(e)
            QtGui.QMessageBox.warning(parent, 'Error', msg,
                QtGui.QMessageBox.Ok, QtGui.QMessageBox.Ok)
            return None

        # The export functions return nothing, so report the saved file name
        # here, as the docstring promises.
        return self.filename

    def _ask_inline_png(self, parent):
        """ Asks the user whether PNGs should be inlined or saved externally.

        If the "Don't ask again" box is checked, the answer is remembered in
        ``self.inline_png``.

        Returns
        -------
        True for inline PNGs, False for external image files.
        """
        dialog = QtGui.QDialog(parent)
        dialog.setWindowTitle('Save as...')
        layout = QtGui.QVBoxLayout(dialog)
        msg = "Exporting HTML with PNGs"
        info = "Would you like inline PNGs (single large html " \
            "file) or external image files?"
        checkbox = QtGui.QCheckBox("&Don't ask again")
        checkbox.setShortcut('D')
        ib = QtGui.QPushButton("&Inline")
        ib.setShortcut('I')
        eb = QtGui.QPushButton("&External")
        eb.setShortcut('E')
        box = QtGui.QMessageBox(QtGui.QMessageBox.Question,
                                dialog.windowTitle(), msg)
        box.setInformativeText(info)
        box.addButton(ib, QtGui.QMessageBox.NoRole)
        box.addButton(eb, QtGui.QMessageBox.YesRole)
        layout.setSpacing(0)
        layout.addWidget(box)
        layout.addWidget(checkbox)
        dialog.setLayout(layout)
        dialog.show()
        reply = box.exec_()
        dialog.hide()
        # A reply of 0 is treated as "Inline", the first button added above.
        # NOTE(review): presumably exec_() yields the index of the clicked
        # custom button -- confirm against the Qt documentation.
        inline = (reply == 0)
        if checkbox.checkState():
            # Don't ask anymore; always use this choice.
            self.inline_png = inline
        return inline
120 121
121 122 #-----------------------------------------------------------------------------
122 123 # Functions
123 124 #-----------------------------------------------------------------------------
124 125
def export_html(html, filename, image_tag=None, inline=True):
    """ Export the contents of the ConsoleWidget as HTML.

    Parameters:
    -----------
    html : unicode,
        A Python unicode string containing the Qt HTML to export.

    filename : str
        The file to be saved.

    image_tag : callable, optional (default None)
        Used to convert images. See ``default_image_tag()`` for information.

    inline : bool, optional [default True]
        If True, include images as inline PNGs. Otherwise, include them as
        links to external PNG files, mimicking web browsers' "Web Page,
        Complete" behavior.

    Raises:
    -------
    OSError
        If ``inline`` is false and "<root>_files" (``filename`` without its
        extension, plus "_files") exists but is a regular file.
    """
    if image_tag is None:
        image_tag = default_image_tag

    if inline:
        path = None
    else:
        root, ext = os.path.splitext(filename)
        path = root + "_files"
        if os.path.isfile(path):
            raise OSError("%s exists, but is not a directory." % path)

    # Build the complete document before touching the output file, so that a
    # failure in fix_html() or in the image_tag callable cannot leave behind
    # an empty or truncated file.
    html = fix_html(html)
    html = IMG_RE.sub(lambda match: image_tag(match, path=path, format="png"),
                      html)

    # io.open with an explicit encoding writes unicode text as UTF-8.
    with io.open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
161 160
162 161
def export_xhtml(html, filename, image_tag=None):
    """ Export the contents of the ConsoleWidget as XHTML with inline SVGs.

    Parameters:
    -----------
    html : unicode,
        A Python unicode string containing the Qt HTML to export.

    filename : str
        The file to be saved.

    image_tag : callable, optional (default None)
        Used to convert images. See ``default_image_tag()`` for information.

    Raises:
    -------
    AssertionError
        If ``html`` contains no ``<html>`` tag.
    """
    if image_tag is None:
        image_tag = default_image_tag

    # Hack to make xhtml header -- note that we are not doing any check for
    # valid XML.
    opening_tag = "<html>"
    offset = html.find(opening_tag)
    if offset < 0:
        # Raised explicitly (rather than via ``assert``) so that the check
        # still runs under ``python -O``; the exception type is kept for
        # backward compatibility. It is done before the file is opened so
        # that invalid input does not create or truncate the output file.
        raise AssertionError('Invalid HTML string: no <html> tag.')
    # Everything up to and including the original <html> tag is replaced by
    # an <html> tag carrying the XHTML namespace.
    html = (u'<html xmlns="http://www.w3.org/1999/xhtml">\n' +
            html[offset + len(opening_tag):])

    html = fix_html(html)
    html = IMG_RE.sub(lambda match: image_tag(match, path=None, format="svg"),
                      html)

    # io.open with an explicit encoding writes unicode text as UTF-8.
    with io.open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
193 190
194 191
def default_image_tag(match, path=None, format="png"):
    """ Return (X)HTML mark-up for the image-tag given by match.

    This default implementation merely removes the image, and exists mostly
    for documentation purposes. More information than is present in the Qt
    HTML is required to supply the images.

    Parameters
    ----------
    match : re.SRE_Match
        A match to an HTML image tag as exported by Qt, with
        match.group("name") containing the matched image ID.

    path : string|None, optional [default None]
        If not None, specifies a path to which supporting files may be written
        (e.g., for linked images). If None, all images are to be included
        inline.

    format : "png"|"svg", optional [default "png"]
        Format for returned or referenced images.

    Returns
    -------
    An empty unicode string; all arguments are ignored.
    """
    return u''
231 214
232 215
def fix_html(html):
    """ Transforms a Qt-generated HTML string into a standards-compliant one.

    Parameters:
    -----------
    html : unicode,
        A Python unicode string containing the Qt HTML.

    Returns:
    --------
    The HTML with a UTF-8 ``<meta>`` declaration inserted right after the
    first ``<head>`` tag (when there is one) and with empty paragraphs
    replaced by ``<br/>``.
    """
    # A UTF-8 declaration is needed for proper rendering of some characters
    # (e.g., indented commands) when viewing exported HTML on a local system
    # (i.e., without seeing an encoding declaration in an HTTP header).
    # C.f. http://www.w3.org/International/O-charset for details.
    head_tag = '<head>'
    offset = html.find(head_tag)
    if offset > -1:
        # Split immediately after the <head> tag and splice the <meta> in.
        split_at = offset + len(head_tag)
        html = (html[:split_at] +
                '\n<meta http-equiv="Content-Type" ' +
                'content="text/html; charset=utf-8" />\n' +
                html[split_at:])

    # Replace empty paragraphs tags with line breaks.
    return EMPTY_P_RE.sub('<br/>', html)
General Comments 0
You need to be logged in to leave comments. Login now