From f467f96827d11b2420e921308177517f1f8ce49a 2010-10-13 18:50:53 From: Mark Voorhies Date: 2010-10-13 18:50:53 Subject: [PATCH] Specify character encoding in HTML HEAD This adds an explicit declaration of the UTF-8 character encoding to the Qt generated HTML dump (since we've just explicitly requested UTF-8 conversion from QString, this declaration should be correct), cf. http://www.w3.org/International/O-charset This patch fixes incorrect characters (e.g., A-hat for tab) in, e.g., Firefox's default rendering of exported HTML. Applying the same fix to both HTML and XHTML export even though Firefox and WebKit appear to assume UTF-8 for XHTML even without an explicit declaration. --- diff --git a/IPython/frontend/qt/console/console_widget.py b/IPython/frontend/qt/console/console_widget.py index 9ae3eae..d4f939e 100644 --- a/IPython/frontend/qt/console/console_widget.py +++ b/IPython/frontend/qt/console/console_widget.py @@ -549,9 +549,11 @@ class ConsoleWidget(Configurable, QtGui.QWidget): # N.B. this is overly restrictive, but Qt's output is # predictable... img_re = re.compile(r'') + html = self.fix_html_encoding( + str(self._control.toHtml().toUtf8())) f.write(img_re.sub( lambda x: self.image_tag(x, path = path, format = "png"), - str(self._control.toHtml().toUtf8()))) + html)) finally: f.close() return filename @@ -578,6 +580,8 @@ class ConsoleWidget(Configurable, QtGui.QWidget): assert(offset > -1) html = ('\n'+ html[offset+6:]) + # And now declare UTF-8 encoding + html = self.fix_html_encoding(html) f.write(img_re.sub( lambda x: self.image_tag(x, path = None, format = "svg"), html)) @@ -586,6 +590,29 @@ class ConsoleWidget(Configurable, QtGui.QWidget): return filename return None + def fix_html_encoding(self, html): + """ Return html string, with a UTF-8 declaration added to <head>. + + Assumes that html is Qt generated and has already been UTF-8 encoded + and coerced to a python string.
If the expected head element is + not found, the given object is returned unmodified. + + This patching is needed for proper rendering of some characters + (e.g., indented commands) when viewing exported HTML on a local + system (i.e., without seeing an encoding declaration in an HTTP + header). + + Cf. http://www.w3.org/International/O-charset for details. + """ + offset = html.find("<head>") + if(offset > -1): + html = (html[:offset+6]+ + '\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n'+ + html[offset+6:]) + + return html + def image_tag(self, match, path = None, format = "png"): """ Return (X)HTML mark-up for the image-tag given by match.