diff --git a/IPython/utils/openpy.py b/IPython/utils/openpy.py index 3d9f3bf..38fb150 100644 --- a/IPython/utils/openpy.py +++ b/IPython/utils/openpy.py @@ -7,9 +7,8 @@ Much of the code is taken from the tokenize module in Python 3.2. from __future__ import absolute_import import io -from io import TextIOWrapper +from io import TextIOWrapper, BytesIO import re -from StringIO import StringIO import urllib cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE) @@ -121,16 +120,31 @@ except ImportError: text.mode = 'r' return text -def source_to_unicode(txt): - """Converts string with python source code to unicode +def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True): + """Converts a bytes string with python source code to unicode. + + Unicode strings are passed through unchanged. Byte strings are checked + for the python source file encoding cookie to determine encoding. + txt can be either a bytes buffer or a string containing the source + code. """ if isinstance(txt, unicode): return txt + if isinstance(txt, str): + buffer = BytesIO(txt) + else: + buffer = txt try: - coding, _ = detect_encoding(StringIO(txt).readline) + encoding, _ = detect_encoding(buffer.readline) except SyntaxError: - coding = "ascii" - return txt.decode(coding, errors="replace") + encoding = "ascii" + buffer.seek(0) + text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) + text.mode = 'r' + if skip_encoding_cookie: + return u"".join(strip_encoding_cookie(text)) + else: + return text.read() def strip_encoding_cookie(filelike): """Generator to pull lines from a text-mode file, skipping the encoding @@ -193,12 +207,4 @@ def read_py_url(url, errors='replace', skip_encoding_cookie=True): """ response = urllib.urlopen(url) buffer = io.BytesIO(response.read()) - encoding, lines = detect_encoding(buffer.readline) - buffer.seek(0) - text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) - text.mode = 'r' - if skip_encoding_cookie: - return "".join(strip_encoding_cookie(text)) - else: - return text.read() - + return source_to_unicode(buffer, errors, skip_encoding_cookie)