From aca660643f6ea29aeb9953cbdf3e111d4db6bb07 2012-08-27 06:06:43
From: Jörgen Stenarson <jorgen.stenarson@kroywen.se>
Date: 2012-08-27 06:06:43
Subject: [PATCH] make source_to_unicode use BytesIO and refactor

---

diff --git a/IPython/utils/openpy.py b/IPython/utils/openpy.py
index 3d9f3bf..38fb150 100644
--- a/IPython/utils/openpy.py
+++ b/IPython/utils/openpy.py
@@ -7,9 +7,8 @@ Much of the code is taken from the tokenize module in Python 3.2.
 from __future__ import absolute_import
 
 import io
-from io import TextIOWrapper
+from io import TextIOWrapper, BytesIO
 import re
-from StringIO import StringIO
 import urllib
 
 cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE)
@@ -121,16 +120,31 @@ except ImportError:
         text.mode = 'r'
         return text   
 
-def source_to_unicode(txt):
-    """Converts string with python source code to unicode
+def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
+    """Converts a bytes string with python source code to unicode.
+
+    Unicode strings are passed through unchanged. Byte strings are checked
+    for the python source file encoding cookie to determine encoding.
+    txt can be either a bytes buffer or a string containing the source
+    code.
     """
     if isinstance(txt, unicode):
         return txt
+    if isinstance(txt, str):
+        buffer = BytesIO(txt)
+    else:
+        buffer = txt
     try:
-        coding, _ = detect_encoding(StringIO(txt).readline)
+        encoding, _ = detect_encoding(buffer.readline)
     except SyntaxError:
-        coding = "ascii"
-    return txt.decode(coding, errors="replace")
+        encoding = "ascii"
+    buffer.seek(0)
+    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
+    text.mode = 'r'
+    if skip_encoding_cookie:
+        return u"".join(strip_encoding_cookie(text))
+    else:
+        return text.read()
 
 def strip_encoding_cookie(filelike):
     """Generator to pull lines from a text-mode file, skipping the encoding
@@ -193,12 +207,4 @@ def read_py_url(url, errors='replace', skip_encoding_cookie=True):
     """
     response = urllib.urlopen(url)
     buffer = io.BytesIO(response.read())
-    encoding, lines = detect_encoding(buffer.readline)
-    buffer.seek(0)
-    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
-    text.mode = 'r'
-    if skip_encoding_cookie:
-        return "".join(strip_encoding_cookie(text))
-    else:
-        return text.read()
-
+    return source_to_unicode(buffer, errors, skip_encoding_cookie)