From 9aa74eebfaaec161059396c22c15e2d7ba7dd7d9 2012-08-27 06:06:40
From: Jörgen Stenarson <jorgen.stenarson@kroywen.se>
Date: 2012-08-27 06:06:40
Subject: [PATCH] merge functionality in io and openpy relating to encoding

New functions were introduced in openpy to deal with encoding in
python files. This commit removes redundant code from io and moves
source_to_unicode to openpy.

---

diff --git a/IPython/core/debugger.py b/IPython/core/debugger.py
index dd72df5..1339ec1 100644
--- a/IPython/core/debugger.py
+++ b/IPython/core/debugger.py
@@ -32,7 +32,7 @@ import sys
 
 from IPython.utils import PyColorize
 from IPython.core import ipapi
-from IPython.utils import coloransi, io
+from IPython.utils import coloransi, io, openpy
 from IPython.core.excolors import exception_colors
 
 # See if we can use pydb.
@@ -352,7 +352,10 @@ class Pdb(OldPdb):
 
         start = lineno - 1 - context//2
         lines = linecache.getlines(filename)
-        encoding = io.guess_encoding(lines)
+        try:
+            encoding, _ = openpy.detect_encoding(lambda :lines[:2].pop(0))
+        except SyntaxError:
+            encoding = "ascii"
         start = max(start, 0)
         start = min(start, len(lines) - context)
         lines = lines[start : start + context]
@@ -363,7 +366,7 @@ class Pdb(OldPdb):
                       and tpl_line_em \
                       or tpl_line
             ret.append(self.__format_line(linetpl, filename,
-                                          start + 1 + i, line.decode(encoding),
+                                          start + 1 + i, line.decode(encoding, errors="replace"),
                                           arrow = show_arrow) )
         return ''.join(ret)
 
@@ -423,9 +426,12 @@ class Pdb(OldPdb):
             tpl_line_em = '%%s%s%%s %s%%s%s' % (Colors.linenoEm, Colors.line, ColorsNormal)
             src = []
             lines = linecache.getlines(filename)
-            encoding = io.guess_encoding(lines)
+            try:
+                encoding, _ = openpy.detect_encoding(lambda :lines[:2].pop(0))
+            except SyntaxError:
+                encoding = "ascii"
             for lineno in range(first, last+1):
-                line = lines[lineno].decode(encoding)
+                line = lines[lineno].decode(encoding, errors="replace")
                 if not line:
                     break
 
diff --git a/IPython/core/oinspect.py b/IPython/core/oinspect.py
index a9d0935..5e90dba 100644
--- a/IPython/core/oinspect.py
+++ b/IPython/core/oinspect.py
@@ -35,6 +35,7 @@ from IPython.core import page
 from IPython.testing.skipdoctest import skip_doctest_py3
 from IPython.utils import PyColorize
 from IPython.utils import io
+from IPython.utils import openpy
 from IPython.utils import py3compat
 from IPython.utils.text import indent
 from IPython.utils.wildcard import list_namespace
@@ -457,7 +458,7 @@ class Inspector:
             # Print only text files, not extension binaries.  Note that
             # getsourcelines returns lineno with 1-offset and page() uses
             # 0-offset, so we must adjust.
-            page.page(self.format(io.source_to_unicode(open(ofile).read())), lineno-1)
+            page.page(self.format(openpy.read_py_file(ofile, skip_encoding_cookie=False)), lineno - 1)
 
     def _format_fields(self, fields, title_width=12):
         """Formats a list of fields for display.
diff --git a/IPython/utils/io.py b/IPython/utils/io.py
index 3b9e0eb..3600fab 100644
--- a/IPython/utils/io.py
+++ b/IPython/utils/io.py
@@ -155,30 +155,6 @@ class Tee(object):
             self.close()
 
 
-def guess_encoding(lines):
-    """check list of lines for line matching the source code encoding pattern
-    
-    Only check first two lines
-    """
-    reg = re.compile("#.*coding[:=]\s*([-\w.]+)")
-    for row in lines[:2]: #We only need to check the first two lines
-        result = reg.match(row)
-        if result:
-            coding = result.groups()[0]
-            break
-    else:
-        coding = "ascii"
-    return coding
-
-def source_to_unicode(txt):
-    """Converts string with python source code to unicode
-    """
-    if isinstance(txt, unicode):
-        return txt
-    coding = guess_encoding(txt.split("\n", 2))
-    return txt.decode(coding, errors="replace")
-    
-
 def file_read(filename):
     """Read a file and close it.  Returns the file source."""
     fobj = open(filename,'r');
diff --git a/IPython/utils/openpy.py b/IPython/utils/openpy.py
index e517cbb..3d9f3bf 100644
--- a/IPython/utils/openpy.py
+++ b/IPython/utils/openpy.py
@@ -9,6 +9,7 @@ from __future__ import absolute_import
 import io
 from io import TextIOWrapper
 import re
+from StringIO import StringIO
 import urllib
 
 cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE)
@@ -120,6 +121,17 @@ except ImportError:
         text.mode = 'r'
         return text   
 
+def source_to_unicode(txt):
+    """Converts string with python source code to unicode
+    """
+    if isinstance(txt, unicode):
+        return txt
+    try:
+        coding, _ = detect_encoding(StringIO(txt).readline)
+    except SyntaxError:
+        coding = "ascii"
+    return txt.decode(coding, errors="replace")
+
 def strip_encoding_cookie(filelike):
     """Generator to pull lines from a text-mode file, skipping the encoding
     cookie if it is found in the first two lines.
diff --git a/IPython/zmq/zmqshell.py b/IPython/zmq/zmqshell.py
index d707ed0..90a3f8b 100644
--- a/IPython/zmq/zmqshell.py
+++ b/IPython/zmq/zmqshell.py
@@ -38,7 +38,7 @@ from IPython.lib.kernel import (
     get_connection_file, get_connection_info, connect_qtconsole
 )
 from IPython.testing.skipdoctest import skip_doctest
-from IPython.utils import io
+from IPython.utils import io, openpy
 from IPython.utils.jsonutil import json_clean, encode_images
 from IPython.utils.process import arg_split
 from IPython.utils import py3compat
@@ -355,7 +355,9 @@ class KernelMagics(Magics):
 
         cont = open(arg_s).read()
         if arg_s.endswith('.py'):
-            cont = self.shell.pycolorize(io.source_to_unicode(cont))
+            cont = self.shell.pycolorize(openpy.read_py_file(arg_s, skip_encoding_cookie=False))
+        else:
+            cont = open(arg_s).read()
         page.page(cont)
 
     more = line_magic('more')(less)