From 9aa74eebfaaec161059396c22c15e2d7ba7dd7d9 2012-08-27 06:06:40 From: Jörgen Stenarson Date: 2012-08-27 06:06:40 Subject: [PATCH] merge functionality in io and openpy relating to encoding New functions were introduced in openpy to deal with encoding in python files. This commit removes redundant code from io and moves source_to_unicode to openpy. --- diff --git a/IPython/core/debugger.py b/IPython/core/debugger.py index dd72df5..1339ec1 100644 --- a/IPython/core/debugger.py +++ b/IPython/core/debugger.py @@ -32,7 +32,7 @@ import sys from IPython.utils import PyColorize from IPython.core import ipapi -from IPython.utils import coloransi, io +from IPython.utils import coloransi, io, openpy from IPython.core.excolors import exception_colors # See if we can use pydb. @@ -352,7 +352,10 @@ class Pdb(OldPdb): start = lineno - 1 - context//2 lines = linecache.getlines(filename) - encoding = io.guess_encoding(lines) + try: + encoding, _ = openpy.detect_encoding(lambda :lines[:2].pop(0)) + except SyntaxError: + encoding = "ascii" start = max(start, 0) start = min(start, len(lines) - context) lines = lines[start : start + context] @@ -363,7 +366,7 @@ class Pdb(OldPdb): and tpl_line_em \ or tpl_line ret.append(self.__format_line(linetpl, filename, - start + 1 + i, line.decode(encoding), + start + 1 + i, line.decode(encoding, errors="replace"), arrow = show_arrow) ) return ''.join(ret) @@ -423,9 +426,12 @@ class Pdb(OldPdb): tpl_line_em = '%%s%s%%s %s%%s%s' % (Colors.linenoEm, Colors.line, ColorsNormal) src = [] lines = linecache.getlines(filename) - encoding = io.guess_encoding(lines) + try: + encoding, _ = openpy.detect_encoding(lambda :lines[:2].pop(0)) + except SyntaxError: + encoding = "ascii" for lineno in range(first, last+1): - line = lines[lineno].decode(encoding) + line = lines[lineno].decode(encoding, errors="replace") if not line: break diff --git a/IPython/core/oinspect.py b/IPython/core/oinspect.py index a9d0935..5e90dba 100644 --- a/IPython/core/oinspect.py +++ b/IPython/core/oinspect.py @@ -35,6 +35,7 @@ from IPython.core import page from IPython.testing.skipdoctest import skip_doctest_py3 from IPython.utils import PyColorize from IPython.utils import io +from IPython.utils import openpy from IPython.utils import py3compat from IPython.utils.text import indent from IPython.utils.wildcard import list_namespace @@ -457,7 +458,7 @@ class Inspector: # Print only text files, not extension binaries. Note that # getsourcelines returns lineno with 1-offset and page() uses # 0-offset, so we must adjust. - page.page(self.format(io.source_to_unicode(open(ofile).read())), lineno-1) + page.page(self.format(openpy.read_py_file(ofile, skip_encoding_cookie=False)), lineno - 1) def _format_fields(self, fields, title_width=12): """Formats a list of fields for display. diff --git a/IPython/utils/io.py b/IPython/utils/io.py index 3b9e0eb..3600fab 100644 --- a/IPython/utils/io.py +++ b/IPython/utils/io.py @@ -155,30 +155,6 @@ class Tee(object): self.close() -def guess_encoding(lines): - """check list of lines for line matching the source code encoding pattern - - Only check first two lines - """ - reg = re.compile("#.*coding[:=]\s*([-\w.]+)") - for row in lines[:2]: #We only need to check the first two lines - result = reg.match(row) - if result: - coding = result.groups()[0] - break - else: - coding = "ascii" - return coding - -def source_to_unicode(txt): - """Converts string with python source code to unicode - """ - if isinstance(txt, unicode): - return txt - coding = guess_encoding(txt.split("\n", 2)) - return txt.decode(coding, errors="replace") - - def file_read(filename): """Read a file and close it. Returns the file source.""" fobj = open(filename,'r'); diff --git a/IPython/utils/openpy.py b/IPython/utils/openpy.py index e517cbb..3d9f3bf 100644 --- a/IPython/utils/openpy.py +++ b/IPython/utils/openpy.py @@ -9,6 +9,7 @@ from __future__ import absolute_import import io from io import TextIOWrapper import re +from StringIO import StringIO import urllib cookie_re = re.compile(ur"coding[:=]\s*([-\w.]+)", re.UNICODE) @@ -120,6 +121,17 @@ except ImportError: text.mode = 'r' return text +def source_to_unicode(txt): + """Converts string with python source code to unicode + """ + if isinstance(txt, unicode): + return txt + try: + coding, _ = detect_encoding(StringIO(txt).readline) + except SyntaxError: + coding = "ascii" + return txt.decode(coding, errors="replace") + def strip_encoding_cookie(filelike): """Generator to pull lines from a text-mode file, skipping the encoding cookie if it is found in the first two lines. diff --git a/IPython/zmq/zmqshell.py b/IPython/zmq/zmqshell.py index d707ed0..90a3f8b 100644 --- a/IPython/zmq/zmqshell.py +++ b/IPython/zmq/zmqshell.py @@ -38,7 +38,7 @@ from IPython.lib.kernel import ( get_connection_file, get_connection_info, connect_qtconsole ) from IPython.testing.skipdoctest import skip_doctest -from IPython.utils import io +from IPython.utils import io, openpy from IPython.utils.jsonutil import json_clean, encode_images from IPython.utils.process import arg_split from IPython.utils import py3compat @@ -355,7 +355,9 @@ class KernelMagics(Magics): cont = open(arg_s).read() if arg_s.endswith('.py'): - cont = self.shell.pycolorize(io.source_to_unicode(cont)) + cont = self.shell.pycolorize(openpy.read_py_file(arg_s, skip_encoding_cookie=False)) + else: + cont = open(arg_s).read() page.page(cont) more = line_magic('more')(less)