diff --git a/IPython/config/loader.py b/IPython/config/loader.py index d6212cd..d8fdb11 100644 --- a/IPython/config/loader.py +++ b/IPython/config/loader.py @@ -26,6 +26,7 @@ import sys from IPython.external import argparse from IPython.utils.path import filefind, get_ipython_dir from IPython.utils import py3compat, text, warn +from IPython.utils.encoding import getdefaultencoding #----------------------------------------------------------------------------- # Exceptions @@ -439,7 +440,7 @@ class KeyValueConfigLoader(CommandLineConfigLoader): """decode argv if bytes, using stin.encoding, falling back on default enc""" uargv = [] if enc is None: - enc = py3compat.getdefaultencoding() + enc = getdefaultencoding() for arg in argv: if not isinstance(arg, unicode): # only decode if not already decoded @@ -603,7 +604,7 @@ class ArgParseConfigLoader(CommandLineConfigLoader): def _parse_args(self, args): """self.parser->self.parsed_data""" # decode sys.argv to support unicode command-line options - enc = py3compat.getdefaultencoding() + enc = getdefaultencoding() uargs = [py3compat.cast_unicode(a, enc) for a in args] self.parsed_data, self.extra_args = self.parser.parse_known_args(uargs) diff --git a/IPython/core/macro.py b/IPython/core/macro.py index d8b878d..75950f2 100644 --- a/IPython/core/macro.py +++ b/IPython/core/macro.py @@ -11,6 +11,7 @@ import re import sys from IPython.utils import py3compat +from IPython.utils.encoding import getdefaultencoding coding_declaration = re.compile(r"#\s*coding[:=]\s*([-\w.]+)") @@ -35,7 +36,7 @@ class Macro(object): lines.append(line) code = "\n".join(lines) if isinstance(code, bytes): - code = code.decode(enc or py3compat.getdefaultencoding()) + code = code.decode(enc or getdefaultencoding()) self.value = code + '\n' def __str__(self): diff --git a/IPython/core/magic.py b/IPython/core/magic.py index 4d3d059..b7590c6 100644 --- a/IPython/core/magic.py +++ b/IPython/core/magic.py @@ -57,6 +57,7 @@ from IPython.core.pylabtools import mpl_runner from IPython.testing.skipdoctest import skip_doctest from IPython.utils import py3compat from IPython.utils import openpy +from IPython.utils.encoding import getdefaultencoding from IPython.utils.io import file_read, nlprint from IPython.utils.module_paths import find_mod from IPython.utils.path import get_py_filename, unquote_filename @@ -949,7 +950,7 @@ Currently the magic system has the following functions:\n""" try: vstr = str(var) except UnicodeEncodeError: - vstr = unicode(var).encode(py3compat.getdefaultencoding(), + vstr = unicode(var).encode(getdefaultencoding(), 'backslashreplace') vstr = vstr.replace('\n','\\n') if len(vstr) < 50: diff --git a/IPython/core/splitinput.py b/IPython/core/splitinput.py index ed54775..ab9063b 100644 --- a/IPython/core/splitinput.py +++ b/IPython/core/splitinput.py @@ -24,6 +24,7 @@ import re import sys from IPython.utils import py3compat +from IPython.utils.encoding import get_stream_enc #----------------------------------------------------------------------------- # Main function @@ -53,7 +54,7 @@ def split_user_input(line, pattern=None): and the rest. """ # We need to ensure that the rest of this routine deals only with unicode - encoding = py3compat.get_stream_enc(sys.stdin, 'utf-8') + encoding = get_stream_enc(sys.stdin, 'utf-8') line = py3compat.cast_unicode(line, encoding) if pattern is None: diff --git a/IPython/core/tests/test_compilerop.py b/IPython/core/tests/test_compilerop.py index 1ffaaa2..f586c33 100644 --- a/IPython/core/tests/test_compilerop.py +++ b/IPython/core/tests/test_compilerop.py @@ -52,7 +52,7 @@ def test_cache(): def setUp(): # Check we're in a proper Python 2 environment (some imports, such # as GTK, can change the default encoding, which can hide bugs.) - nt.assert_equal(py3compat.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii") + nt.assert_equal(sys.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii") def test_cache_unicode(): cp = compilerop.CachingCompiler() diff --git a/IPython/core/tests/test_history.py b/IPython/core/tests/test_history.py index 3d499cd..b55a8aa 100644 --- a/IPython/core/tests/test_history.py +++ b/IPython/core/tests/test_history.py @@ -23,7 +23,7 @@ from IPython.core.history import HistoryManager, extract_hist_ranges from IPython.utils import py3compat def setUp(): - nt.assert_equal(py3compat.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii") + nt.assert_equal(sys.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii") def test_history(): ip = get_ipython() diff --git a/IPython/external/pyparsing/_pyparsing.py b/IPython/external/pyparsing/_pyparsing.py index 0218a99..a4f0c3b 100644 --- a/IPython/external/pyparsing/_pyparsing.py +++ b/IPython/external/pyparsing/_pyparsing.py @@ -128,9 +128,9 @@ if not _PY3K: return unicode(obj) # Else encode it... but how? There are many choices... :) # Replace unprintables with escape codes? - #return unicode(obj).encode(py3compat.getdefaultencoding(), 'backslashreplace_errors') + #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') # Replace unprintables with question marks? - #return unicode(obj).encode(py3compat.getdefaultencoding(), 'replace') + #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') # ... else: _ustr = str diff --git a/IPython/frontend/terminal/interactiveshell.py b/IPython/frontend/terminal/interactiveshell.py index 42fbcfe..49c88dd 100644 --- a/IPython/frontend/terminal/interactiveshell.py +++ b/IPython/frontend/terminal/interactiveshell.py @@ -31,6 +31,7 @@ from IPython.core.usage import interactive_usage, default_banner from IPython.core.interactiveshell import InteractiveShell, InteractiveShellABC from IPython.core.pylabtools import pylab_activate from IPython.testing.skipdoctest import skip_doctest +from IPython.utils.encoding import get_stream_enc from IPython.utils import py3compat from IPython.utils.terminal import toggle_set_term_title, set_term_title from IPython.utils.process import abbrev_cwd @@ -319,7 +320,7 @@ class TerminalInteractiveShell(InteractiveShell): for i in range(hlen - hlen_before_cell): self.readline.remove_history_item(hlen - i - 1) - stdin_encoding = py3compat.get_stream_enc(sys.stdin, 'utf-8') + stdin_encoding = get_stream_enc(sys.stdin, 'utf-8') self.readline.add_history(py3compat.unicode_to_str(source_raw.rstrip(), stdin_encoding)) return self.readline.get_current_history_length() diff --git a/IPython/testing/tools.py b/IPython/testing/tools.py index ef07f78..326a8b7 100644 --- a/IPython/testing/tools.py +++ b/IPython/testing/tools.py @@ -45,6 +45,7 @@ from IPython.utils.process import find_cmd, getoutputerror from IPython.utils.text import list_strings from IPython.utils.io import temp_pyfile, Tee from IPython.utils import py3compat +from IPython.utils.encoding import getdefaultencoding from . import decorators as dec from . import skipdoctest @@ -322,7 +323,7 @@ else: # so we need a class that can handle both. class MyStringIO(StringIO): def write(self, s): - s = py3compat.cast_unicode(s, encoding=py3compat.getdefaultencoding()) + s = py3compat.cast_unicode(s, encoding=getdefaultencoding()) super(MyStringIO, self).write(s) notprinted_msg = """Did not find {0!r} in printed output (on {1}): diff --git a/IPython/utils/_process_posix.py b/IPython/utils/_process_posix.py index d774fda..57fb1bf 100644 --- a/IPython/utils/_process_posix.py +++ b/IPython/utils/_process_posix.py @@ -26,6 +26,7 @@ from .autoattr import auto_attr from ._process_common import getoutput, arg_split from IPython.utils import text from IPython.utils import py3compat +from IPython.utils.encoding import getdefaultencoding #----------------------------------------------------------------------------- # Function definitions @@ -128,7 +129,7 @@ class ProcessHandler(object): int : child's exitstatus """ # Get likely encoding for the output. - enc = py3compat.getdefaultencoding() + enc = getdefaultencoding() # Patterns to match on the output, for pexpect. We read input and # allow either a short timeout or EOF diff --git a/IPython/utils/encoding.py b/IPython/utils/encoding.py new file mode 100644 index 0000000..add75e5 --- /dev/null +++ b/IPython/utils/encoding.py @@ -0,0 +1,54 @@ +# coding: utf-8 +""" +Utilities for dealing with text encodings +""" + +#----------------------------------------------------------------------------- +# Copyright (C) 2008-2012 The IPython Development Team +# +# Distributed under the terms of the BSD License. The full license is in +# the file COPYING, distributed as part of this software. +#----------------------------------------------------------------------------- + +#----------------------------------------------------------------------------- +# Imports +#----------------------------------------------------------------------------- +import sys +import locale + +# to deal with the possibility of sys.std* not being a stream at all +def get_stream_enc(stream, default=None): + """Return the given stream's encoding or a default. + + There are cases where sys.std* might not actually be a stream, so + check for the encoding attribute prior to returning it, and return + a default if it doesn't exist or evaluates as False. `default' + is None if not provided. + """ + if not hasattr(stream, 'encoding') or not stream.encoding: + return default + else: + return stream.encoding + +# Less conservative replacement for sys.getdefaultencoding, that will try +# to match the environment. +# Defined here as central function, so if we find better choices, we +# won't need to make changes all over IPython. +def getdefaultencoding(): + """Return IPython's guess for the default encoding for bytes as text. + + Asks for stdin.encoding first, to match the calling Terminal, but that + is often None for subprocesses. Fall back on locale.getpreferredencoding() + which should be a sensible platform default (that respects LANG environment), + and finally to sys.getdefaultencoding() which is the most conservative option, + and usually ASCII. + """ + enc = get_stream_enc(sys.stdin) + if not enc or enc=='ascii': + try: + # There are reports of getpreferredencoding raising errors + # in some cases, which may well be fixed, but let's be conservative here. + enc = locale.getpreferredencoding() + except Exception: + pass + return enc or sys.getdefaultencoding() diff --git a/IPython/utils/jsonutil.py b/IPython/utils/jsonutil.py index b4bdd95..77ace4f 100644 --- a/IPython/utils/jsonutil.py +++ b/IPython/utils/jsonutil.py @@ -17,6 +17,7 @@ import types from datetime import datetime from IPython.utils import py3compat +from IPython.utils.encoding import getdefaultencoding from IPython.utils import text next_attr_name = '__next__' if py3compat.PY3 else 'next' @@ -135,7 +136,7 @@ def json_clean(obj): return obj if isinstance(obj, bytes): - return obj.decode(py3compat.getdefaultencoding(), 'replace') + return obj.decode(getdefaultencoding(), 'replace') if isinstance(obj, container_to_list) or ( hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)): diff --git a/IPython/utils/py3compat.py b/IPython/utils/py3compat.py index 4115645..3588a3a 100644 --- a/IPython/utils/py3compat.py +++ b/IPython/utils/py3compat.py @@ -5,43 +5,14 @@ import functools import sys import re import types -import locale + +from IPython.utils.encoding import getdefaultencoding orig_open = open def no_code(x, encoding=None): return x -# to deal with the possibility of sys.std* not being a stream at all -def get_stream_enc(stream, default=None): - if not hasattr(stream, 'encoding') or not stream.encoding: - return default - else: - return stream.encoding - -# Less conservative replacement for sys.getdefaultencoding, that will try -# to match the environment. -# Defined here as central function, so if we find better choices, we -# won't need to make changes all over IPython. -def getdefaultencoding(): - """Return IPython's guess for the default encoding for bytes as text. - - Asks for stdin.encoding first, to match the calling Terminal, but that - is often None for subprocesses. Fall back on locale.getpreferredencoding() - which should be a sensible platform default (that respects LANG environment), - and finally to sys.getdefaultencoding() which is the most conservative option, - and usually ASCII. - """ - enc = get_stream_enc(sys.stdin) - if not enc or enc=='ascii': - try: - # There are reports of getpreferredencoding raising errors - # in some cases, which may well be fixed, but let's be conservative here. - enc = locale.getpreferredencoding() - except Exception: - pass - return enc or sys.getdefaultencoding() - def decode(s, encoding=None): encoding = encoding or getdefaultencoding() return s.decode(encoding, "replace") diff --git a/IPython/zmq/iostream.py b/IPython/zmq/iostream.py index 6b43225..984e6c2 100644 --- a/IPython/zmq/iostream.py +++ b/IPython/zmq/iostream.py @@ -4,7 +4,7 @@ from io import StringIO from session import extract_header, Message -from IPython.utils import io, text, py3compat +from IPython.utils import io, text, encoding #----------------------------------------------------------------------------- # Globals @@ -69,7 +69,7 @@ class OutStream(object): else: # Make sure that we're handling unicode if not isinstance(string, unicode): - enc = py3compat.getdefaultencoding() + enc = encoding.getdefaultencoding() string = string.decode(enc, 'replace') self._buffer.write(string)