##// END OF EJS Templates
Feedback from pull request #1245
Brandon Parsons -
Show More
@@ -0,0 +1,54 b''
1 # coding: utf-8
2 """
3 Utilities for dealing with text encodings
4 """
5
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2008-2012 The IPython Development Team
8 #
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
12
13 #-----------------------------------------------------------------------------
14 # Imports
15 #-----------------------------------------------------------------------------
16 import sys
17 import locale
18
19 # to deal with the possibility of sys.std* not being a stream at all
20 def get_stream_enc(stream, default=None):
21 """Return the given stream's encoding or a default.
22
23 There are cases where sys.std* might not actually be a stream, so
24 check for the encoding attribute prior to returning it, and return
25 `default` if the attribute doesn't exist or is falsy (e.g. None or '').
26 `default` is None if not provided.
27 """
28 if not hasattr(stream, 'encoding') or not stream.encoding:
29 return default
30 else:
31 return stream.encoding
32
33 # Less conservative replacement for sys.getdefaultencoding, that will try
34 # to match the environment.
35 # Defined here as central function, so if we find better choices, we
36 # won't need to make changes all over IPython.
37 def getdefaultencoding():
38 """Return IPython's guess for the default encoding for bytes as text.
39
40 Asks for stdin.encoding first, to match the calling terminal, but that
41 is often None for subprocesses. If it is missing or 'ascii', fall back on
42 locale.getpreferredencoding(), which should be a sensible platform default
43 (that respects the LANG environment variable), and finally, if there is still
44 no encoding, on sys.getdefaultencoding(), the most conservative option (usually ASCII).
45 """
46 enc = get_stream_enc(sys.stdin)
47 if not enc or enc=='ascii':
48 try:
49 # There are reports of getpreferredencoding raising errors
50 # in some cases, which may well be fixed, but let's be conservative here.
51 enc = locale.getpreferredencoding()
52 except Exception:
53 pass  # best effort: keep whatever stdin gave us (possibly None or 'ascii')
54 return enc or sys.getdefaultencoding()
@@ -26,6 +26,7 b' import sys'
26 from IPython.external import argparse
26 from IPython.external import argparse
27 from IPython.utils.path import filefind, get_ipython_dir
27 from IPython.utils.path import filefind, get_ipython_dir
28 from IPython.utils import py3compat, text, warn
28 from IPython.utils import py3compat, text, warn
29 from IPython.utils.encoding import getdefaultencoding
29
30
30 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
31 # Exceptions
32 # Exceptions
@@ -439,7 +440,7 b' class KeyValueConfigLoader(CommandLineConfigLoader):'
439 """decode argv if bytes, using stdin.encoding, falling back on default enc"""
440 """decode argv if bytes, using stdin.encoding, falling back on default enc"""
440 uargv = []
441 uargv = []
441 if enc is None:
442 if enc is None:
442 enc = py3compat.getdefaultencoding()
443 enc = getdefaultencoding()
443 for arg in argv:
444 for arg in argv:
444 if not isinstance(arg, unicode):
445 if not isinstance(arg, unicode):
445 # only decode if not already decoded
446 # only decode if not already decoded
@@ -603,7 +604,7 b' class ArgParseConfigLoader(CommandLineConfigLoader):'
603 def _parse_args(self, args):
604 def _parse_args(self, args):
604 """self.parser->self.parsed_data"""
605 """self.parser->self.parsed_data"""
605 # decode sys.argv to support unicode command-line options
606 # decode sys.argv to support unicode command-line options
606 enc = py3compat.getdefaultencoding()
607 enc = getdefaultencoding()
607 uargs = [py3compat.cast_unicode(a, enc) for a in args]
608 uargs = [py3compat.cast_unicode(a, enc) for a in args]
608 self.parsed_data, self.extra_args = self.parser.parse_known_args(uargs)
609 self.parsed_data, self.extra_args = self.parser.parse_known_args(uargs)
609
610
@@ -11,6 +11,7 b' import re'
11 import sys
11 import sys
12
12
13 from IPython.utils import py3compat
13 from IPython.utils import py3compat
14 from IPython.utils.encoding import getdefaultencoding
14
15
15 coding_declaration = re.compile(r"#\s*coding[:=]\s*([-\w.]+)")
16 coding_declaration = re.compile(r"#\s*coding[:=]\s*([-\w.]+)")
16
17
@@ -35,7 +36,7 b' class Macro(object):'
35 lines.append(line)
36 lines.append(line)
36 code = "\n".join(lines)
37 code = "\n".join(lines)
37 if isinstance(code, bytes):
38 if isinstance(code, bytes):
38 code = code.decode(enc or py3compat.getdefaultencoding())
39 code = code.decode(enc or getdefaultencoding())
39 self.value = code + '\n'
40 self.value = code + '\n'
40
41
41 def __str__(self):
42 def __str__(self):
@@ -57,6 +57,7 b' from IPython.core.pylabtools import mpl_runner'
57 from IPython.testing.skipdoctest import skip_doctest
57 from IPython.testing.skipdoctest import skip_doctest
58 from IPython.utils import py3compat
58 from IPython.utils import py3compat
59 from IPython.utils import openpy
59 from IPython.utils import openpy
60 from IPython.utils.encoding import getdefaultencoding
60 from IPython.utils.io import file_read, nlprint
61 from IPython.utils.io import file_read, nlprint
61 from IPython.utils.module_paths import find_mod
62 from IPython.utils.module_paths import find_mod
62 from IPython.utils.path import get_py_filename, unquote_filename
63 from IPython.utils.path import get_py_filename, unquote_filename
@@ -949,7 +950,7 b' Currently the magic system has the following functions:\\n"""'
949 try:
950 try:
950 vstr = str(var)
951 vstr = str(var)
951 except UnicodeEncodeError:
952 except UnicodeEncodeError:
952 vstr = unicode(var).encode(py3compat.getdefaultencoding(),
953 vstr = unicode(var).encode(getdefaultencoding(),
953 'backslashreplace')
954 'backslashreplace')
954 vstr = vstr.replace('\n','\\n')
955 vstr = vstr.replace('\n','\\n')
955 if len(vstr) < 50:
956 if len(vstr) < 50:
@@ -24,6 +24,7 b' import re'
24 import sys
24 import sys
25
25
26 from IPython.utils import py3compat
26 from IPython.utils import py3compat
27 from IPython.utils.encoding import get_stream_enc
27
28
28 #-----------------------------------------------------------------------------
29 #-----------------------------------------------------------------------------
29 # Main function
30 # Main function
@@ -53,7 +54,7 b' def split_user_input(line, pattern=None):'
53 and the rest.
54 and the rest.
54 """
55 """
55 # We need to ensure that the rest of this routine deals only with unicode
56 # We need to ensure that the rest of this routine deals only with unicode
56 encoding = py3compat.get_stream_enc(sys.stdin, 'utf-8')
57 encoding = get_stream_enc(sys.stdin, 'utf-8')
57 line = py3compat.cast_unicode(line, encoding)
58 line = py3compat.cast_unicode(line, encoding)
58
59
59 if pattern is None:
60 if pattern is None:
@@ -52,7 +52,7 b' def test_cache():'
52 def setUp():
52 def setUp():
53 # Check we're in a proper Python 2 environment (some imports, such
53 # Check we're in a proper Python 2 environment (some imports, such
54 # as GTK, can change the default encoding, which can hide bugs.)
54 # as GTK, can change the default encoding, which can hide bugs.)
55 nt.assert_equal(py3compat.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii")
55 nt.assert_equal(sys.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii")
56
56
57 def test_cache_unicode():
57 def test_cache_unicode():
58 cp = compilerop.CachingCompiler()
58 cp = compilerop.CachingCompiler()
@@ -23,7 +23,7 b' from IPython.core.history import HistoryManager, extract_hist_ranges'
23 from IPython.utils import py3compat
23 from IPython.utils import py3compat
24
24
25 def setUp():
25 def setUp():
26 nt.assert_equal(py3compat.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii")
26 nt.assert_equal(sys.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii")
27
27
28 def test_history():
28 def test_history():
29 ip = get_ipython()
29 ip = get_ipython()
@@ -128,9 +128,9 b' if not _PY3K:'
128 return unicode(obj)
128 return unicode(obj)
129 # Else encode it... but how? There are many choices... :)
129 # Else encode it... but how? There are many choices... :)
130 # Replace unprintables with escape codes?
130 # Replace unprintables with escape codes?
131 #return unicode(obj).encode(py3compat.getdefaultencoding(), 'backslashreplace_errors')
131 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
132 # Replace unprintables with question marks?
132 # Replace unprintables with question marks?
133 #return unicode(obj).encode(py3compat.getdefaultencoding(), 'replace')
133 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
134 # ...
134 # ...
135 else:
135 else:
136 _ustr = str
136 _ustr = str
@@ -31,6 +31,7 b' from IPython.core.usage import interactive_usage, default_banner'
31 from IPython.core.interactiveshell import InteractiveShell, InteractiveShellABC
31 from IPython.core.interactiveshell import InteractiveShell, InteractiveShellABC
32 from IPython.core.pylabtools import pylab_activate
32 from IPython.core.pylabtools import pylab_activate
33 from IPython.testing.skipdoctest import skip_doctest
33 from IPython.testing.skipdoctest import skip_doctest
34 from IPython.utils.encoding import get_stream_enc
34 from IPython.utils import py3compat
35 from IPython.utils import py3compat
35 from IPython.utils.terminal import toggle_set_term_title, set_term_title
36 from IPython.utils.terminal import toggle_set_term_title, set_term_title
36 from IPython.utils.process import abbrev_cwd
37 from IPython.utils.process import abbrev_cwd
@@ -319,7 +320,7 b' class TerminalInteractiveShell(InteractiveShell):'
319
320
320 for i in range(hlen - hlen_before_cell):
321 for i in range(hlen - hlen_before_cell):
321 self.readline.remove_history_item(hlen - i - 1)
322 self.readline.remove_history_item(hlen - i - 1)
322 stdin_encoding = py3compat.get_stream_enc(sys.stdin, 'utf-8')
323 stdin_encoding = get_stream_enc(sys.stdin, 'utf-8')
323 self.readline.add_history(py3compat.unicode_to_str(source_raw.rstrip(),
324 self.readline.add_history(py3compat.unicode_to_str(source_raw.rstrip(),
324 stdin_encoding))
325 stdin_encoding))
325 return self.readline.get_current_history_length()
326 return self.readline.get_current_history_length()
@@ -45,6 +45,7 b' from IPython.utils.process import find_cmd, getoutputerror'
45 from IPython.utils.text import list_strings
45 from IPython.utils.text import list_strings
46 from IPython.utils.io import temp_pyfile, Tee
46 from IPython.utils.io import temp_pyfile, Tee
47 from IPython.utils import py3compat
47 from IPython.utils import py3compat
48 from IPython.utils.encoding import getdefaultencoding
48
49
49 from . import decorators as dec
50 from . import decorators as dec
50 from . import skipdoctest
51 from . import skipdoctest
@@ -322,7 +323,7 b' else:'
322 # so we need a class that can handle both.
323 # so we need a class that can handle both.
323 class MyStringIO(StringIO):
324 class MyStringIO(StringIO):
324 def write(self, s):
325 def write(self, s):
325 s = py3compat.cast_unicode(s, encoding=py3compat.getdefaultencoding())
326 s = py3compat.cast_unicode(s, encoding=getdefaultencoding())
326 super(MyStringIO, self).write(s)
327 super(MyStringIO, self).write(s)
327
328
328 notprinted_msg = """Did not find {0!r} in printed output (on {1}):
329 notprinted_msg = """Did not find {0!r} in printed output (on {1}):
@@ -26,6 +26,7 b' from .autoattr import auto_attr'
26 from ._process_common import getoutput, arg_split
26 from ._process_common import getoutput, arg_split
27 from IPython.utils import text
27 from IPython.utils import text
28 from IPython.utils import py3compat
28 from IPython.utils import py3compat
29 from IPython.utils.encoding import getdefaultencoding
29
30
30 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
31 # Function definitions
32 # Function definitions
@@ -128,7 +129,7 b' class ProcessHandler(object):'
128 int : child's exitstatus
129 int : child's exitstatus
129 """
130 """
130 # Get likely encoding for the output.
131 # Get likely encoding for the output.
131 enc = py3compat.getdefaultencoding()
132 enc = getdefaultencoding()
132
133
133 # Patterns to match on the output, for pexpect. We read input and
134 # Patterns to match on the output, for pexpect. We read input and
134 # allow either a short timeout or EOF
135 # allow either a short timeout or EOF
@@ -17,6 +17,7 b' import types'
17 from datetime import datetime
17 from datetime import datetime
18
18
19 from IPython.utils import py3compat
19 from IPython.utils import py3compat
20 from IPython.utils.encoding import getdefaultencoding
20 from IPython.utils import text
21 from IPython.utils import text
21 next_attr_name = '__next__' if py3compat.PY3 else 'next'
22 next_attr_name = '__next__' if py3compat.PY3 else 'next'
22
23
@@ -135,7 +136,7 b' def json_clean(obj):'
135 return obj
136 return obj
136
137
137 if isinstance(obj, bytes):
138 if isinstance(obj, bytes):
138 return obj.decode(py3compat.getdefaultencoding(), 'replace')
139 return obj.decode(getdefaultencoding(), 'replace')
139
140
140 if isinstance(obj, container_to_list) or (
141 if isinstance(obj, container_to_list) or (
141 hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
142 hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
@@ -5,43 +5,14 b' import functools'
5 import sys
5 import sys
6 import re
6 import re
7 import types
7 import types
8 import locale
8
9 from IPython.utils.encoding import getdefaultencoding
9
10
10 orig_open = open
11 orig_open = open
11
12
12 def no_code(x, encoding=None):
13 def no_code(x, encoding=None):
13 return x
14 return x
14
15
15 # to deal with the possibility of sys.std* not being a stream at all
16 def get_stream_enc(stream, default=None):
17 if not hasattr(stream, 'encoding') or not stream.encoding:
18 return default
19 else:
20 return stream.encoding
21
22 # Less conservative replacement for sys.getdefaultencoding, that will try
23 # to match the environment.
24 # Defined here as central function, so if we find better choices, we
25 # won't need to make changes all over IPython.
26 def getdefaultencoding():
27 """Return IPython's guess for the default encoding for bytes as text.
28
29 Asks for stdin.encoding first, to match the calling Terminal, but that
30 is often None for subprocesses. Fall back on locale.getpreferredencoding()
31 which should be a sensible platform default (that respects LANG environment),
32 and finally to sys.getdefaultencoding() which is the most conservative option,
33 and usually ASCII.
34 """
35 enc = get_stream_enc(sys.stdin)
36 if not enc or enc=='ascii':
37 try:
38 # There are reports of getpreferredencoding raising errors
39 # in some cases, which may well be fixed, but let's be conservative here.
40 enc = locale.getpreferredencoding()
41 except Exception:
42 pass
43 return enc or sys.getdefaultencoding()
44
45 def decode(s, encoding=None):
16 def decode(s, encoding=None):
46 encoding = encoding or getdefaultencoding()
17 encoding = encoding or getdefaultencoding()
47 return s.decode(encoding, "replace")
18 return s.decode(encoding, "replace")
@@ -4,7 +4,7 b' from io import StringIO'
4
4
5 from session import extract_header, Message
5 from session import extract_header, Message
6
6
7 from IPython.utils import io, text, py3compat
7 from IPython.utils import io, text, encoding
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Globals
10 # Globals
@@ -69,7 +69,7 b' class OutStream(object):'
69 else:
69 else:
70 # Make sure that we're handling unicode
70 # Make sure that we're handling unicode
71 if not isinstance(string, unicode):
71 if not isinstance(string, unicode):
72 enc = py3compat.getdefaultencoding()
72 enc = encoding.getdefaultencoding()
73 string = string.decode(enc, 'replace')
73 string = string.decode(enc, 'replace')
74
74
75 self._buffer.write(string)
75 self._buffer.write(string)
General Comments 0
You need to be logged in to leave comments. Login now