Show More
@@ -0,0 +1,54 b'' | |||||
|
1 | # coding: utf-8 | |||
|
2 | """ | |||
|
3 | Utilities for dealing with text encodings | |||
|
4 | """ | |||
|
5 | ||||
|
6 | #----------------------------------------------------------------------------- | |||
|
7 | # Copyright (C) 2008-2012 The IPython Development Team | |||
|
8 | # | |||
|
9 | # Distributed under the terms of the BSD License. The full license is in | |||
|
10 | # the file COPYING, distributed as part of this software. | |||
|
11 | #----------------------------------------------------------------------------- | |||
|
12 | ||||
|
13 | #----------------------------------------------------------------------------- | |||
|
14 | # Imports | |||
|
15 | #----------------------------------------------------------------------------- | |||
|
16 | import sys | |||
|
17 | import locale | |||
|
18 | ||||
|
19 | # to deal with the possibility of sys.std* not being a stream at all | |||
|
def get_stream_enc(stream, default=None):
    """Look up the encoding advertised by `stream`, with a fallback.

    sys.std* can be replaced by objects that are not real streams, so the
    ``encoding`` attribute is only trusted when it is present and
    evaluates as True.

    Parameters
    ----------
    stream : object
        Anything that may (or may not) carry an ``encoding`` attribute.
    default : object, optional
        Value handed back when no usable encoding is found (None if
        not given).

    Returns
    -------
    ``stream.encoding`` when it exists and is truthy, otherwise `default`.
    """
    # Guard clause: a present, non-empty encoding wins outright.
    if hasattr(stream, 'encoding') and stream.encoding:
        return stream.encoding
    return default
|
32 | ||||
|
33 | # Less conservative replacement for sys.getdefaultencoding, that will try | |||
|
34 | # to match the environment. | |||
|
35 | # Defined here as central function, so if we find better choices, we | |||
|
36 | # won't need to make changes all over IPython. | |||
|
def getdefaultencoding():
    """Return IPython's best guess at the encoding to use for bytes as text.

    Candidates are tried in this order:

    1. ``sys.stdin``'s encoding, so that we match the calling terminal.
       This is often None for subprocesses, and a plain 'ascii' answer is
       treated as uninformative.
    2. ``locale.getpreferredencoding()``, which should be a sensible
       platform default (and respects the LANG environment).
    3. ``sys.getdefaultencoding()``, the most conservative option, and
       usually ASCII.
    """
    guess = get_stream_enc(sys.stdin)
    if guess and guess != 'ascii':
        # stdin gave a real, non-trivial answer: use it as is.
        return guess

    try:
        # There are reports of getpreferredencoding raising errors in some
        # cases, which may well be fixed, but stay conservative and keep
        # the previous guess if it does.
        guess = locale.getpreferredencoding()
    except Exception:
        pass

    return guess or sys.getdefaultencoding()
@@ -26,6 +26,7 b' import sys' | |||||
26 | from IPython.external import argparse |
|
26 | from IPython.external import argparse | |
27 | from IPython.utils.path import filefind, get_ipython_dir |
|
27 | from IPython.utils.path import filefind, get_ipython_dir | |
28 | from IPython.utils import py3compat, text, warn |
|
28 | from IPython.utils import py3compat, text, warn | |
|
29 | from IPython.utils.encoding import getdefaultencoding | |||
29 |
|
30 | |||
30 | #----------------------------------------------------------------------------- |
|
31 | #----------------------------------------------------------------------------- | |
31 | # Exceptions |
|
32 | # Exceptions | |
@@ -439,7 +440,7 b' class KeyValueConfigLoader(CommandLineConfigLoader):' | |||||
439 | """decode argv if bytes, using stin.encoding, falling back on default enc""" |
|
440 | """decode argv if bytes, using stin.encoding, falling back on default enc""" | |
440 | uargv = [] |
|
441 | uargv = [] | |
441 | if enc is None: |
|
442 | if enc is None: | |
442 |
enc = |
|
443 | enc = getdefaultencoding() | |
443 | for arg in argv: |
|
444 | for arg in argv: | |
444 | if not isinstance(arg, unicode): |
|
445 | if not isinstance(arg, unicode): | |
445 | # only decode if not already decoded |
|
446 | # only decode if not already decoded | |
@@ -603,7 +604,7 b' class ArgParseConfigLoader(CommandLineConfigLoader):' | |||||
603 | def _parse_args(self, args): |
|
604 | def _parse_args(self, args): | |
604 | """self.parser->self.parsed_data""" |
|
605 | """self.parser->self.parsed_data""" | |
605 | # decode sys.argv to support unicode command-line options |
|
606 | # decode sys.argv to support unicode command-line options | |
606 |
enc = |
|
607 | enc = getdefaultencoding() | |
607 | uargs = [py3compat.cast_unicode(a, enc) for a in args] |
|
608 | uargs = [py3compat.cast_unicode(a, enc) for a in args] | |
608 | self.parsed_data, self.extra_args = self.parser.parse_known_args(uargs) |
|
609 | self.parsed_data, self.extra_args = self.parser.parse_known_args(uargs) | |
609 |
|
610 |
@@ -11,6 +11,7 b' import re' | |||||
11 | import sys |
|
11 | import sys | |
12 |
|
12 | |||
13 | from IPython.utils import py3compat |
|
13 | from IPython.utils import py3compat | |
|
14 | from IPython.utils.encoding import getdefaultencoding | |||
14 |
|
15 | |||
15 | coding_declaration = re.compile(r"#\s*coding[:=]\s*([-\w.]+)") |
|
16 | coding_declaration = re.compile(r"#\s*coding[:=]\s*([-\w.]+)") | |
16 |
|
17 | |||
@@ -35,7 +36,7 b' class Macro(object):' | |||||
35 | lines.append(line) |
|
36 | lines.append(line) | |
36 | code = "\n".join(lines) |
|
37 | code = "\n".join(lines) | |
37 | if isinstance(code, bytes): |
|
38 | if isinstance(code, bytes): | |
38 |
code = code.decode(enc or |
|
39 | code = code.decode(enc or getdefaultencoding()) | |
39 | self.value = code + '\n' |
|
40 | self.value = code + '\n' | |
40 |
|
41 | |||
41 | def __str__(self): |
|
42 | def __str__(self): |
@@ -57,6 +57,7 b' from IPython.core.pylabtools import mpl_runner' | |||||
57 | from IPython.testing.skipdoctest import skip_doctest |
|
57 | from IPython.testing.skipdoctest import skip_doctest | |
58 | from IPython.utils import py3compat |
|
58 | from IPython.utils import py3compat | |
59 | from IPython.utils import openpy |
|
59 | from IPython.utils import openpy | |
|
60 | from IPython.utils.encoding import getdefaultencoding | |||
60 | from IPython.utils.io import file_read, nlprint |
|
61 | from IPython.utils.io import file_read, nlprint | |
61 | from IPython.utils.module_paths import find_mod |
|
62 | from IPython.utils.module_paths import find_mod | |
62 | from IPython.utils.path import get_py_filename, unquote_filename |
|
63 | from IPython.utils.path import get_py_filename, unquote_filename | |
@@ -949,7 +950,7 b' Currently the magic system has the following functions:\\n"""' | |||||
949 | try: |
|
950 | try: | |
950 | vstr = str(var) |
|
951 | vstr = str(var) | |
951 | except UnicodeEncodeError: |
|
952 | except UnicodeEncodeError: | |
952 |
vstr = unicode(var).encode( |
|
953 | vstr = unicode(var).encode(getdefaultencoding(), | |
953 | 'backslashreplace') |
|
954 | 'backslashreplace') | |
954 | vstr = vstr.replace('\n','\\n') |
|
955 | vstr = vstr.replace('\n','\\n') | |
955 | if len(vstr) < 50: |
|
956 | if len(vstr) < 50: |
@@ -24,6 +24,7 b' import re' | |||||
24 | import sys |
|
24 | import sys | |
25 |
|
25 | |||
26 | from IPython.utils import py3compat |
|
26 | from IPython.utils import py3compat | |
|
27 | from IPython.utils.encoding import get_stream_enc | |||
27 |
|
28 | |||
28 | #----------------------------------------------------------------------------- |
|
29 | #----------------------------------------------------------------------------- | |
29 | # Main function |
|
30 | # Main function | |
@@ -53,7 +54,7 b' def split_user_input(line, pattern=None):' | |||||
53 | and the rest. |
|
54 | and the rest. | |
54 | """ |
|
55 | """ | |
55 | # We need to ensure that the rest of this routine deals only with unicode |
|
56 | # We need to ensure that the rest of this routine deals only with unicode | |
56 |
encoding = |
|
57 | encoding = get_stream_enc(sys.stdin, 'utf-8') | |
57 | line = py3compat.cast_unicode(line, encoding) |
|
58 | line = py3compat.cast_unicode(line, encoding) | |
58 |
|
59 | |||
59 | if pattern is None: |
|
60 | if pattern is None: |
@@ -52,7 +52,7 b' def test_cache():' | |||||
52 | def setUp(): |
|
52 | def setUp(): | |
53 | # Check we're in a proper Python 2 environment (some imports, such |
|
53 | # Check we're in a proper Python 2 environment (some imports, such | |
54 | # as GTK, can change the default encoding, which can hide bugs.) |
|
54 | # as GTK, can change the default encoding, which can hide bugs.) | |
55 |
nt.assert_equal( |
|
55 | nt.assert_equal(sys.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii") | |
56 |
|
56 | |||
57 | def test_cache_unicode(): |
|
57 | def test_cache_unicode(): | |
58 | cp = compilerop.CachingCompiler() |
|
58 | cp = compilerop.CachingCompiler() |
@@ -23,7 +23,7 b' from IPython.core.history import HistoryManager, extract_hist_ranges' | |||||
23 | from IPython.utils import py3compat |
|
23 | from IPython.utils import py3compat | |
24 |
|
24 | |||
25 | def setUp(): |
|
25 | def setUp(): | |
26 |
nt.assert_equal( |
|
26 | nt.assert_equal(sys.getdefaultencoding(), "utf-8" if py3compat.PY3 else "ascii") | |
27 |
|
27 | |||
28 | def test_history(): |
|
28 | def test_history(): | |
29 | ip = get_ipython() |
|
29 | ip = get_ipython() |
@@ -128,9 +128,9 b' if not _PY3K:' | |||||
128 | return unicode(obj) |
|
128 | return unicode(obj) | |
129 | # Else encode it... but how? There are many choices... :) |
|
129 | # Else encode it... but how? There are many choices... :) | |
130 | # Replace unprintables with escape codes? |
|
130 | # Replace unprintables with escape codes? | |
131 |
#return unicode(obj).encode( |
|
131 | #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') | |
132 | # Replace unprintables with question marks? |
|
132 | # Replace unprintables with question marks? | |
133 |
#return unicode(obj).encode( |
|
133 | #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') | |
134 | # ... |
|
134 | # ... | |
135 | else: |
|
135 | else: | |
136 | _ustr = str |
|
136 | _ustr = str |
@@ -31,6 +31,7 b' from IPython.core.usage import interactive_usage, default_banner' | |||||
31 | from IPython.core.interactiveshell import InteractiveShell, InteractiveShellABC |
|
31 | from IPython.core.interactiveshell import InteractiveShell, InteractiveShellABC | |
32 | from IPython.core.pylabtools import pylab_activate |
|
32 | from IPython.core.pylabtools import pylab_activate | |
33 | from IPython.testing.skipdoctest import skip_doctest |
|
33 | from IPython.testing.skipdoctest import skip_doctest | |
|
34 | from IPython.utils.encoding import get_stream_enc | |||
34 | from IPython.utils import py3compat |
|
35 | from IPython.utils import py3compat | |
35 | from IPython.utils.terminal import toggle_set_term_title, set_term_title |
|
36 | from IPython.utils.terminal import toggle_set_term_title, set_term_title | |
36 | from IPython.utils.process import abbrev_cwd |
|
37 | from IPython.utils.process import abbrev_cwd | |
@@ -319,7 +320,7 b' class TerminalInteractiveShell(InteractiveShell):' | |||||
319 |
|
320 | |||
320 | for i in range(hlen - hlen_before_cell): |
|
321 | for i in range(hlen - hlen_before_cell): | |
321 | self.readline.remove_history_item(hlen - i - 1) |
|
322 | self.readline.remove_history_item(hlen - i - 1) | |
322 |
stdin_encoding = |
|
323 | stdin_encoding = get_stream_enc(sys.stdin, 'utf-8') | |
323 | self.readline.add_history(py3compat.unicode_to_str(source_raw.rstrip(), |
|
324 | self.readline.add_history(py3compat.unicode_to_str(source_raw.rstrip(), | |
324 | stdin_encoding)) |
|
325 | stdin_encoding)) | |
325 | return self.readline.get_current_history_length() |
|
326 | return self.readline.get_current_history_length() |
@@ -45,6 +45,7 b' from IPython.utils.process import find_cmd, getoutputerror' | |||||
45 | from IPython.utils.text import list_strings |
|
45 | from IPython.utils.text import list_strings | |
46 | from IPython.utils.io import temp_pyfile, Tee |
|
46 | from IPython.utils.io import temp_pyfile, Tee | |
47 | from IPython.utils import py3compat |
|
47 | from IPython.utils import py3compat | |
|
48 | from IPython.utils.encoding import getdefaultencoding | |||
48 |
|
49 | |||
49 | from . import decorators as dec |
|
50 | from . import decorators as dec | |
50 | from . import skipdoctest |
|
51 | from . import skipdoctest | |
@@ -322,7 +323,7 b' else:' | |||||
322 | # so we need a class that can handle both. |
|
323 | # so we need a class that can handle both. | |
323 | class MyStringIO(StringIO): |
|
324 | class MyStringIO(StringIO): | |
324 | def write(self, s): |
|
325 | def write(self, s): | |
325 |
s = py3compat.cast_unicode(s, encoding= |
|
326 | s = py3compat.cast_unicode(s, encoding=getdefaultencoding()) | |
326 | super(MyStringIO, self).write(s) |
|
327 | super(MyStringIO, self).write(s) | |
327 |
|
328 | |||
328 | notprinted_msg = """Did not find {0!r} in printed output (on {1}): |
|
329 | notprinted_msg = """Did not find {0!r} in printed output (on {1}): |
@@ -26,6 +26,7 b' from .autoattr import auto_attr' | |||||
26 | from ._process_common import getoutput, arg_split |
|
26 | from ._process_common import getoutput, arg_split | |
27 | from IPython.utils import text |
|
27 | from IPython.utils import text | |
28 | from IPython.utils import py3compat |
|
28 | from IPython.utils import py3compat | |
|
29 | from IPython.utils.encoding import getdefaultencoding | |||
29 |
|
30 | |||
30 | #----------------------------------------------------------------------------- |
|
31 | #----------------------------------------------------------------------------- | |
31 | # Function definitions |
|
32 | # Function definitions | |
@@ -128,7 +129,7 b' class ProcessHandler(object):' | |||||
128 | int : child's exitstatus |
|
129 | int : child's exitstatus | |
129 | """ |
|
130 | """ | |
130 | # Get likely encoding for the output. |
|
131 | # Get likely encoding for the output. | |
131 |
enc = |
|
132 | enc = getdefaultencoding() | |
132 |
|
133 | |||
133 | # Patterns to match on the output, for pexpect. We read input and |
|
134 | # Patterns to match on the output, for pexpect. We read input and | |
134 | # allow either a short timeout or EOF |
|
135 | # allow either a short timeout or EOF |
@@ -17,6 +17,7 b' import types' | |||||
17 | from datetime import datetime |
|
17 | from datetime import datetime | |
18 |
|
18 | |||
19 | from IPython.utils import py3compat |
|
19 | from IPython.utils import py3compat | |
|
20 | from IPython.utils.encoding import getdefaultencoding | |||
20 | from IPython.utils import text |
|
21 | from IPython.utils import text | |
21 | next_attr_name = '__next__' if py3compat.PY3 else 'next' |
|
22 | next_attr_name = '__next__' if py3compat.PY3 else 'next' | |
22 |
|
23 | |||
@@ -135,7 +136,7 b' def json_clean(obj):' | |||||
135 | return obj |
|
136 | return obj | |
136 |
|
137 | |||
137 | if isinstance(obj, bytes): |
|
138 | if isinstance(obj, bytes): | |
138 |
return obj.decode( |
|
139 | return obj.decode(getdefaultencoding(), 'replace') | |
139 |
|
140 | |||
140 | if isinstance(obj, container_to_list) or ( |
|
141 | if isinstance(obj, container_to_list) or ( | |
141 | hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)): |
|
142 | hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)): |
@@ -5,43 +5,14 b' import functools' | |||||
5 | import sys |
|
5 | import sys | |
6 | import re |
|
6 | import re | |
7 | import types |
|
7 | import types | |
8 | import locale |
|
8 | ||
|
9 | from IPython.utils.encoding import getdefaultencoding | |||
9 |
|
10 | |||
10 | orig_open = open |
|
11 | orig_open = open | |
11 |
|
12 | |||
12 | def no_code(x, encoding=None): |
|
13 | def no_code(x, encoding=None): | |
13 | return x |
|
14 | return x | |
14 |
|
15 | |||
15 | # to deal with the possibility of sys.std* not being a stream at all |
|
|||
16 | def get_stream_enc(stream, default=None): |
|
|||
17 | if not hasattr(stream, 'encoding') or not stream.encoding: |
|
|||
18 | return default |
|
|||
19 | else: |
|
|||
20 | return stream.encoding |
|
|||
21 |
|
||||
22 | # Less conservative replacement for sys.getdefaultencoding, that will try |
|
|||
23 | # to match the environment. |
|
|||
24 | # Defined here as central function, so if we find better choices, we |
|
|||
25 | # won't need to make changes all over IPython. |
|
|||
26 | def getdefaultencoding(): |
|
|||
27 | """Return IPython's guess for the default encoding for bytes as text. |
|
|||
28 |
|
||||
29 | Asks for stdin.encoding first, to match the calling Terminal, but that |
|
|||
30 | is often None for subprocesses. Fall back on locale.getpreferredencoding() |
|
|||
31 | which should be a sensible platform default (that respects LANG environment), |
|
|||
32 | and finally to sys.getdefaultencoding() which is the most conservative option, |
|
|||
33 | and usually ASCII. |
|
|||
34 | """ |
|
|||
35 | enc = get_stream_enc(sys.stdin) |
|
|||
36 | if not enc or enc=='ascii': |
|
|||
37 | try: |
|
|||
38 | # There are reports of getpreferredencoding raising errors |
|
|||
39 | # in some cases, which may well be fixed, but let's be conservative here. |
|
|||
40 | enc = locale.getpreferredencoding() |
|
|||
41 | except Exception: |
|
|||
42 | pass |
|
|||
43 | return enc or sys.getdefaultencoding() |
|
|||
44 |
|
||||
45 | def decode(s, encoding=None): |
|
16 | def decode(s, encoding=None): | |
46 | encoding = encoding or getdefaultencoding() |
|
17 | encoding = encoding or getdefaultencoding() | |
47 | return s.decode(encoding, "replace") |
|
18 | return s.decode(encoding, "replace") |
@@ -4,7 +4,7 b' from io import StringIO' | |||||
4 |
|
4 | |||
5 | from session import extract_header, Message |
|
5 | from session import extract_header, Message | |
6 |
|
6 | |||
7 |
from IPython.utils import io, text, |
|
7 | from IPython.utils import io, text, encoding | |
8 |
|
8 | |||
9 | #----------------------------------------------------------------------------- |
|
9 | #----------------------------------------------------------------------------- | |
10 | # Globals |
|
10 | # Globals | |
@@ -69,7 +69,7 b' class OutStream(object):' | |||||
69 | else: |
|
69 | else: | |
70 | # Make sure that we're handling unicode |
|
70 | # Make sure that we're handling unicode | |
71 | if not isinstance(string, unicode): |
|
71 | if not isinstance(string, unicode): | |
72 |
enc = |
|
72 | enc = encoding.getdefaultencoding() | |
73 | string = string.decode(enc, 'replace') |
|
73 | string = string.decode(enc, 'replace') | |
74 |
|
74 | |||
75 | self._buffer.write(string) |
|
75 | self._buffer.write(string) |
General Comments 0
You need to be logged in to leave comments.
Login now