##// END OF EJS Templates
add prefer_stream arg to utils.encoding.getdefaultencoding...
MinRK -
Show More
@@ -1,67 +1,71 b''
1 # coding: utf-8
1 # coding: utf-8
2 """
2 """
3 Utilities for dealing with text encodings
3 Utilities for dealing with text encodings
4 """
4 """
5
5
6 #-----------------------------------------------------------------------------
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2008-2012 The IPython Development Team
7 # Copyright (C) 2008-2012 The IPython Development Team
8 #
8 #
9 # Distributed under the terms of the BSD License. The full license is in
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 # Imports
14 # Imports
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16 import sys
16 import sys
17 import locale
17 import locale
18 import warnings
18 import warnings
19
19
20 # to deal with the possibility of sys.std* not being a stream at all
20 # to deal with the possibility of sys.std* not being a stream at all
21 def get_stream_enc(stream, default=None):
21 def get_stream_enc(stream, default=None):
22 """Return the given stream's encoding or a default.
22 """Return the given stream's encoding or a default.
23
23
24 There are cases where ``sys.std*`` might not actually be a stream, so
24 There are cases where ``sys.std*`` might not actually be a stream, so
25 check for the encoding attribute prior to returning it, and return
25 check for the encoding attribute prior to returning it, and return
26 a default if it doesn't exist or evaluates as False. ``default``
26 a default if it doesn't exist or evaluates as False. ``default``
27 is None if not provided.
27 is None if not provided.
28 """
28 """
29 if not hasattr(stream, 'encoding') or not stream.encoding:
29 if not hasattr(stream, 'encoding') or not stream.encoding:
30 return default
30 return default
31 else:
31 else:
32 return stream.encoding
32 return stream.encoding
33
33
34 # Less conservative replacement for sys.getdefaultencoding, that will try
34 # Less conservative replacement for sys.getdefaultencoding, that will try
35 # to match the environment.
35 # to match the environment.
36 # Defined here as central function, so if we find better choices, we
36 # Defined here as central function, so if we find better choices, we
37 # won't need to make changes all over IPython.
37 # won't need to make changes all over IPython.
38 def getdefaultencoding():
38 def getdefaultencoding(prefer_stream=True):
39 """Return IPython's guess for the default encoding for bytes as text.
39 """Return IPython's guess for the default encoding for bytes as text.
40
40
41 Asks for stdin.encoding first, to match the calling Terminal, but that
41 If prefer_stream is True (default), asks for stdin.encoding first,
42 is often None for subprocesses. Fall back on locale.getpreferredencoding()
42 to match the calling Terminal, but that is often None for subprocesses.
43
44 Then fall back on locale.getpreferredencoding(),
43 which should be a sensible platform default (that respects LANG environment),
45 which should be a sensible platform default (that respects LANG environment),
44 and finally to sys.getdefaultencoding() which is the most conservative option,
46 and finally to sys.getdefaultencoding() which is the most conservative option,
45 and usually ASCII.
47 and usually ASCII on Python 2 or UTF8 on Python 3.
46 """
48 """
47 enc = get_stream_enc(sys.stdin)
49 enc = None
50 if prefer_stream:
51 enc = get_stream_enc(sys.stdin)
48 if not enc or enc=='ascii':
52 if not enc or enc=='ascii':
49 try:
53 try:
50 # There are reports of getpreferredencoding raising errors
54 # There are reports of getpreferredencoding raising errors
51 # in some cases, which may well be fixed, but let's be conservative here.
55 # in some cases, which may well be fixed, but let's be conservative here.
52 enc = locale.getpreferredencoding()
56 enc = locale.getpreferredencoding()
53 except Exception:
57 except Exception:
54 pass
58 pass
55 enc = enc or sys.getdefaultencoding()
59 enc = enc or sys.getdefaultencoding()
56 # On Windows, `cp0` can be returned to indicate that there is no code page.
60 # On Windows, `cp0` can be returned to indicate that there is no code page.
57 # Since cp0 is not a valid encoding, return cp1252 instead, which is the
61 # Since cp0 is not a valid encoding, return cp1252 instead, which is the
58 # Western European default.
62 # Western European default.
59 if enc == 'cp0':
63 if enc == 'cp0':
60 warnings.warn(
64 warnings.warn(
61 "Invalid code page cp0 detected - using cp1252 instead."
65 "Invalid code page cp0 detected - using cp1252 instead."
62 "If cp1252 is incorrect please ensure a valid code page "
66 "If cp1252 is incorrect please ensure a valid code page "
63 "is defined for the process.", RuntimeWarning)
67 "is defined for the process.", RuntimeWarning)
64 return 'cp1252'
68 return 'cp1252'
65 return enc
69 return enc
66
70
67 DEFAULT_ENCODING = getdefaultencoding()
71 DEFAULT_ENCODING = getdefaultencoding()
General Comments 0
You need to be logged in to leave comments. Login now