Show More
@@ -1,56 +1,67 | |||||
1 | # coding: utf-8 |
|
1 | # coding: utf-8 | |
2 | """ |
|
2 | """ | |
3 | Utilities for dealing with text encodings |
|
3 | Utilities for dealing with text encodings | |
4 | """ |
|
4 | """ | |
5 |
|
5 | |||
6 | #----------------------------------------------------------------------------- |
|
6 | #----------------------------------------------------------------------------- | |
7 | # Copyright (C) 2008-2012 The IPython Development Team |
|
7 | # Copyright (C) 2008-2012 The IPython Development Team | |
8 | # |
|
8 | # | |
9 | # Distributed under the terms of the BSD License. The full license is in |
|
9 | # Distributed under the terms of the BSD License. The full license is in | |
10 | # the file COPYING, distributed as part of this software. |
|
10 | # the file COPYING, distributed as part of this software. | |
11 | #----------------------------------------------------------------------------- |
|
11 | #----------------------------------------------------------------------------- | |
12 |
|
12 | |||
13 | #----------------------------------------------------------------------------- |
|
13 | #----------------------------------------------------------------------------- | |
14 | # Imports |
|
14 | # Imports | |
15 | #----------------------------------------------------------------------------- |
|
15 | #----------------------------------------------------------------------------- | |
16 | import sys |
|
16 | import sys | |
17 | import locale |
|
17 | import locale | |
|
18 | import warnings | |||
18 |
|
19 | |||
19 | # to deal with the possibility of sys.std* not being a stream at all |
|
20 | # to deal with the possibility of sys.std* not being a stream at all | |
def get_stream_enc(stream, default=None):
    """Return the given stream's encoding or a default.

    There are cases where ``sys.std*`` might not actually be a stream (it
    may have been replaced by an arbitrary object, or be None), so the
    ``encoding`` attribute is looked up defensively.

    ``stream`` may be any object. If it has no ``encoding`` attribute, or
    that attribute evaluates as False (e.g. ``None`` or ``''``), ``default``
    is returned instead. ``default`` is None if not provided.
    """
    # A single getattr replaces the hasattr check plus repeated attribute
    # reads, so the value that is tested is the value that is returned.
    return getattr(stream, 'encoding', None) or default
32 |
|
33 | |||
33 | # Less conservative replacement for sys.getdefaultencoding, that will try |
|
34 | # Less conservative replacement for sys.getdefaultencoding, that will try | |
34 | # to match the environment. |
|
35 | # to match the environment. | |
35 | # Defined here as central function, so if we find better choices, we |
|
36 | # Defined here as central function, so if we find better choices, we | |
36 | # won't need to make changes all over IPython. |
|
37 | # won't need to make changes all over IPython. | |
def getdefaultencoding():
    """Return IPython's guess for the default encoding for bytes as text.

    Asks for stdin.encoding first, to match the calling Terminal, but that
    is often None for subprocesses.  Fall back on locale.getpreferredencoding()
    which should be a sensible platform default (that respects LANG environment),
    and finally to sys.getdefaultencoding() which is the most conservative option,
    and usually ASCII.

    If the result is the invalid Windows code page ``cp0``, a RuntimeWarning
    is issued and ``'cp1252'`` is returned instead.
    """
    enc = get_stream_enc(sys.stdin)
    if not enc or enc == 'ascii':
        try:
            # There are reports of getpreferredencoding raising errors
            # in some cases, which may well be fixed, but let's be conservative here.
            enc = locale.getpreferredencoding()
        except Exception:
            # Deliberate best-effort: keep whatever stdin reported (possibly
            # nothing) and let the sys.getdefaultencoding() fallback apply.
            pass
    enc = enc or sys.getdefaultencoding()
    # On windows `cp0` can be returned to indicate that there is no code page.
    # Since cp0 is an invalid encoding return instead cp1252 which is the
    # Western European default.
    if enc == 'cp0':
        # Adjacent literals are concatenated verbatim, so each fragment must
        # carry its own trailing space to keep the sentences separated.
        warnings.warn(
            "Invalid code page cp0 detected - using cp1252 instead. "
            "If cp1252 is incorrect please ensure a valid code page "
            "is defined for the process.", RuntimeWarning)
        return 'cp1252'
    return enc
55 |
|
66 | |||
# Computed once, when this module is imported, by calling
# getdefaultencoding(); it is not re-evaluated afterwards.
DEFAULT_ENCODING = getdefaultencoding()
General Comments 0
You need to be logged in to leave comments.
Login now