encoding.py
67 lines
| 2.7 KiB
| text/x-python
|
PythonLexer
Brandon Parsons
|
# coding: utf-8
"""
Utilities for dealing with text encodings
"""
#-----------------------------------------------------------------------------
#  Copyright (C) 2008-2012  The IPython Development Team
#
#  Distributed under the terms of the BSD License.  The full license is in
#  the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
import sys | ||||
import locale | ||||
dhirschfeld
|
r12444 | import warnings | ||
Brandon Parsons
|
r6655 | |||
# Guards against the possibility of sys.std* not being a stream at all.
def get_stream_enc(stream, default=None):
    """Return the given stream's encoding or a default.

    There are cases where ``sys.std*`` might not actually be a stream, so
    the ``encoding`` attribute is looked up defensively, and ``default`` is
    returned if the attribute doesn't exist or evaluates as False.

    Parameters
    ----------
    stream : object
        Any object; it does not need to have an ``encoding`` attribute.
    default : object, optional
        Value returned when no usable encoding is found. ``None`` if not
        provided.

    Returns
    -------
    The value of ``stream.encoding`` when it is present and truthy,
    otherwise ``default``.
    """
    # One lookup covers both failure modes: a missing attribute and a
    # present-but-falsy value (None, '') are treated identically.
    encoding = getattr(stream, 'encoding', None)
    return encoding if encoding else default
# Less conservative replacement for sys.getdefaultencoding() that tries
# to match the environment.
# Defined here as a central function, so that if we find better choices
# we won't need to make changes all over IPython.
def getdefaultencoding():
    """Return IPython's guess for the default encoding for bytes as text.

    Asks for stdin.encoding first, to match the calling Terminal, but that
    is often None for subprocesses.  Falls back on
    locale.getpreferredencoding(), which should be a sensible platform
    default (that respects the LANG environment variable), and finally on
    sys.getdefaultencoding(), which is the most conservative option.

    Returns
    -------
    str
        The name of the chosen encoding.  ``'cp0'`` is never returned:
        it is replaced by ``'cp1252'`` and a RuntimeWarning is issued.
    """
    enc = get_stream_enc(sys.stdin)
    if not enc or enc == 'ascii':
        try:
            # There are reports of getpreferredencoding raising errors
            # in some cases, which may well be fixed, but let's be
            # conservative here.
            enc = locale.getpreferredencoding()
        except Exception:
            # Deliberate best-effort: keep whatever stdin reported (possibly
            # nothing) and let the fallback below take over.
            pass
    enc = enc or sys.getdefaultencoding()
    # On windows `cp0` can be returned to indicate that there is no code page.
    # Since cp0 is an invalid encoding return instead cp1252 which is the
    # Western European default.
    if enc == 'cp0':
        # Adjacent string literals are concatenated verbatim, so each piece
        # must carry its own trailing space.
        warnings.warn(
            "Invalid code page cp0 detected - using cp1252 instead. "
            "If cp1252 is incorrect please ensure a valid code page "
            "is defined for the process.", RuntimeWarning)
        return 'cp1252'
    return enc
Brandon Parsons
|
r6716 | |||
# Computed once, when this module is imported; later changes to sys.stdin
# or the locale are not reflected in this value.
DEFAULT_ENCODING = getdefaultencoding()