# HG changeset patch # User Dan Villiom Podlaski Christiansen # Date 2010-08-13 23:30:54 # Node ID 2be70ca1731165935aaf751c60f161655c87a7f5 # Parent 0bedf3a2062a468fccc7fb8359a17f5a036b001b encoding: improve handling of buggy getpreferredencoding() on Mac OS X Prior to version 2.7, calling locale.getpreferredencoding() would always return 'mac-roman' on Mac OS X. Previously, this was handled by a call to locale.setlocale(). Unfortunately, Python 2.6.5 and older have a bug where isspace() would incorrectly report True for 0x85 and 0xa0 after such a call. In order to fix this, we replace the previous _encodingfixup mapping with an _encodingfixers mapping. Rather than mapping encodings to their replacement, it maps them to a function returning the replacement. This allows us to provide a simplified implementation of getpreferredencoding() which extracts the expected encoding and restores the locale. This fix is based on a patch originally submitted by Martijn Pieters as well as feedback from Brodie Rao. diff --git a/mercurial/encoding.py b/mercurial/encoding.py --- a/mercurial/encoding.py +++ b/mercurial/encoding.py @@ -8,21 +8,41 @@ import error import sys, unicodedata, locale, os -_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'} +def _getpreferredencoding(): + ''' + On darwin, getpreferredencoding ignores the locale environment and + always returns mac-roman. http://bugs.python.org/issue6202 fixes this + for Python 2.7 and up. This is the same corrected code for earlier + Python versions. + + However, we can't use a version check for this method, as some distributions + patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman + encoding, as it is unlikely that this encoding is the actually expected. 
+ ''' + try: + locale.CODESET + except AttributeError: + # Fall back to parsing environment variables :-( + return locale.getdefaultlocale()[1] + + oldloc = locale.setlocale(locale.LC_CTYPE) + locale.setlocale(locale.LC_CTYPE, "") + result = locale.nl_langinfo(locale.CODESET) + locale.setlocale(locale.LC_CTYPE, oldloc) + + return result + +_encodingfixers = { + '646': lambda: 'ascii', + 'ANSI_X3.4-1968': lambda: 'ascii', + 'mac-roman': _getpreferredencoding +} try: encoding = os.environ.get("HGENCODING") - if sys.platform == 'darwin' and not encoding: - # On darwin, getpreferredencoding ignores the locale environment and - # always returns mac-roman. We override this if the environment is - # not C (has been customized by the user). - lc = locale.setlocale(locale.LC_CTYPE, '') - if lc == 'UTF-8': - locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8') - encoding = locale.getlocale()[1] if not encoding: encoding = locale.getpreferredencoding() or 'ascii' - encoding = _encodingfixup.get(encoding, encoding) + encoding = _encodingfixers.get(encoding, lambda: encoding)() except locale.Error: encoding = 'ascii' encodingmode = os.environ.get("HGENCODINGMODE", "strict")