##// END OF EJS Templates
encoding: improve handling of buggy getpreferredencoding() on Mac OS X...
Dan Villiom Podlaski Christiansen -
r11892:2be70ca1 stable
parent child Browse files
Show More
@@ -1,77 +1,97 b''
1 1 # encoding.py - character transcoding support for Mercurial
2 2 #
3 3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import error
9 9 import sys, unicodedata, locale, os
10 10
11 _encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
def _getpreferredencoding():
    '''
    Return the character encoding selected by the locale environment.

    On darwin, locale.getpreferredencoding ignores the locale environment
    and always returns mac-roman. http://bugs.python.org/issue6202 fixes
    this for Python 2.7 and up. This is the same corrected code for
    earlier Python versions.

    However, we can't use a version check for this method, as some
    distributions patch Python to fix this. Instead, we use it as a
    'fixer' for the mac-roman encoding, as it is unlikely that mac-roman
    is the encoding the user actually expects.

    Falls back to 'ascii' when no encoding can be determined. A
    locale.Error raised by setlocale() is not caught here; it propagates
    to the caller.
    '''
    try:
        locale.CODESET
    except AttributeError:
        # Fall back to parsing environment variables :-(
        # getdefaultlocale() may report no encoding at all (None).
        return locale.getdefaultlocale()[1] or 'ascii'

    oldloc = locale.setlocale(locale.LC_CTYPE)
    # Activate the environment's locale just long enough to query its
    # codeset.
    locale.setlocale(locale.LC_CTYPE, "")
    try:
        result = locale.nl_langinfo(locale.CODESET)
    finally:
        # Always put the previous LC_CTYPE back, even if the query failed.
        locale.setlocale(locale.LC_CTYPE, oldloc)

    return result or 'ascii'
34
def _asciifixer():
    # Shared fixer for the names that simply stand for plain ASCII.
    return 'ascii'

# Maps an encoding name reported by the locale machinery to a callable
# that returns the encoding name to use in its place.
_encodingfixers = {
    '646': _asciifixer,
    'ANSI_X3.4-1968': _asciifixer,
    'mac-roman': _getpreferredencoding,
}
12 40
# Determine the local encoding. An explicit HGENCODING wins; otherwise ask
# the locale machinery and pass the answer through _encodingfixers, which
# replaces names that are known to be wrong or unusable as codec names.
try:
    encoding = os.environ.get("HGENCODING")
    if not encoding:
        encoding = locale.getpreferredencoding() or 'ascii'
        # A fixer may come back with nothing; never leave encoding unset.
        encoding = (_encodingfixers.get(encoding, lambda: encoding)()
                    or 'ascii')
except locale.Error:
    # The locale could not be queried (e.g. the environment requests a
    # locale the system does not support); use the safest choice.
    encoding = 'ascii'
# Error handling applied when decoding local strings, see fromlocal().
encodingmode = os.environ.get("HGENCODINGMODE", "strict")
# Second encoding tried by tolocal() for data that is not valid UTF-8.
fallbackencoding = 'ISO-8859-1'
30 50
31 51 def tolocal(s):
32 52 """
33 53 Convert a string from internal UTF-8 to local encoding
34 54
35 55 All internal strings should be UTF-8 but some repos before the
36 56 implementation of locale support may contain latin1 or possibly
37 57 other character sets. We attempt to decode everything strictly
38 58 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
39 59 replace unknown characters.
40 60 """
41 61 for e in ('UTF-8', fallbackencoding):
42 62 try:
43 63 u = s.decode(e) # attempt strict decoding
44 64 return u.encode(encoding, "replace")
45 65 except LookupError, k:
46 66 raise error.Abort("%s, please check your locale settings" % k)
47 67 except UnicodeDecodeError:
48 68 pass
49 69 u = s.decode("utf-8", "replace") # last ditch
50 70 return u.encode(encoding, "replace")
51 71
52 72 def fromlocal(s):
53 73 """
54 74 Convert a string from the local character encoding to UTF-8
55 75
56 76 We attempt to decode strings using the encoding mode set by
57 77 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
58 78 characters will cause an error message. Other modes include
59 79 'replace', which replaces unknown characters with a special
60 80 Unicode character, and 'ignore', which drops the character.
61 81 """
62 82 try:
63 83 return s.decode(encoding, encodingmode).encode("utf-8")
64 84 except UnicodeDecodeError, inst:
65 85 sub = s[max(0, inst.start - 10):inst.start + 10]
66 86 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
67 87 except LookupError, k:
68 88 raise error.Abort("%s, please check your locale settings" % k)
69 89
def colwidth(s):
    "Find the column width of a string in the local encoding for display"
    chars = s.decode(encoding, 'replace')
    eawidth = getattr(unicodedata, 'east_asian_width', None)
    if eawidth is None:
        # no width data available: count one column per character
        return len(chars)
    total = 0
    for ch in chars:
        # Wide, Fullwidth and Ambiguous characters take two columns
        if eawidth(ch) in 'WFA':
            total += 2
        else:
            total += 1
    return total
77 97
General Comments 0
You need to be logged in to leave comments. Login now