##// END OF EJS Templates
encoding: avoid localstr when a string can be encoded losslessly (issue2763)...
Matt Mackall -
r13940:b7b26e54 stable
parent child Browse files
Show More
@@ -1,146 +1,150 b''
1 # encoding.py - character transcoding support for Mercurial
1 # encoding.py - character transcoding support for Mercurial
2 #
2 #
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import error
8 import error
9 import unicodedata, locale, os
9 import unicodedata, locale, os
10
10
11 def _getpreferredencoding():
11 def _getpreferredencoding():
12 '''
12 '''
13 On darwin, getpreferredencoding ignores the locale environment and
13 On darwin, getpreferredencoding ignores the locale environment and
14 always returns mac-roman. http://bugs.python.org/issue6202 fixes this
14 always returns mac-roman. http://bugs.python.org/issue6202 fixes this
15 for Python 2.7 and up. This is the same corrected code for earlier
15 for Python 2.7 and up. This is the same corrected code for earlier
16 Python versions.
16 Python versions.
17
17
18 However, we can't use a version check for this method, as some distributions
18 However, we can't use a version check for this method, as some distributions
19 patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
19 patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
20 encoding, as it is unlikely that this encoding is the one actually expected.
20 encoding, as it is unlikely that this encoding is the one actually expected.
21 '''
21 '''
22 try:
22 try:
23 locale.CODESET
23 locale.CODESET
24 except AttributeError:
24 except AttributeError:
25 # Fall back to parsing environment variables :-(
25 # Fall back to parsing environment variables :-(
26 return locale.getdefaultlocale()[1]
26 return locale.getdefaultlocale()[1]
27
27
28 oldloc = locale.setlocale(locale.LC_CTYPE)
28 oldloc = locale.setlocale(locale.LC_CTYPE)
29 locale.setlocale(locale.LC_CTYPE, "")
29 locale.setlocale(locale.LC_CTYPE, "")
30 result = locale.nl_langinfo(locale.CODESET)
30 result = locale.nl_langinfo(locale.CODESET)
31 locale.setlocale(locale.LC_CTYPE, oldloc)
31 locale.setlocale(locale.LC_CTYPE, oldloc)
32
32
33 return result
33 return result
34
34
35 _encodingfixers = {
35 _encodingfixers = {
36 '646': lambda: 'ascii',
36 '646': lambda: 'ascii',
37 'ANSI_X3.4-1968': lambda: 'ascii',
37 'ANSI_X3.4-1968': lambda: 'ascii',
38 'mac-roman': _getpreferredencoding
38 'mac-roman': _getpreferredencoding
39 }
39 }
40
40
41 try:
41 try:
42 encoding = os.environ.get("HGENCODING")
42 encoding = os.environ.get("HGENCODING")
43 if not encoding:
43 if not encoding:
44 encoding = locale.getpreferredencoding() or 'ascii'
44 encoding = locale.getpreferredencoding() or 'ascii'
45 encoding = _encodingfixers.get(encoding, lambda: encoding)()
45 encoding = _encodingfixers.get(encoding, lambda: encoding)()
46 except locale.Error:
46 except locale.Error:
47 encoding = 'ascii'
47 encoding = 'ascii'
48 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
48 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
49 fallbackencoding = 'ISO-8859-1'
49 fallbackencoding = 'ISO-8859-1'
50
50
51 class localstr(str):
51 class localstr(str):
52 '''This class allows strings that are unmodified to be
52 '''This class allows strings that are unmodified to be
53 round-tripped to the local encoding and back'''
53 round-tripped to the local encoding and back'''
54 def __new__(cls, u, l):
54 def __new__(cls, u, l):
55 s = str.__new__(cls, l)
55 s = str.__new__(cls, l)
56 s._utf8 = u
56 s._utf8 = u
57 return s
57 return s
58 def __hash__(self):
58 def __hash__(self):
59 return hash(self._utf8) # avoid collisions in local string space
59 return hash(self._utf8) # avoid collisions in local string space
60
60
61 def tolocal(s):
61 def tolocal(s):
62 """
62 """
63 Convert a string from internal UTF-8 to local encoding
63 Convert a string from internal UTF-8 to local encoding
64
64
65 All internal strings should be UTF-8 but some repos before the
65 All internal strings should be UTF-8 but some repos before the
66 implementation of locale support may contain latin1 or possibly
66 implementation of locale support may contain latin1 or possibly
67 other character sets. We attempt to decode everything strictly
67 other character sets. We attempt to decode everything strictly
68 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
68 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
69 replace unknown characters.
69 replace unknown characters.
70
70
71 The localstr class is used to cache the known UTF-8 encoding of
71 The localstr class is used to cache the known UTF-8 encoding of
72 strings next to their local representation to allow lossless
72 strings next to their local representation to allow lossless
73 round-trip conversion back to UTF-8.
73 round-trip conversion back to UTF-8.
74
74
75 >>> u = 'foo: \\xc3\\xa4' # utf-8
75 >>> u = 'foo: \\xc3\\xa4' # utf-8
76 >>> l = tolocal(u)
76 >>> l = tolocal(u)
77 >>> l
77 >>> l
78 'foo: ?'
78 'foo: ?'
79 >>> fromlocal(l)
79 >>> fromlocal(l)
80 'foo: \\xc3\\xa4'
80 'foo: \\xc3\\xa4'
81 >>> u2 = 'foo: \\xc3\\xa1'
81 >>> u2 = 'foo: \\xc3\\xa1'
82 >>> d = { l: 1, tolocal(u2): 2 }
82 >>> d = { l: 1, tolocal(u2): 2 }
83 >>> d # no collision
83 >>> d # no collision
84 {'foo: ?': 1, 'foo: ?': 2}
84 {'foo: ?': 1, 'foo: ?': 2}
85 >>> 'foo: ?' in d
85 >>> 'foo: ?' in d
86 False
86 False
87 >>> l1 = 'foo: \\xe4' # historical latin1 fallback
87 >>> l1 = 'foo: \\xe4' # historical latin1 fallback
88 >>> l = tolocal(l1)
88 >>> l = tolocal(l1)
89 >>> l
89 >>> l
90 'foo: ?'
90 'foo: ?'
91 >>> fromlocal(l) # magically in utf-8
91 >>> fromlocal(l) # magically in utf-8
92 'foo: \\xc3\\xa4'
92 'foo: \\xc3\\xa4'
93 """
93 """
94
94
95 for e in ('UTF-8', fallbackencoding):
95 for e in ('UTF-8', fallbackencoding):
96 try:
96 try:
97 u = s.decode(e) # attempt strict decoding
97 u = s.decode(e) # attempt strict decoding
98 if e == 'UTF-8':
98 r = u.encode(encoding, "replace")
99 return localstr(s, u.encode(encoding, "replace"))
99 if u == r.decode(encoding):
100 # r is a safe, non-lossy encoding of s
101 return r
102 elif e == 'UTF-8':
103 return localstr(s, r)
100 else:
104 else:
101 return localstr(u.encode('UTF-8'),
105 return localstr(u.encode('UTF-8'), r)
102 u.encode(encoding, "replace"))
106
103 except LookupError, k:
107 except LookupError, k:
104 raise error.Abort("%s, please check your locale settings" % k)
108 raise error.Abort("%s, please check your locale settings" % k)
105 except UnicodeDecodeError:
109 except UnicodeDecodeError:
106 pass
110 pass
107 u = s.decode("utf-8", "replace") # last ditch
111 u = s.decode("utf-8", "replace") # last ditch
108 return u.encode(encoding, "replace") # can't round-trip
112 return u.encode(encoding, "replace") # can't round-trip
109
113
110 def fromlocal(s):
114 def fromlocal(s):
111 """
115 """
112 Convert a string from the local character encoding to UTF-8
116 Convert a string from the local character encoding to UTF-8
113
117
114 We attempt to decode strings using the encoding mode set by
118 We attempt to decode strings using the encoding mode set by
115 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
119 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
116 characters will cause an error message. Other modes include
120 characters will cause an error message. Other modes include
117 'replace', which replaces unknown characters with a special
121 'replace', which replaces unknown characters with a special
118 Unicode character, and 'ignore', which drops the character.
122 Unicode character, and 'ignore', which drops the character.
119 """
123 """
120
124
121 # can we do a lossless round-trip?
125 # can we do a lossless round-trip?
122 if isinstance(s, localstr):
126 if isinstance(s, localstr):
123 return s._utf8
127 return s._utf8
124
128
125 try:
129 try:
126 return s.decode(encoding, encodingmode).encode("utf-8")
130 return s.decode(encoding, encodingmode).encode("utf-8")
127 except UnicodeDecodeError, inst:
131 except UnicodeDecodeError, inst:
128 sub = s[max(0, inst.start - 10):inst.start + 10]
132 sub = s[max(0, inst.start - 10):inst.start + 10]
129 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
133 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
130 except LookupError, k:
134 except LookupError, k:
131 raise error.Abort("%s, please check your locale settings" % k)
135 raise error.Abort("%s, please check your locale settings" % k)
132
136
133 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
137 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
134 ambiguous = os.environ.get("HGENCODINGAMBIGUOUS", "narrow")
138 ambiguous = os.environ.get("HGENCODINGAMBIGUOUS", "narrow")
135
139
136 def colwidth(s):
140 def colwidth(s):
137 "Find the column width of a local-encoding string for display"
141 "Find the column width of a local-encoding string for display"
138 d = s.decode(encoding, 'replace')
142 d = s.decode(encoding, 'replace')
139 if hasattr(unicodedata, 'east_asian_width'):
143 if hasattr(unicodedata, 'east_asian_width'):
140 wide = "WF"
144 wide = "WF"
141 if ambiguous == "wide":
145 if ambiguous == "wide":
142 wide = "WFA"
146 wide = "WFA"
143 w = unicodedata.east_asian_width
147 w = unicodedata.east_asian_width
144 return sum([w(c) in wide and 2 or 1 for c in d])
148 return sum([w(c) in wide and 2 or 1 for c in d])
145 return len(d)
149 return len(d)
146
150
@@ -1,243 +1,249 b''
1 Test character encoding
1 Test character encoding
2
2
3 $ hg init t
3 $ hg init t
4 $ cd t
4 $ cd t
5
5
6 we need a repo with some legacy latin-1 changesets
6 we need a repo with some legacy latin-1 changesets
7
7
8 $ hg unbundle $TESTDIR/legacy-encoding.hg
8 $ hg unbundle $TESTDIR/legacy-encoding.hg
9 adding changesets
9 adding changesets
10 adding manifests
10 adding manifests
11 adding file changes
11 adding file changes
12 added 2 changesets with 2 changes to 1 files
12 added 2 changesets with 2 changes to 1 files
13 (run 'hg update' to get a working copy)
13 (run 'hg update' to get a working copy)
14 $ hg co
14 $ hg co
15 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
15 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
16 $ python << EOF
16 $ python << EOF
17 > f = file('latin-1', 'w'); f.write("latin-1 e' encoded: \xe9"); f.close()
17 > f = file('latin-1', 'w'); f.write("latin-1 e' encoded: \xe9"); f.close()
18 > f = file('utf-8', 'w'); f.write("utf-8 e' encoded: \xc3\xa9"); f.close()
18 > f = file('utf-8', 'w'); f.write("utf-8 e' encoded: \xc3\xa9"); f.close()
19 > f = file('latin-1-tag', 'w'); f.write("\xe9"); f.close()
19 > f = file('latin-1-tag', 'w'); f.write("\xe9"); f.close()
20 > EOF
20 > EOF
21
21
22 should fail with encoding error
22 should fail with encoding error
23
23
24 $ echo "plain old ascii" > a
24 $ echo "plain old ascii" > a
25 $ hg st
25 $ hg st
26 M a
26 M a
27 ? latin-1
27 ? latin-1
28 ? latin-1-tag
28 ? latin-1-tag
29 ? utf-8
29 ? utf-8
30 $ HGENCODING=ascii hg ci -l latin-1
30 $ HGENCODING=ascii hg ci -l latin-1
31 transaction abort!
31 transaction abort!
32 rollback completed
32 rollback completed
33 abort: decoding near ' encoded: \xe9': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)! (esc)
33 abort: decoding near ' encoded: \xe9': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)! (esc)
34 [255]
34 [255]
35
35
36 these should work
36 these should work
37
37
38 $ echo "latin-1" > a
38 $ echo "latin-1" > a
39 $ HGENCODING=latin-1 hg ci -l latin-1
39 $ HGENCODING=latin-1 hg ci -l latin-1
40 $ echo "utf-8" > a
40 $ echo "utf-8" > a
41 $ HGENCODING=utf-8 hg ci -l utf-8
41 $ HGENCODING=utf-8 hg ci -l utf-8
42 $ HGENCODING=latin-1 hg tag `cat latin-1-tag`
42 $ HGENCODING=latin-1 hg tag `cat latin-1-tag`
43 $ HGENCODING=latin-1 hg branch `cat latin-1-tag`
43 $ HGENCODING=latin-1 hg branch `cat latin-1-tag`
44 marked working directory as branch \xe9 (esc)
44 marked working directory as branch \xe9 (esc)
45 $ HGENCODING=latin-1 hg ci -m 'latin1 branch'
45 $ HGENCODING=latin-1 hg ci -m 'latin1 branch'
46 $ rm .hg/branch
46 $ rm .hg/branch
47
47
48 hg log (ascii)
48 hg log (ascii)
49
49
50 $ hg --encoding ascii log
50 $ hg --encoding ascii log
51 changeset: 5:093c6077d1c8
51 changeset: 5:093c6077d1c8
52 branch: ?
52 branch: ?
53 tag: tip
53 tag: tip
54 user: test
54 user: test
55 date: Thu Jan 01 00:00:00 1970 +0000
55 date: Thu Jan 01 00:00:00 1970 +0000
56 summary: latin1 branch
56 summary: latin1 branch
57
57
58 changeset: 4:94db611b4196
58 changeset: 4:94db611b4196
59 user: test
59 user: test
60 date: Thu Jan 01 00:00:00 1970 +0000
60 date: Thu Jan 01 00:00:00 1970 +0000
61 summary: Added tag ? for changeset ca661e7520de
61 summary: Added tag ? for changeset ca661e7520de
62
62
63 changeset: 3:ca661e7520de
63 changeset: 3:ca661e7520de
64 tag: ?
64 tag: ?
65 user: test
65 user: test
66 date: Thu Jan 01 00:00:00 1970 +0000
66 date: Thu Jan 01 00:00:00 1970 +0000
67 summary: utf-8 e' encoded: ?
67 summary: utf-8 e' encoded: ?
68
68
69 changeset: 2:650c6f3d55dd
69 changeset: 2:650c6f3d55dd
70 user: test
70 user: test
71 date: Thu Jan 01 00:00:00 1970 +0000
71 date: Thu Jan 01 00:00:00 1970 +0000
72 summary: latin-1 e' encoded: ?
72 summary: latin-1 e' encoded: ?
73
73
74 changeset: 1:0e5b7e3f9c4a
74 changeset: 1:0e5b7e3f9c4a
75 user: test
75 user: test
76 date: Mon Jan 12 13:46:40 1970 +0000
76 date: Mon Jan 12 13:46:40 1970 +0000
77 summary: koi8-r: ????? = u'\u0440\u0442\u0443\u0442\u044c'
77 summary: koi8-r: ????? = u'\u0440\u0442\u0443\u0442\u044c'
78
78
79 changeset: 0:1e78a93102a3
79 changeset: 0:1e78a93102a3
80 user: test
80 user: test
81 date: Mon Jan 12 13:46:40 1970 +0000
81 date: Mon Jan 12 13:46:40 1970 +0000
82 summary: latin-1 e': ? = u'\xe9'
82 summary: latin-1 e': ? = u'\xe9'
83
83
84
84
85 hg log (latin-1)
85 hg log (latin-1)
86
86
87 $ hg --encoding latin-1 log
87 $ hg --encoding latin-1 log
88 changeset: 5:093c6077d1c8
88 changeset: 5:093c6077d1c8
89 branch: \xe9 (esc)
89 branch: \xe9 (esc)
90 tag: tip
90 tag: tip
91 user: test
91 user: test
92 date: Thu Jan 01 00:00:00 1970 +0000
92 date: Thu Jan 01 00:00:00 1970 +0000
93 summary: latin1 branch
93 summary: latin1 branch
94
94
95 changeset: 4:94db611b4196
95 changeset: 4:94db611b4196
96 user: test
96 user: test
97 date: Thu Jan 01 00:00:00 1970 +0000
97 date: Thu Jan 01 00:00:00 1970 +0000
98 summary: Added tag \xe9 for changeset ca661e7520de (esc)
98 summary: Added tag \xe9 for changeset ca661e7520de (esc)
99
99
100 changeset: 3:ca661e7520de
100 changeset: 3:ca661e7520de
101 tag: \xe9 (esc)
101 tag: \xe9 (esc)
102 user: test
102 user: test
103 date: Thu Jan 01 00:00:00 1970 +0000
103 date: Thu Jan 01 00:00:00 1970 +0000
104 summary: utf-8 e' encoded: \xe9 (esc)
104 summary: utf-8 e' encoded: \xe9 (esc)
105
105
106 changeset: 2:650c6f3d55dd
106 changeset: 2:650c6f3d55dd
107 user: test
107 user: test
108 date: Thu Jan 01 00:00:00 1970 +0000
108 date: Thu Jan 01 00:00:00 1970 +0000
109 summary: latin-1 e' encoded: \xe9 (esc)
109 summary: latin-1 e' encoded: \xe9 (esc)
110
110
111 changeset: 1:0e5b7e3f9c4a
111 changeset: 1:0e5b7e3f9c4a
112 user: test
112 user: test
113 date: Mon Jan 12 13:46:40 1970 +0000
113 date: Mon Jan 12 13:46:40 1970 +0000
114 summary: koi8-r: \xd2\xd4\xd5\xd4\xd8 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
114 summary: koi8-r: \xd2\xd4\xd5\xd4\xd8 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
115
115
116 changeset: 0:1e78a93102a3
116 changeset: 0:1e78a93102a3
117 user: test
117 user: test
118 date: Mon Jan 12 13:46:40 1970 +0000
118 date: Mon Jan 12 13:46:40 1970 +0000
119 summary: latin-1 e': \xe9 = u'\\xe9' (esc)
119 summary: latin-1 e': \xe9 = u'\\xe9' (esc)
120
120
121
121
122 hg log (utf-8)
122 hg log (utf-8)
123
123
124 $ hg --encoding utf-8 log
124 $ hg --encoding utf-8 log
125 changeset: 5:093c6077d1c8
125 changeset: 5:093c6077d1c8
126 branch: \xc3\xa9 (esc)
126 branch: \xc3\xa9 (esc)
127 tag: tip
127 tag: tip
128 user: test
128 user: test
129 date: Thu Jan 01 00:00:00 1970 +0000
129 date: Thu Jan 01 00:00:00 1970 +0000
130 summary: latin1 branch
130 summary: latin1 branch
131
131
132 changeset: 4:94db611b4196
132 changeset: 4:94db611b4196
133 user: test
133 user: test
134 date: Thu Jan 01 00:00:00 1970 +0000
134 date: Thu Jan 01 00:00:00 1970 +0000
135 summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc)
135 summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc)
136
136
137 changeset: 3:ca661e7520de
137 changeset: 3:ca661e7520de
138 tag: \xc3\xa9 (esc)
138 tag: \xc3\xa9 (esc)
139 user: test
139 user: test
140 date: Thu Jan 01 00:00:00 1970 +0000
140 date: Thu Jan 01 00:00:00 1970 +0000
141 summary: utf-8 e' encoded: \xc3\xa9 (esc)
141 summary: utf-8 e' encoded: \xc3\xa9 (esc)
142
142
143 changeset: 2:650c6f3d55dd
143 changeset: 2:650c6f3d55dd
144 user: test
144 user: test
145 date: Thu Jan 01 00:00:00 1970 +0000
145 date: Thu Jan 01 00:00:00 1970 +0000
146 summary: latin-1 e' encoded: \xc3\xa9 (esc)
146 summary: latin-1 e' encoded: \xc3\xa9 (esc)
147
147
148 changeset: 1:0e5b7e3f9c4a
148 changeset: 1:0e5b7e3f9c4a
149 user: test
149 user: test
150 date: Mon Jan 12 13:46:40 1970 +0000
150 date: Mon Jan 12 13:46:40 1970 +0000
151 summary: koi8-r: \xc3\x92\xc3\x94\xc3\x95\xc3\x94\xc3\x98 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
151 summary: koi8-r: \xc3\x92\xc3\x94\xc3\x95\xc3\x94\xc3\x98 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
152
152
153 changeset: 0:1e78a93102a3
153 changeset: 0:1e78a93102a3
154 user: test
154 user: test
155 date: Mon Jan 12 13:46:40 1970 +0000
155 date: Mon Jan 12 13:46:40 1970 +0000
156 summary: latin-1 e': \xc3\xa9 = u'\\xe9' (esc)
156 summary: latin-1 e': \xc3\xa9 = u'\\xe9' (esc)
157
157
158
158
159 hg tags (ascii)
159 hg tags (ascii)
160
160
161 $ HGENCODING=ascii hg tags
161 $ HGENCODING=ascii hg tags
162 tip 5:093c6077d1c8
162 tip 5:093c6077d1c8
163 ? 3:ca661e7520de
163 ? 3:ca661e7520de
164
164
165 hg tags (latin-1)
165 hg tags (latin-1)
166
166
167 $ HGENCODING=latin-1 hg tags
167 $ HGENCODING=latin-1 hg tags
168 tip 5:093c6077d1c8
168 tip 5:093c6077d1c8
169 \xe9 3:ca661e7520de (esc)
169 \xe9 3:ca661e7520de (esc)
170
170
171 hg tags (utf-8)
171 hg tags (utf-8)
172
172
173 $ HGENCODING=utf-8 hg tags
173 $ HGENCODING=utf-8 hg tags
174 tip 5:093c6077d1c8
174 tip 5:093c6077d1c8
175 \xc3\xa9 3:ca661e7520de (esc)
175 \xc3\xa9 3:ca661e7520de (esc)
176
176
177 hg branches (ascii)
177 hg branches (ascii)
178
178
179 $ HGENCODING=ascii hg branches
179 $ HGENCODING=ascii hg branches
180 ? 5:093c6077d1c8
180 ? 5:093c6077d1c8
181 default 4:94db611b4196 (inactive)
181 default 4:94db611b4196 (inactive)
182
182
183 hg branches (latin-1)
183 hg branches (latin-1)
184
184
185 $ HGENCODING=latin-1 hg branches
185 $ HGENCODING=latin-1 hg branches
186 \xe9 5:093c6077d1c8 (esc)
186 \xe9 5:093c6077d1c8 (esc)
187 default 4:94db611b4196 (inactive)
187 default 4:94db611b4196 (inactive)
188
188
189 hg branches (utf-8)
189 hg branches (utf-8)
190
190
191 $ HGENCODING=utf-8 hg branches
191 $ HGENCODING=utf-8 hg branches
192 \xc3\xa9 5:093c6077d1c8 (esc)
192 \xc3\xa9 5:093c6077d1c8 (esc)
193 default 4:94db611b4196 (inactive)
193 default 4:94db611b4196 (inactive)
194 $ echo '[ui]' >> .hg/hgrc
194 $ echo '[ui]' >> .hg/hgrc
195 $ echo 'fallbackencoding = koi8-r' >> .hg/hgrc
195 $ echo 'fallbackencoding = koi8-r' >> .hg/hgrc
196
196
197 hg log (utf-8)
197 hg log (utf-8)
198
198
199 $ HGENCODING=utf-8 hg log
199 $ HGENCODING=utf-8 hg log
200 changeset: 5:093c6077d1c8
200 changeset: 5:093c6077d1c8
201 branch: \xc3\xa9 (esc)
201 branch: \xc3\xa9 (esc)
202 tag: tip
202 tag: tip
203 user: test
203 user: test
204 date: Thu Jan 01 00:00:00 1970 +0000
204 date: Thu Jan 01 00:00:00 1970 +0000
205 summary: latin1 branch
205 summary: latin1 branch
206
206
207 changeset: 4:94db611b4196
207 changeset: 4:94db611b4196
208 user: test
208 user: test
209 date: Thu Jan 01 00:00:00 1970 +0000
209 date: Thu Jan 01 00:00:00 1970 +0000
210 summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc)
210 summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc)
211
211
212 changeset: 3:ca661e7520de
212 changeset: 3:ca661e7520de
213 tag: \xc3\xa9 (esc)
213 tag: \xc3\xa9 (esc)
214 user: test
214 user: test
215 date: Thu Jan 01 00:00:00 1970 +0000
215 date: Thu Jan 01 00:00:00 1970 +0000
216 summary: utf-8 e' encoded: \xc3\xa9 (esc)
216 summary: utf-8 e' encoded: \xc3\xa9 (esc)
217
217
218 changeset: 2:650c6f3d55dd
218 changeset: 2:650c6f3d55dd
219 user: test
219 user: test
220 date: Thu Jan 01 00:00:00 1970 +0000
220 date: Thu Jan 01 00:00:00 1970 +0000
221 summary: latin-1 e' encoded: \xc3\xa9 (esc)
221 summary: latin-1 e' encoded: \xc3\xa9 (esc)
222
222
223 changeset: 1:0e5b7e3f9c4a
223 changeset: 1:0e5b7e3f9c4a
224 user: test
224 user: test
225 date: Mon Jan 12 13:46:40 1970 +0000
225 date: Mon Jan 12 13:46:40 1970 +0000
226 summary: koi8-r: \xd1\x80\xd1\x82\xd1\x83\xd1\x82\xd1\x8c = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
226 summary: koi8-r: \xd1\x80\xd1\x82\xd1\x83\xd1\x82\xd1\x8c = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
227
227
228 changeset: 0:1e78a93102a3
228 changeset: 0:1e78a93102a3
229 user: test
229 user: test
230 date: Mon Jan 12 13:46:40 1970 +0000
230 date: Mon Jan 12 13:46:40 1970 +0000
231 summary: latin-1 e': \xd0\x98 = u'\\xe9' (esc)
231 summary: latin-1 e': \xd0\x98 = u'\\xe9' (esc)
232
232
233
233
234 hg log (dolphin)
234 hg log (dolphin)
235
235
236 $ HGENCODING=dolphin hg log
236 $ HGENCODING=dolphin hg log
237 abort: unknown encoding: dolphin, please check your locale settings
237 abort: unknown encoding: dolphin, please check your locale settings
238 [255]
238 [255]
239 $ HGENCODING=ascii hg branch `cat latin-1-tag`
239 $ HGENCODING=ascii hg branch `cat latin-1-tag`
240 abort: decoding near '\xe9': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)! (esc)
240 abort: decoding near '\xe9': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)! (esc)
241 [255]
241 [255]
242 $ cp latin-1-tag .hg/branch
242 $ cp latin-1-tag .hg/branch
243 $ HGENCODING=latin-1 hg ci -m 'auto-promote legacy name'
243 $ HGENCODING=latin-1 hg ci -m 'auto-promote legacy name'
244
245 Test roundtrip encoding of lookup tables when not using UTF-8 (issue2763)
246
247 $ HGENCODING=latin-1 hg up `cat latin-1-tag`
248 0 files updated, 0 files merged, 1 files removed, 0 files unresolved
249
General Comments 0
You need to be logged in to leave comments. Login now