##// END OF EJS Templates
test-encoding: enable fuzz testing of utf8b roundtrip...
Yuya Nishihara -
r26966:51fa43a3 default
parent child Browse files
Show More
@@ -1,274 +1,285
1 Test character encoding
1 Test character encoding
2
2
3 $ hg init t
3 $ hg init t
4 $ cd t
4 $ cd t
5
5
6 we need a repo with some legacy latin-1 changesets
6 we need a repo with some legacy latin-1 changesets
7
7
8 $ hg unbundle "$TESTDIR/bundles/legacy-encoding.hg"
8 $ hg unbundle "$TESTDIR/bundles/legacy-encoding.hg"
9 adding changesets
9 adding changesets
10 adding manifests
10 adding manifests
11 adding file changes
11 adding file changes
12 added 2 changesets with 2 changes to 1 files
12 added 2 changesets with 2 changes to 1 files
13 (run 'hg update' to get a working copy)
13 (run 'hg update' to get a working copy)
14 $ hg co
14 $ hg co
15 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
15 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
16 $ python << EOF
16 $ python << EOF
17 > f = file('latin-1', 'w'); f.write("latin-1 e' encoded: \xe9"); f.close()
17 > f = file('latin-1', 'w'); f.write("latin-1 e' encoded: \xe9"); f.close()
18 > f = file('utf-8', 'w'); f.write("utf-8 e' encoded: \xc3\xa9"); f.close()
18 > f = file('utf-8', 'w'); f.write("utf-8 e' encoded: \xc3\xa9"); f.close()
19 > f = file('latin-1-tag', 'w'); f.write("\xe9"); f.close()
19 > f = file('latin-1-tag', 'w'); f.write("\xe9"); f.close()
20 > EOF
20 > EOF
21
21
22 should fail with encoding error
22 should fail with encoding error
23
23
24 $ echo "plain old ascii" > a
24 $ echo "plain old ascii" > a
25 $ hg st
25 $ hg st
26 M a
26 M a
27 ? latin-1
27 ? latin-1
28 ? latin-1-tag
28 ? latin-1-tag
29 ? utf-8
29 ? utf-8
30 $ HGENCODING=ascii hg ci -l latin-1
30 $ HGENCODING=ascii hg ci -l latin-1
31 transaction abort!
31 transaction abort!
32 rollback completed
32 rollback completed
33 abort: decoding near ' encoded: \xe9': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)! (esc)
33 abort: decoding near ' encoded: \xe9': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)! (esc)
34 [255]
34 [255]
35
35
36 these should work
36 these should work
37
37
38 $ echo "latin-1" > a
38 $ echo "latin-1" > a
39 $ HGENCODING=latin-1 hg ci -l latin-1
39 $ HGENCODING=latin-1 hg ci -l latin-1
40 $ echo "utf-8" > a
40 $ echo "utf-8" > a
41 $ HGENCODING=utf-8 hg ci -l utf-8
41 $ HGENCODING=utf-8 hg ci -l utf-8
42 $ HGENCODING=latin-1 hg tag `cat latin-1-tag`
42 $ HGENCODING=latin-1 hg tag `cat latin-1-tag`
43 $ HGENCODING=latin-1 hg branch `cat latin-1-tag`
43 $ HGENCODING=latin-1 hg branch `cat latin-1-tag`
44 marked working directory as branch \xe9 (esc)
44 marked working directory as branch \xe9 (esc)
45 (branches are permanent and global, did you want a bookmark?)
45 (branches are permanent and global, did you want a bookmark?)
46 $ HGENCODING=latin-1 hg ci -m 'latin1 branch'
46 $ HGENCODING=latin-1 hg ci -m 'latin1 branch'
47 $ hg -q rollback
47 $ hg -q rollback
48 $ HGENCODING=latin-1 hg branch
48 $ HGENCODING=latin-1 hg branch
49 \xe9 (esc)
49 \xe9 (esc)
50 $ HGENCODING=latin-1 hg ci -m 'latin1 branch'
50 $ HGENCODING=latin-1 hg ci -m 'latin1 branch'
51 $ rm .hg/branch
51 $ rm .hg/branch
52
52
53 hg log (ascii)
53 hg log (ascii)
54
54
55 $ hg --encoding ascii log
55 $ hg --encoding ascii log
56 changeset: 5:a52c0692f24a
56 changeset: 5:a52c0692f24a
57 branch: ?
57 branch: ?
58 tag: tip
58 tag: tip
59 user: test
59 user: test
60 date: Thu Jan 01 00:00:00 1970 +0000
60 date: Thu Jan 01 00:00:00 1970 +0000
61 summary: latin1 branch
61 summary: latin1 branch
62
62
63 changeset: 4:94db611b4196
63 changeset: 4:94db611b4196
64 user: test
64 user: test
65 date: Thu Jan 01 00:00:00 1970 +0000
65 date: Thu Jan 01 00:00:00 1970 +0000
66 summary: Added tag ? for changeset ca661e7520de
66 summary: Added tag ? for changeset ca661e7520de
67
67
68 changeset: 3:ca661e7520de
68 changeset: 3:ca661e7520de
69 tag: ?
69 tag: ?
70 user: test
70 user: test
71 date: Thu Jan 01 00:00:00 1970 +0000
71 date: Thu Jan 01 00:00:00 1970 +0000
72 summary: utf-8 e' encoded: ?
72 summary: utf-8 e' encoded: ?
73
73
74 changeset: 2:650c6f3d55dd
74 changeset: 2:650c6f3d55dd
75 user: test
75 user: test
76 date: Thu Jan 01 00:00:00 1970 +0000
76 date: Thu Jan 01 00:00:00 1970 +0000
77 summary: latin-1 e' encoded: ?
77 summary: latin-1 e' encoded: ?
78
78
79 changeset: 1:0e5b7e3f9c4a
79 changeset: 1:0e5b7e3f9c4a
80 user: test
80 user: test
81 date: Mon Jan 12 13:46:40 1970 +0000
81 date: Mon Jan 12 13:46:40 1970 +0000
82 summary: koi8-r: ????? = u'\u0440\u0442\u0443\u0442\u044c'
82 summary: koi8-r: ????? = u'\u0440\u0442\u0443\u0442\u044c'
83
83
84 changeset: 0:1e78a93102a3
84 changeset: 0:1e78a93102a3
85 user: test
85 user: test
86 date: Mon Jan 12 13:46:40 1970 +0000
86 date: Mon Jan 12 13:46:40 1970 +0000
87 summary: latin-1 e': ? = u'\xe9'
87 summary: latin-1 e': ? = u'\xe9'
88
88
89
89
90 hg log (latin-1)
90 hg log (latin-1)
91
91
92 $ hg --encoding latin-1 log
92 $ hg --encoding latin-1 log
93 changeset: 5:a52c0692f24a
93 changeset: 5:a52c0692f24a
94 branch: \xe9 (esc)
94 branch: \xe9 (esc)
95 tag: tip
95 tag: tip
96 user: test
96 user: test
97 date: Thu Jan 01 00:00:00 1970 +0000
97 date: Thu Jan 01 00:00:00 1970 +0000
98 summary: latin1 branch
98 summary: latin1 branch
99
99
100 changeset: 4:94db611b4196
100 changeset: 4:94db611b4196
101 user: test
101 user: test
102 date: Thu Jan 01 00:00:00 1970 +0000
102 date: Thu Jan 01 00:00:00 1970 +0000
103 summary: Added tag \xe9 for changeset ca661e7520de (esc)
103 summary: Added tag \xe9 for changeset ca661e7520de (esc)
104
104
105 changeset: 3:ca661e7520de
105 changeset: 3:ca661e7520de
106 tag: \xe9 (esc)
106 tag: \xe9 (esc)
107 user: test
107 user: test
108 date: Thu Jan 01 00:00:00 1970 +0000
108 date: Thu Jan 01 00:00:00 1970 +0000
109 summary: utf-8 e' encoded: \xe9 (esc)
109 summary: utf-8 e' encoded: \xe9 (esc)
110
110
111 changeset: 2:650c6f3d55dd
111 changeset: 2:650c6f3d55dd
112 user: test
112 user: test
113 date: Thu Jan 01 00:00:00 1970 +0000
113 date: Thu Jan 01 00:00:00 1970 +0000
114 summary: latin-1 e' encoded: \xe9 (esc)
114 summary: latin-1 e' encoded: \xe9 (esc)
115
115
116 changeset: 1:0e5b7e3f9c4a
116 changeset: 1:0e5b7e3f9c4a
117 user: test
117 user: test
118 date: Mon Jan 12 13:46:40 1970 +0000
118 date: Mon Jan 12 13:46:40 1970 +0000
119 summary: koi8-r: \xd2\xd4\xd5\xd4\xd8 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
119 summary: koi8-r: \xd2\xd4\xd5\xd4\xd8 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
120
120
121 changeset: 0:1e78a93102a3
121 changeset: 0:1e78a93102a3
122 user: test
122 user: test
123 date: Mon Jan 12 13:46:40 1970 +0000
123 date: Mon Jan 12 13:46:40 1970 +0000
124 summary: latin-1 e': \xe9 = u'\\xe9' (esc)
124 summary: latin-1 e': \xe9 = u'\\xe9' (esc)
125
125
126
126
127 hg log (utf-8)
127 hg log (utf-8)
128
128
129 $ hg --encoding utf-8 log
129 $ hg --encoding utf-8 log
130 changeset: 5:a52c0692f24a
130 changeset: 5:a52c0692f24a
131 branch: \xc3\xa9 (esc)
131 branch: \xc3\xa9 (esc)
132 tag: tip
132 tag: tip
133 user: test
133 user: test
134 date: Thu Jan 01 00:00:00 1970 +0000
134 date: Thu Jan 01 00:00:00 1970 +0000
135 summary: latin1 branch
135 summary: latin1 branch
136
136
137 changeset: 4:94db611b4196
137 changeset: 4:94db611b4196
138 user: test
138 user: test
139 date: Thu Jan 01 00:00:00 1970 +0000
139 date: Thu Jan 01 00:00:00 1970 +0000
140 summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc)
140 summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc)
141
141
142 changeset: 3:ca661e7520de
142 changeset: 3:ca661e7520de
143 tag: \xc3\xa9 (esc)
143 tag: \xc3\xa9 (esc)
144 user: test
144 user: test
145 date: Thu Jan 01 00:00:00 1970 +0000
145 date: Thu Jan 01 00:00:00 1970 +0000
146 summary: utf-8 e' encoded: \xc3\xa9 (esc)
146 summary: utf-8 e' encoded: \xc3\xa9 (esc)
147
147
148 changeset: 2:650c6f3d55dd
148 changeset: 2:650c6f3d55dd
149 user: test
149 user: test
150 date: Thu Jan 01 00:00:00 1970 +0000
150 date: Thu Jan 01 00:00:00 1970 +0000
151 summary: latin-1 e' encoded: \xc3\xa9 (esc)
151 summary: latin-1 e' encoded: \xc3\xa9 (esc)
152
152
153 changeset: 1:0e5b7e3f9c4a
153 changeset: 1:0e5b7e3f9c4a
154 user: test
154 user: test
155 date: Mon Jan 12 13:46:40 1970 +0000
155 date: Mon Jan 12 13:46:40 1970 +0000
156 summary: koi8-r: \xc3\x92\xc3\x94\xc3\x95\xc3\x94\xc3\x98 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
156 summary: koi8-r: \xc3\x92\xc3\x94\xc3\x95\xc3\x94\xc3\x98 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
157
157
158 changeset: 0:1e78a93102a3
158 changeset: 0:1e78a93102a3
159 user: test
159 user: test
160 date: Mon Jan 12 13:46:40 1970 +0000
160 date: Mon Jan 12 13:46:40 1970 +0000
161 summary: latin-1 e': \xc3\xa9 = u'\\xe9' (esc)
161 summary: latin-1 e': \xc3\xa9 = u'\\xe9' (esc)
162
162
163
163
164 hg tags (ascii)
164 hg tags (ascii)
165
165
166 $ HGENCODING=ascii hg tags
166 $ HGENCODING=ascii hg tags
167 tip 5:a52c0692f24a
167 tip 5:a52c0692f24a
168 ? 3:ca661e7520de
168 ? 3:ca661e7520de
169
169
170 hg tags (latin-1)
170 hg tags (latin-1)
171
171
172 $ HGENCODING=latin-1 hg tags
172 $ HGENCODING=latin-1 hg tags
173 tip 5:a52c0692f24a
173 tip 5:a52c0692f24a
174 \xe9 3:ca661e7520de (esc)
174 \xe9 3:ca661e7520de (esc)
175
175
176 hg tags (utf-8)
176 hg tags (utf-8)
177
177
178 $ HGENCODING=utf-8 hg tags
178 $ HGENCODING=utf-8 hg tags
179 tip 5:a52c0692f24a
179 tip 5:a52c0692f24a
180 \xc3\xa9 3:ca661e7520de (esc)
180 \xc3\xa9 3:ca661e7520de (esc)
181
181
182 hg tags (JSON)
182 hg tags (JSON)
183
183
184 $ hg tags -Tjson
184 $ hg tags -Tjson
185 [
185 [
186 {
186 {
187 "node": "a52c0692f24ad921c0a31e1736e7635a8b23b670",
187 "node": "a52c0692f24ad921c0a31e1736e7635a8b23b670",
188 "rev": 5,
188 "rev": 5,
189 "tag": "tip",
189 "tag": "tip",
190 "type": ""
190 "type": ""
191 },
191 },
192 {
192 {
193 "node": "ca661e7520dec3f5438a63590c350bebadb04989",
193 "node": "ca661e7520dec3f5438a63590c350bebadb04989",
194 "rev": 3,
194 "rev": 3,
195 "tag": "\xc3\xa9", (esc)
195 "tag": "\xc3\xa9", (esc)
196 "type": ""
196 "type": ""
197 }
197 }
198 ]
198 ]
199
199
200 hg branches (ascii)
200 hg branches (ascii)
201
201
202 $ HGENCODING=ascii hg branches
202 $ HGENCODING=ascii hg branches
203 ? 5:a52c0692f24a
203 ? 5:a52c0692f24a
204 default 4:94db611b4196 (inactive)
204 default 4:94db611b4196 (inactive)
205
205
206 hg branches (latin-1)
206 hg branches (latin-1)
207
207
208 $ HGENCODING=latin-1 hg branches
208 $ HGENCODING=latin-1 hg branches
209 \xe9 5:a52c0692f24a (esc)
209 \xe9 5:a52c0692f24a (esc)
210 default 4:94db611b4196 (inactive)
210 default 4:94db611b4196 (inactive)
211
211
212 hg branches (utf-8)
212 hg branches (utf-8)
213
213
214 $ HGENCODING=utf-8 hg branches
214 $ HGENCODING=utf-8 hg branches
215 \xc3\xa9 5:a52c0692f24a (esc)
215 \xc3\xa9 5:a52c0692f24a (esc)
216 default 4:94db611b4196 (inactive)
216 default 4:94db611b4196 (inactive)
217 $ echo '[ui]' >> .hg/hgrc
217 $ echo '[ui]' >> .hg/hgrc
218 $ echo 'fallbackencoding = koi8-r' >> .hg/hgrc
218 $ echo 'fallbackencoding = koi8-r' >> .hg/hgrc
219
219
220 hg log (utf-8)
220 hg log (utf-8)
221
221
222 $ HGENCODING=utf-8 hg log
222 $ HGENCODING=utf-8 hg log
223 changeset: 5:a52c0692f24a
223 changeset: 5:a52c0692f24a
224 branch: \xc3\xa9 (esc)
224 branch: \xc3\xa9 (esc)
225 tag: tip
225 tag: tip
226 user: test
226 user: test
227 date: Thu Jan 01 00:00:00 1970 +0000
227 date: Thu Jan 01 00:00:00 1970 +0000
228 summary: latin1 branch
228 summary: latin1 branch
229
229
230 changeset: 4:94db611b4196
230 changeset: 4:94db611b4196
231 user: test
231 user: test
232 date: Thu Jan 01 00:00:00 1970 +0000
232 date: Thu Jan 01 00:00:00 1970 +0000
233 summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc)
233 summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc)
234
234
235 changeset: 3:ca661e7520de
235 changeset: 3:ca661e7520de
236 tag: \xc3\xa9 (esc)
236 tag: \xc3\xa9 (esc)
237 user: test
237 user: test
238 date: Thu Jan 01 00:00:00 1970 +0000
238 date: Thu Jan 01 00:00:00 1970 +0000
239 summary: utf-8 e' encoded: \xc3\xa9 (esc)
239 summary: utf-8 e' encoded: \xc3\xa9 (esc)
240
240
241 changeset: 2:650c6f3d55dd
241 changeset: 2:650c6f3d55dd
242 user: test
242 user: test
243 date: Thu Jan 01 00:00:00 1970 +0000
243 date: Thu Jan 01 00:00:00 1970 +0000
244 summary: latin-1 e' encoded: \xc3\xa9 (esc)
244 summary: latin-1 e' encoded: \xc3\xa9 (esc)
245
245
246 changeset: 1:0e5b7e3f9c4a
246 changeset: 1:0e5b7e3f9c4a
247 user: test
247 user: test
248 date: Mon Jan 12 13:46:40 1970 +0000
248 date: Mon Jan 12 13:46:40 1970 +0000
249 summary: koi8-r: \xd1\x80\xd1\x82\xd1\x83\xd1\x82\xd1\x8c = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
249 summary: koi8-r: \xd1\x80\xd1\x82\xd1\x83\xd1\x82\xd1\x8c = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc)
250
250
251 changeset: 0:1e78a93102a3
251 changeset: 0:1e78a93102a3
252 user: test
252 user: test
253 date: Mon Jan 12 13:46:40 1970 +0000
253 date: Mon Jan 12 13:46:40 1970 +0000
254 summary: latin-1 e': \xd0\x98 = u'\\xe9' (esc)
254 summary: latin-1 e': \xd0\x98 = u'\\xe9' (esc)
255
255
256
256
257 hg log (dolphin)
257 hg log (dolphin)
258
258
259 $ HGENCODING=dolphin hg log
259 $ HGENCODING=dolphin hg log
260 abort: unknown encoding: dolphin
260 abort: unknown encoding: dolphin
261 (please check your locale settings)
261 (please check your locale settings)
262 [255]
262 [255]
263 $ HGENCODING=ascii hg branch `cat latin-1-tag`
263 $ HGENCODING=ascii hg branch `cat latin-1-tag`
264 abort: decoding near '\xe9': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)! (esc)
264 abort: decoding near '\xe9': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)! (esc)
265 [255]
265 [255]
266 $ cp latin-1-tag .hg/branch
266 $ cp latin-1-tag .hg/branch
267 $ HGENCODING=latin-1 hg ci -m 'auto-promote legacy name'
267 $ HGENCODING=latin-1 hg ci -m 'auto-promote legacy name'
268
268
269 Test roundtrip encoding of lookup tables when not using UTF-8 (issue2763)
269 Test roundtrip encoding of lookup tables when not using UTF-8 (issue2763)
270
270
271 $ HGENCODING=latin-1 hg up `cat latin-1-tag`
271 $ HGENCODING=latin-1 hg up `cat latin-1-tag`
272 0 files updated, 0 files merged, 1 files removed, 0 files unresolved
272 0 files updated, 0 files merged, 1 files removed, 0 files unresolved
273
273
274 $ cd ..
274 $ cd ..
275
276 Test roundtrip encoding/decoding of utf8b for generated data
277
278 #if hypothesis
279
280 >>> from hypothesishelpers import *
281 >>> from mercurial import encoding
282 >>> roundtrips(st.binary(), encoding.fromutf8b, encoding.toutf8b)
283 Round trip OK
284
285 #endif
General Comments 0
You need to be logged in to leave comments. Login now