test-encoding.t
286 lines
| 7.7 KiB
| text/troff
|
Tads3Lexer
/ tests / test-encoding.t
Matt Mackall
|
r12417 | Test character encoding | ||
$ hg init t | ||||
$ cd t | ||||
we need a repo with some legacy latin-1 changesets | ||||
Thomas Arendsen Hein
|
r16350 | $ hg unbundle "$TESTDIR/bundles/legacy-encoding.hg" | ||
Matt Mackall
|
r12417 | adding changesets | ||
adding manifests | ||||
adding file changes | ||||
added 2 changesets with 2 changes to 1 files | ||||
Denis Laxalde
|
r34662 | new changesets 1e78a93102a3:0e5b7e3f9c4a | ||
Matt Mackall
|
r12417 | (run 'hg update' to get a working copy) | ||
$ hg co | ||||
1 files updated, 0 files merged, 0 files removed, 0 files unresolved | ||||
Augie Fackler
|
r33262 | $ $PYTHON << EOF | ||
Pulkit Goyal
|
r36068 | > f = open('latin-1', 'wb'); f.write(b"latin-1 e' encoded: \xe9"); f.close() | ||
> f = open('utf-8', 'wb'); f.write(b"utf-8 e' encoded: \xc3\xa9"); f.close() | ||||
> f = open('latin-1-tag', 'wb'); f.write(b"\xe9"); f.close() | ||||
Matt Mackall
|
r12417 | > EOF | ||
the ascii-encoded commit below should fail with an encoding error | ||||
$ echo "plain old ascii" > a | ||||
$ hg st | ||||
M a | ||||
? latin-1 | ||||
? latin-1-tag | ||||
? utf-8 | ||||
$ HGENCODING=ascii hg ci -l latin-1 | ||||
transaction abort! | ||||
rollback completed | ||||
Mads Kiilerich
|
r12942 | abort: decoding near ' encoded: \xe9': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)! (esc) | ||
Matt Mackall
|
r12417 | [255] | ||
these should work | ||||
$ echo "latin-1" > a | ||||
$ HGENCODING=latin-1 hg ci -l latin-1 | ||||
$ echo "utf-8" > a | ||||
$ HGENCODING=utf-8 hg ci -l utf-8 | ||||
$ HGENCODING=latin-1 hg tag `cat latin-1-tag` | ||||
$ HGENCODING=latin-1 hg branch `cat latin-1-tag` | ||||
Mads Kiilerich
|
r12942 | marked working directory as branch \xe9 (esc) | ||
Matt Mackall
|
r15615 | (branches are permanent and global, did you want a bookmark?) | ||
Matt Mackall
|
r12417 | $ HGENCODING=latin-1 hg ci -m 'latin1 branch' | ||
Sune Foldager
|
r17360 | $ hg -q rollback | ||
$ HGENCODING=latin-1 hg branch | ||||
\xe9 (esc) | ||||
$ HGENCODING=latin-1 hg ci -m 'latin1 branch' | ||||
Matt Mackall
|
r12417 | $ rm .hg/branch | ||
hg log (ascii) | ||||
$ hg --encoding ascii log | ||||
Peter Arrenbrecht
|
r14162 | changeset: 5:a52c0692f24a | ||
Matt Mackall
|
r12417 | branch: ? | ||
tag: tip | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
summary: latin1 branch | ||||
changeset: 4:94db611b4196 | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
summary: Added tag ? for changeset ca661e7520de | ||||
changeset: 3:ca661e7520de | ||||
tag: ? | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
summary: utf-8 e' encoded: ? | ||||
changeset: 2:650c6f3d55dd | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
summary: latin-1 e' encoded: ? | ||||
changeset: 1:0e5b7e3f9c4a | ||||
user: test | ||||
date: Mon Jan 12 13:46:40 1970 +0000 | ||||
summary: koi8-r: ????? = u'\u0440\u0442\u0443\u0442\u044c' | ||||
changeset: 0:1e78a93102a3 | ||||
user: test | ||||
date: Mon Jan 12 13:46:40 1970 +0000 | ||||
summary: latin-1 e': ? = u'\xe9' | ||||
hg log (latin-1) | ||||
$ hg --encoding latin-1 log | ||||
Peter Arrenbrecht
|
r14162 | changeset: 5:a52c0692f24a | ||
Mads Kiilerich
|
r12942 | branch: \xe9 (esc) | ||
Matt Mackall
|
r12417 | tag: tip | ||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
summary: latin1 branch | ||||
changeset: 4:94db611b4196 | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: Added tag \xe9 for changeset ca661e7520de (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 3:ca661e7520de | ||||
Mads Kiilerich
|
r12942 | tag: \xe9 (esc) | ||
Matt Mackall
|
r12417 | user: test | ||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: utf-8 e' encoded: \xe9 (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 2:650c6f3d55dd | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: latin-1 e' encoded: \xe9 (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 1:0e5b7e3f9c4a | ||||
user: test | ||||
date: Mon Jan 12 13:46:40 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: koi8-r: \xd2\xd4\xd5\xd4\xd8 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 0:1e78a93102a3 | ||||
user: test | ||||
date: Mon Jan 12 13:46:40 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: latin-1 e': \xe9 = u'\\xe9' (esc) | ||
Matt Mackall
|
r12417 | |||
hg log (utf-8) | ||||
$ hg --encoding utf-8 log | ||||
Peter Arrenbrecht
|
r14162 | changeset: 5:a52c0692f24a | ||
Mads Kiilerich
|
r12942 | branch: \xc3\xa9 (esc) | ||
Matt Mackall
|
r12417 | tag: tip | ||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
summary: latin1 branch | ||||
changeset: 4:94db611b4196 | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 3:ca661e7520de | ||||
Mads Kiilerich
|
r12942 | tag: \xc3\xa9 (esc) | ||
Matt Mackall
|
r12417 | user: test | ||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: utf-8 e' encoded: \xc3\xa9 (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 2:650c6f3d55dd | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: latin-1 e' encoded: \xc3\xa9 (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 1:0e5b7e3f9c4a | ||||
user: test | ||||
date: Mon Jan 12 13:46:40 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: koi8-r: \xc3\x92\xc3\x94\xc3\x95\xc3\x94\xc3\x98 = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 0:1e78a93102a3 | ||||
user: test | ||||
date: Mon Jan 12 13:46:40 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: latin-1 e': \xc3\xa9 = u'\\xe9' (esc) | ||
Matt Mackall
|
r12417 | |||
hg tags (ascii) | ||||
$ HGENCODING=ascii hg tags | ||||
Peter Arrenbrecht
|
r14162 | tip 5:a52c0692f24a | ||
Matt Mackall
|
r12417 | ? 3:ca661e7520de | ||
hg tags (latin-1) | ||||
$ HGENCODING=latin-1 hg tags | ||||
Peter Arrenbrecht
|
r14162 | tip 5:a52c0692f24a | ||
Mads Kiilerich
|
r12942 | \xe9 3:ca661e7520de (esc) | ||
Matt Mackall
|
r12417 | |||
hg tags (utf-8) | ||||
$ HGENCODING=utf-8 hg tags | ||||
Peter Arrenbrecht
|
r14162 | tip 5:a52c0692f24a | ||
Mads Kiilerich
|
r12942 | \xc3\xa9 3:ca661e7520de (esc) | ||
Matt Mackall
|
r12417 | |||
Matt Mackall
|
r22429 | hg tags (JSON) | ||
$ hg tags -Tjson | ||||
[ | ||||
{ | ||||
Yuya Nishihara
|
r22554 | "node": "a52c0692f24ad921c0a31e1736e7635a8b23b670", | ||
Matt Mackall
|
r22429 | "rev": 5, | ||
"tag": "tip", | ||||
"type": "" | ||||
}, | ||||
{ | ||||
Yuya Nishihara
|
r22554 | "node": "ca661e7520dec3f5438a63590c350bebadb04989", | ||
Matt Mackall
|
r22429 | "rev": 3, | ||
"tag": "\xc3\xa9", (esc) | ||||
"type": "" | ||||
} | ||||
] | ||||
Matt Mackall
|
r12417 | hg branches (ascii) | ||
$ HGENCODING=ascii hg branches | ||||
Peter Arrenbrecht
|
r14162 | ? 5:a52c0692f24a | ||
Matt Mackall
|
r12417 | default 4:94db611b4196 (inactive) | ||
hg branches (latin-1) | ||||
$ HGENCODING=latin-1 hg branches | ||||
Peter Arrenbrecht
|
r14162 | \xe9 5:a52c0692f24a (esc) | ||
Matt Mackall
|
r12417 | default 4:94db611b4196 (inactive) | ||
hg branches (utf-8) | ||||
$ HGENCODING=utf-8 hg branches | ||||
Peter Arrenbrecht
|
r14162 | \xc3\xa9 5:a52c0692f24a (esc) | ||
Matt Mackall
|
r12417 | default 4:94db611b4196 (inactive) | ||
$ echo '[ui]' >> .hg/hgrc | ||||
$ echo 'fallbackencoding = koi8-r' >> .hg/hgrc | ||||
hg log (utf-8, with ui.fallbackencoding = koi8-r) | ||||
$ HGENCODING=utf-8 hg log | ||||
Peter Arrenbrecht
|
r14162 | changeset: 5:a52c0692f24a | ||
Mads Kiilerich
|
r12942 | branch: \xc3\xa9 (esc) | ||
Matt Mackall
|
r12417 | tag: tip | ||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
summary: latin1 branch | ||||
changeset: 4:94db611b4196 | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: Added tag \xc3\xa9 for changeset ca661e7520de (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 3:ca661e7520de | ||||
Mads Kiilerich
|
r12942 | tag: \xc3\xa9 (esc) | ||
Matt Mackall
|
r12417 | user: test | ||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: utf-8 e' encoded: \xc3\xa9 (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 2:650c6f3d55dd | ||||
user: test | ||||
date: Thu Jan 01 00:00:00 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: latin-1 e' encoded: \xc3\xa9 (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 1:0e5b7e3f9c4a | ||||
user: test | ||||
date: Mon Jan 12 13:46:40 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: koi8-r: \xd1\x80\xd1\x82\xd1\x83\xd1\x82\xd1\x8c = u'\\u0440\\u0442\\u0443\\u0442\\u044c' (esc) | ||
Matt Mackall
|
r12417 | |||
changeset: 0:1e78a93102a3 | ||||
user: test | ||||
date: Mon Jan 12 13:46:40 1970 +0000 | ||||
Mads Kiilerich
|
r12942 | summary: latin-1 e': \xd0\x98 = u'\\xe9' (esc) | ||
Matt Mackall
|
r12417 | |||
hg log (dolphin) | ||||
$ HGENCODING=dolphin hg log | ||||
Mads Kiilerich
|
r15769 | abort: unknown encoding: dolphin | ||
(please check your locale settings) | ||||
Matt Mackall
|
r12417 | [255] | ||
$ HGENCODING=ascii hg branch `cat latin-1-tag` | ||||
Mads Kiilerich
|
r12942 | abort: decoding near '\xe9': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)! (esc) | ||
Matt Mackall
|
r12417 | [255] | ||
$ cp latin-1-tag .hg/branch | ||||
Matt Mackall
|
r13047 | $ HGENCODING=latin-1 hg ci -m 'auto-promote legacy name' | ||
Matt Mackall
|
r13940 | |||
Test roundtrip encoding of lookup tables when not using UTF-8 (issue2763) | ||||
$ HGENCODING=latin-1 hg up `cat latin-1-tag` | ||||
0 files updated, 0 files merged, 1 files removed, 0 files unresolved | ||||
Mads Kiilerich
|
r17346 | |||
Mads Kiilerich
|
r16913 | $ cd .. | ||
Yuya Nishihara
|
r26966 | |||
Test roundtrip encoding/decoding of utf8b for generated data | ||||
#if hypothesis | ||||
>>> from hypothesishelpers import * | ||||
>>> from mercurial import encoding | ||||
>>> roundtrips(st.binary(), encoding.fromutf8b, encoding.toutf8b) | ||||
Round trip OK | ||||
#endif | ||||