# HG changeset patch # User FUJIWARA Katsunori # Date 2011-07-28 05:36:07 # Node ID 28e98a8b173d241e7bddc34df6d15d8c9a83fa14 # Parent 56848e2bb0c5a43b580dd2ca7ce1e781d4e75b2b i18n: use UTF-8 string to lower filename for case collision check Some character sets, cp932 (known as Shift-JIS for Japanese) for example, use 0x41('A') - 0x5A('Z') and 0x61('a') - 0x7A('z') as the second or later byte of a multi-byte character. With such a character set, case collision checking mistakes different files for the same file after case folding, if filenames are treated as byte sequences. The win32mbcs extension is not appropriate for handling this problem, because the problem can also occur on platforms other than Windows whenever a problematic character set is used. Callers of util.checkcase() use known ASCII filenames as the last component of the path, and string.lower() is not applied to the directory part of the path. So, util.checkcase() is kept intact, even though it applies string.lower() to filenames. diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -7,7 +7,7 @@ from node import nullid, nullrev, hex, bin from i18n import _ -import scmutil, util, filemerge, copies, subrepo +import scmutil, util, filemerge, copies, subrepo, encoding import errno, os, shutil class mergestate(object): @@ -92,7 +92,7 @@ def _checkcollision(mctx): "check for case folding collisions in the destination context" folded = {} for fn in mctx: - fold = fn.lower() + fold = encoding.lower(fn) if fold in folded: raise util.Abort(_("case-folding collision between %s and %s") % (fn, folded[fold])) diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py --- a/mercurial/scmutil.py +++ b/mercurial/scmutil.py @@ -6,7 +6,7 @@ # GNU General Public License version 2 or any later version. 
from i18n import _ -import util, error, osutil, revset, similar +import util, error, osutil, revset, similar, encoding import match as matchmod import os, errno, re, stat, sys, glob @@ -46,10 +46,10 @@ class casecollisionauditor(object): self._abort = abort self._map = {} for f in existingiter: - self._map[f.lower()] = f + self._map[encoding.lower(f)] = f def __call__(self, f): - fl = f.lower() + fl = encoding.lower(f) map = self._map if fl in map and map[fl] != f: msg = _('possible case-folding collision for %s') % f diff --git a/tests/test-casecollision-i18n.t b/tests/test-casecollision-i18n.t new file mode 100644 --- /dev/null +++ b/tests/test-casecollision-i18n.t @@ -0,0 +1,43 @@ +run only on case-insensitive filesystems, because collision check at +"hg update" is done only on case-insensitive filesystems + + $ "$TESTDIR/hghave" icasefs || exit 80 + +setup repository, and target files + + $ HGENCODING=cp932 + $ export HGENCODING + $ hg init t + $ cd t + $ python << EOF + > names = ["\x83\x41", # cp932(0x83, 0x41='A'), UNICODE(0x30a2) + > "\x83\x5A", # cp932(0x83, 0x5A='Z'), UNICODE(0x30bb) + > "\x83\x61", # cp932(0x83, 0x61='a'), UNICODE(0x30c2) + > "\x83\x7A", # cp932(0x83, 0x7A='z'), UNICODE(0x30db) + > ] + > for num, name in zip(range(len(names)), names): + > # file for getting target filename of "hg add" + > f = file(str(num), 'w'); f.write(name); f.close() + > # target file of "hg add" + > f = file(name, 'w'); f.write(name); f.close() + > EOF + +test filename collision check at "hg add" + + $ hg add --config ui.portablefilenames=abort `cat 0` + $ hg add --config ui.portablefilenames=abort `cat 1` + $ hg add --config ui.portablefilenames=abort `cat 2` + $ hg add --config ui.portablefilenames=abort `cat 3` + $ hg status -a + A \x83A (esc) + A \x83Z (esc) + A \x83a (esc) + A \x83z (esc) + +test filename collision check at "hg update" + + $ hg commit -m 'revision 0' + $ hg update null + 0 files updated, 0 files merged, 4 files removed, 0 files unresolved + $ hg 
update tip + 4 files updated, 0 files merged, 0 files removed, 0 files unresolved