# HG changeset patch
# User FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
# Date 2011-12-25 11:35:16
# Node ID 9b822edecb4cb4aa28d8fdf10ae8fdea01818e8b
# Parent  988409e44a7638de30b2c5e9fe76453d8ff07744

i18n: use "encoding.lower()" to normalize specified string for revset

some problematic encoding (e.g.: cp932) uses ASCII alphabet characters
in byte sequence of multi byte characters.

"str.lower()" on such byte sequence may treat distinct characters as
same one, and cause unexpected log matching.

this patch uses "encoding.lower()" instead of "str.lower()" to
normalize strings for compare.

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -11,6 +11,7 @@ import node as nodemod
 import bookmarks as bookmarksmod
 import match as matchmod
 from i18n import _
+import encoding
 
 elements = {
     "(": (20, ("group", 1, ")"), ("func", 1, ")")),
@@ -233,8 +234,8 @@ def author(repo, subset, x):
     Alias for ``user(string)``.
     """
     # i18n: "author" is a keyword
-    n = getstring(x, _("author requires a string")).lower()
-    return [r for r in subset if n in repo[r].user().lower()]
+    n = encoding.lower(getstring(x, _("author requires a string")))
+    return [r for r in subset if n in encoding.lower(repo[r].user())]
 
 def bisect(repo, subset, x):
     """``bisect(string)``
@@ -376,11 +377,11 @@ def desc(repo, subset, x):
     Search commit message for string. The match is case-insensitive.
     """
     # i18n: "desc" is a keyword
-    ds = getstring(x, _("desc requires a string")).lower()
+    ds = encoding.lower(getstring(x, _("desc requires a string")))
     l = []
     for r in subset:
         c = repo[r]
-        if ds in c.description().lower():
+        if ds in encoding.lower(c.description()):
             l.append(r)
     return l
 
@@ -522,12 +523,12 @@ def keyword(repo, subset, x):
     string. The match is case-insensitive.
     """
     # i18n: "keyword" is a keyword
-    kw = getstring(x, _("keyword requires a string")).lower()
+    kw = encoding.lower(getstring(x, _("keyword requires a string")))
     l = []
     for r in subset:
         c = repo[r]
         t = " ".join(c.files() + [c.user(), c.description()])
-        if kw in t.lower():
+        if kw in encoding.lower(t):
             l.append(r)
     return l
 
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -475,3 +475,61 @@ issue2549 - correct optimizations
   $ log 'max(1 or 2) and not 2'
   $ log 'min(1 or 2) and not 1'
   $ log 'last(1 or 2, 1) and not 2'
+
+  $ cd ..
+
+test author/desc/keyword in problematic encoding
+# unicode: cp932:
+# u30A2    0x83 0x41(= 'A')
+# u30C2    0x83 0x61(= 'a')
+
+  $ hg init problematicencoding
+  $ cd problematicencoding
+
+  $ python > setup.sh <<EOF
+  > print u'''
+  > echo a > text
+  > hg add text
+  > hg --encoding utf-8 commit -u '\u30A2' -m none
+  > echo b > text
+  > hg --encoding utf-8 commit -u '\u30C2' -m none
+  > echo c > text
+  > hg --encoding utf-8 commit -u none -m '\u30A2'
+  > echo d > text
+  > hg --encoding utf-8 commit -u none -m '\u30C2'
+  > '''.encode('utf-8')
+  > EOF
+  $ sh < setup.sh
+
+test in problematic encoding
+  $ python > test.sh <<EOF
+  > print u'''
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'author(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'author(\u30C2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'desc(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'desc(\u30C2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'keyword(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'keyword(\u30C2)'
+  > '''.encode('cp932')
+  > EOF
+  $ sh < test.sh
+  0
+  ====
+  1
+  ====
+  2
+  ====
+  3
+  ====
+  0
+  2
+  ====
+  1
+  3
+
+  $ cd ..