# HG changeset patch # User FUJIWARA Katsunori # Date 2011-10-31 12:06:18 # Node ID 87bb6b7644f6b9376d06d6c5b8a7d36ff24a537e # Parent d7bfbc92a1c0b5d878febf44e1a1f1dc83b1259a minirst: use unicode string as intermediate form for replacement # this change redoes part of 521c8e0c93bf, backed out by 0ad0ebe67815 Some character encodings use ASCII characters other than control/alphabet/digit as a part of multi-byte characters, so direct replacing with such characters on strings in local encoding causes invalid byte sequences. [mpm: test changed to simple doctest] diff --git a/mercurial/minirst.py b/mercurial/minirst.py --- a/mercurial/minirst.py +++ b/mercurial/minirst.py @@ -23,9 +23,27 @@ import util, encoding from i18n import _ def replace(text, substs): + ''' + Apply a list of (find, replace) pairs to a text. + + >>> replace("foo bar", [('f', 'F'), ('b', 'B')]) + 'Foo Bar' + >>> encoding.encoding = 'latin1' + >>> replace('\\x81\\\\', [('\\\\', '/')]) + '\\x81/' + >>> encoding.encoding = 'shiftjis' + >>> replace('\\x81\\\\', [('\\\\', '/')]) + '\\x81\\\\' + ''' + + # some character encodings (cp932 for Japanese, at least) use + # ASCII characters other than control/alphabet/digit as a part of + # multi-bytes characters, so direct replacing with such characters + # on strings in local encoding causes invalid byte sequences. + utext = text.decode(encoding.encoding) for f, t in substs: - text = text.replace(f, t) - return text + utext = utext.replace(f, t) + return utext.encode(encoding.encoding) _blockre = re.compile(r"\n(?:\s*\n)+") diff --git a/tests/test-doctest.py b/tests/test-doctest.py --- a/tests/test-doctest.py +++ b/tests/test-doctest.py @@ -36,3 +36,6 @@ doctest.testmod(hgext.convert.cvsps) import mercurial.revset doctest.testmod(mercurial.revset) + +import mercurial.minirst +doctest.testmod(mercurial.minirst)