diff --git a/mercurial/changelog.py b/mercurial/changelog.py --- a/mercurial/changelog.py +++ b/mercurial/changelog.py @@ -199,6 +199,11 @@ class changelog(revlog.revlog): def add(self, manifest, files, desc, transaction, p1, p2, user, date=None, extra=None): + # Convert to UTF-8 encoded bytestrings as the very first + # thing: calling any method on a localstr object will turn it + # into a str object and the cached UTF-8 string is thus lost. + user, desc = encoding.fromlocal(user), encoding.fromlocal(desc) + user = user.strip() # An empty username or a username with a "\n" will make the # revision text contain two "\n\n" sequences -> corrupt @@ -212,8 +217,6 @@ class changelog(revlog.revlog): # strip trailing whitespace and leading and trailing empty lines desc = '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n') - user, desc = encoding.fromlocal(user), encoding.fromlocal(desc) - if date: parseddate = "%d %d" % util.parsedate(date) else: diff --git a/tests/test-context.py b/tests/test-context.py --- a/tests/test-context.py +++ b/tests/test-context.py @@ -1,5 +1,5 @@ import os -from mercurial import hg, ui +from mercurial import hg, ui, context, encoding u = ui.ui() @@ -17,3 +17,16 @@ repo[None].add(['foo']) repo.commit(text='commit1', date="0 0") print "workingfilectx.date =", repo[None]['foo'].date() + +# test memctx with non-ASCII commit message + +def filectxfn(repo, memctx, path): + return context.memfilectx("foo", "") + +ctx = context.memctx(repo, ['tip', None], + encoding.tolocal("Gr\xc3\xbcezi!"), + ["foo"], filectxfn) +ctx.commit() +for enc in "ASCII", "Latin-1", "UTF-8": + encoding.encoding = enc + print "%-8s: %s" % (enc, repo["tip"].description()) diff --git a/tests/test-context.py.out b/tests/test-context.py.out --- a/tests/test-context.py.out +++ b/tests/test-context.py.out @@ -1,1 +1,4 @@ workingfilectx.date = (1000, 0) +ASCII : Gr?ezi! +Latin-1 : Grüezi! +UTF-8 : Grüezi!