diff --git a/doc/gendoc.py b/doc/gendoc.py --- a/doc/gendoc.py +++ b/doc/gendoc.py @@ -40,6 +40,7 @@ from mercurial.i18n import ( gettext, _, ) +from mercurial.utils import stringutil table = commands.table globalopts = commands.globalopts @@ -85,7 +86,9 @@ def get_opts(opts): if b'\n' in desc: # only remove line breaks and indentation desc = b' '.join(l.lstrip() for l in desc.split(b'\n')) - desc += default and _(b" (default: %s)") % bytes(default) or b"" + if default: + default = stringutil.forcebytestr(default) + desc += _(b" (default: %s)") % default yield (b", ".join(allopts), desc) diff --git a/hgext/convert/common.py b/hgext/convert/common.py --- a/hgext/convert/common.py +++ b/hgext/convert/common.py @@ -84,10 +84,18 @@ def shlexer(data=None, filepath=None, wo return l +if pycompat.ispy3: + base64_encodebytes = base64.encodebytes + base64_decodebytes = base64.decodebytes +else: + base64_encodebytes = base64.encodestring + base64_decodebytes = base64.decodestring + + def encodeargs(args): def encodearg(s): - lines = base64.encodestring(s) - lines = [l.splitlines()[0] for l in lines] + lines = base64_encodebytes(s) + lines = [l.splitlines()[0] for l in pycompat.iterbytestr(lines)] return b''.join(lines) s = pickle.dumps(args) @@ -95,7 +103,7 @@ def encodeargs(args): def decodeargs(s): - s = base64.decodestring(s) + s = base64_decodebytes(s) return pickle.loads(s) diff --git a/hgext/convert/subversion.py b/hgext/convert/subversion.py --- a/hgext/convert/subversion.py +++ b/hgext/convert/subversion.py @@ -55,7 +55,7 @@ try: import warnings warnings.filterwarnings( - b'ignore', module=b'svn.core', category=DeprecationWarning + 'ignore', module='svn.core', category=DeprecationWarning ) svn.core.SubversionException # trigger import to catch error @@ -321,7 +321,26 @@ def issvnurl(ui, url): and path[2:6].lower() == b'%3a/' ): path = path[:2] + b':/' + path[6:] - path = urlreq.url2pathname(path) + # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes + # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on + # py3 will decode percent-encoded bytes using the utf-8 encoding + # and the "replace" error handler. This means that it will not + # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983). + # url.open() uses the reverse function (urlreq.pathname2url()) and + # has a similar problem + # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes + # sense to solve both problems together and handle all file URLs + # consistently. For now, we warn. + unicodepath = urlreq.url2pathname(pycompat.fsdecode(path)) + if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath: + ui.warn( + _( + b'on Python 3, we currently do not support non-UTF-8 ' + b'percent-encoded bytes in file URLs for Subversion ' + b'repositories\n' + ) + ) + path = pycompat.fsencode(unicodepath) except ValueError: proto = b'file' path = os.path.abspath(url) @@ -516,7 +535,9 @@ class svn_source(converter_source): % (name, path) ) return None - self.ui.note(_(b'found %s at %r\n') % (name, path)) + self.ui.note( + _(b'found %s at %r\n') % (name, pycompat.bytestr(path)) + ) return path rev = optrev(self.last_changed) @@ -597,7 +618,7 @@ class svn_source(converter_source): self.removed = set() files.sort() - files = zip(files, [rev] * len(files)) + files = pycompat.ziplist(files, [rev] * len(files)) return (files, copies) def getchanges(self, rev, full): @@ -641,9 +662,9 @@ class svn_source(converter_source): def checkrevformat(self, revstr, mapname=b'splicemap'): """ fails if revision format does not match the correct format""" if not re.match( - r'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-' - r'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]' - r'{12,12}(.*)@[0-9]+$', + br'svn:[0-9a-f]{8,8}-[0-9a-f]{4,4}-' + br'[0-9a-f]{4,4}-[0-9a-f]{4,4}-[0-9a-f]' + br'{12,12}(.*)@[0-9]+$', revstr, ): raise error.Abort( @@ -773,7 +794,7 @@ class svn_source(converter_source): self.convertfp.flush() def revid(self, revnum, module=None): - return b'svn:%s%s@%s' % (self.uuid, module or self.module, revnum) + return b'svn:%s%s@%d' % (self.uuid, module or self.module, revnum) def revnum(self, rev): return int(rev.split(b'@')[-1]) @@ -796,7 +817,7 @@ class svn_source(converter_source): # We do not know the latest changed revision, # keep the first one with changed paths. break - if revnum <= stop: + if stop is not None and revnum <= stop: break for p in paths: @@ -898,12 +919,12 @@ class svn_source(converter_source): if not copyfrom_path: continue self.ui.debug( - b"copied to %s from %s@%s\n" + b"copied to %s from %s@%d\n" % (entrypath, copyfrom_path, ent.copyfrom_rev) ) copies[self.recode(entrypath)] = self.recode(copyfrom_path) elif kind == 0: # gone, but had better be a deleted *file* - self.ui.debug(b"gone from %s\n" % ent.copyfrom_rev) + self.ui.debug(b"gone from %d\n" % ent.copyfrom_rev) pmodule, prevnum = revsplit(parents[0])[1:] parentpath = pmodule + b"/" + entrypath fromkind = self._checkpath(entrypath, prevnum, pmodule) @@ -1189,7 +1210,10 @@ class svn_source(converter_source): return relative # The path is outside our tracked tree... - self.ui.debug(b'%r is not under %r, ignoring\n' % (path, module)) + self.ui.debug( + b'%r is not under %r, ignoring\n' + % (pycompat.bytestr(path), pycompat.bytestr(module)) + ) return None def _checkpath(self, path, revnum, module=None): diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -4613,7 +4613,8 @@ def log(ui, repo, *pats, **opts): With --graph the revisions are shown as an ASCII art DAG with the most recent changeset at the top. - 'o' is a changeset, '@' is a working directory parent, '_' closes a branch, + 'o' is a changeset, '@' is a working directory parent, '%' is a changeset + involved in an unresolved merge conflict, '_' closes a branch, 'x' is obsolete, '*' is unstable, and '+' represents a fork where the changeset from the lines below is a parent of the 'o' merge on the same line. diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py --- a/mercurial/pycompat.py +++ b/mercurial/pycompat.py @@ -178,9 +178,16 @@ if ispy3: if os.name == r'nt': sysargv = [a.encode("mbcs", "ignore") for a in sys.argv] else: + + def getdefaultlocale_if_known(): + try: + return locale.getdefaultlocale() + except ValueError: + return None, None + encoding = ( locale.getlocale()[1] - or locale.getdefaultlocale()[1] + or getdefaultlocale_if_known()[1] or sys.getfilesystemencoding() ) sysargv = [a.encode(encoding, "surrogateescape") for a in sys.argv] diff --git a/tests/test-convert-svn-encoding.t b/tests/test-convert-svn-encoding.t --- a/tests/test-convert-svn-encoding.t +++ b/tests/test-convert-svn-encoding.t @@ -152,3 +152,23 @@ Check tags are in UTF-8 f7e66f98380ed1e53a797c5c7a7a2616a7ab377d branch\xc3\xa9 (esc) $ cd .. + +#if py3 +For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded +bytes in a filename. + + $ hg convert file:///%ff test + initializing destination test repository + on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories + file:///%ff does not look like a CVS checkout + $TESTTMP/file:/%ff does not look like a Git repository + file:///%ff does not look like a Subversion repository + file:///%ff is not a local Mercurial repository + file:///%ff does not look like a darcs repository + file:///%ff does not look like a monotone repository + file:///%ff does not look like a GNU Arch repository + file:///%ff does not look like a Bazaar repository + file:///%ff does not look like a P4 repository + abort: file:///%ff: missing or unsupported repository + [255] +#endif diff --git a/tests/test-locale.t b/tests/test-locale.t new file mode 100644 --- /dev/null +++ b/tests/test-locale.t @@ -0,0 +1,2 @@ + $ LANG=nonexistent LC_ALL=nonexistent LANGUAGE=nonexistent hg version -q + Mercurial Distributed SCM (version *) (glob)