diff --git a/.hgsigs b/.hgsigs --- a/.hgsigs +++ b/.hgsigs @@ -197,3 +197,4 @@ 8fca7e8449a847e3cf1054f2c07b51237699fad3 26ce8e7515036d3431a03aaeb7bc72dd96cb1112 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl6YlRUVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6Z3YP/iOqphn99v0z2OupCl0q8CepbcdZMJWW3j00OAHYSO43M0FULpMpzC2o+kZDeqeLyzN7DsjoGts2cUnAOe9WX73sPkX1n1dbiDcUSsRqNND+tCkEZMtTn4DaGNIq1zSkkm8Q7O/1uwZPnX6FaIRMBs9qGbdfmMPNEvzny2tgrKc3ra1+AA8RCdtsbpqhjy+xf+EKVB/SMsQVVSJEgPkUkW6PwpaspdrxQKgZrb7C7Jx/gRVzMTUmCQe1sVCSnZNO3I/woAqDY2UNg7/hBubeRh/EjoH1o4ONTXgBQdYCl7QdcwDHpDc2HstonrFq51qxBecHDVw+ZKQds63Ixtxuab3SK0o/SWabZ1v8bGaWnyWnRWXL/1qkyFWly+fjEGGlv1kHl3n0UmwlUY8FQJCYDZgR0FqQGXAF3vMJOEp82ysk6jWN/7NRzcnoUC7HpNo1jPMiPRjskgVf3bhErfUQnhlF1YsVu/jPTixyfftbiaZmwILMkaPF8Kg3Cyf63p2cdcnTHdbP1U6ncR+BucthlbFei4WL0J2iERb8TBeCxOyCHlEUq8kampjbmPXN7VxnK4oX3xeBTf8mMbvrD5Fv3svRD+SkCCKu/MwQvB1VT6q425TSKHbCWeNqGjVLvetpx+skVH7eaXLEQ3wlCfo/0OQTRimx2O73EnOF5r8Q2POm cf3e07d7648a4371ce584d15dd692e7a6845792f 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl6sS5sVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6FQcP/1usy9WxajBppBZ54ep+qesxufLoux5qkRU7j4XZ0Id4/IcKQZeik0C/0mFMjc+dYhQDGpDiuXCADKMv5h2DCIoaWUC0GueVtVkPhhMW3zMg/BmepV7dhUuipfQ4fck8gYuaBOclunLX1MFd+CS/6BQ6XIrsKasnx9WrbO2JpieBXv+8I5mslChaZf2AxeIvUVb2BkKqsCD0rqbIjTjtfHWJpaH6spFa7XX/BZWeEYz2Nc6LVJNZY0AmvJh8ebpoGOx85dokRIEAzTmBh04SbkChi+350ki6MvG3Ax+3yrUZVc1PJtBDreL7dMs7Y3ENafSMhKnBrRaPVMyUHEm2Ygn4cmJ1YiGw4OWha1n7dtRW/uI96lXKDt8iLAQ4WBRojPhYNl4L3b6/6voCgpZUOpd7PgTRc3/00siCmYIOQzAO0HkDsALoNpk8LcCxpPFYTr8dF3bSsAT9fuaLNV6tI2ofbRLXh0gFXYdaWu10eVRrSMUMiH7n3H6EpzLa4sNdyFrK0vU4aSTlBERcjj2rj86dY0XQQL181V7Yhg8m8nyj+BzraRh7et2UXNsVosOnbTa1XX0qFVu+qAVp2BeqC4k31jm0MJk+1pDzkuAPs07z3ITwkDmTHjzxm5qoZyZ1/n37BB6miD+8xJYNH7vBX/yrDW790HbloasQOcXcerNR 065704cbdbdbb05dcd6bb814eb9bbdd982211b28 0 
iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl7amzkVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6AKEP/26Hoe8VqkuGwU0ZDsK6YgErXEPs8xtgZ9A2iouDkIqw2dm1TDmWnB5X8XaWmhAWFMUdjcqd1ZZJrAyD0p13xUOm3D+hlDXYTd2INkLwS8cVu22czZ5eoxtPkjuGYlPvek9b3vrrejkZ4vpamdS3iSvIx+TzvEW+w5eZFh9s1a9gR77hcZZoir24vtM9MsNnnBuI/5/fdWkhBoe17HSU4II56ckNXDrGO0nuqrWDxPr64WAcz6EmlTGc+cUqOM45Uc0sCr3GNQGEm6VCAw5oXq2Vt9O6sjgExLxr8zdud6w5hl9b8h2MrxyisgcnVR7efbumaRuNb8QZZPzk5QqlRxbaEcStyIXzAdar4fArQUY2vrmv1WyLJR3S/G3p8QkyWYL3CZNKjCAVxSa5ytS5Dr/bM2sWaEnIHqq+W6DOagpWV4uRRnwaId9tB9b0KBoFElXZRlaq0FlNYG8RLg65ZlkF+lj6RACO23epxapadcJwibDQiNYX20mcSEFDkSEgECnLQBecA2WZvw134RRbL3vuvB49SKS0ZEJ95myXMZa9kyIJY/g+oAFBuyZeK9O8DwGii0zFDOi6VWDTZzc3/15RRS6ehqQyYrLQntYtVGwHpxnUrp2kBjk3hDIvaYOcFbTnhTGcQCzckFnIZN2oxr5YZOI+Fpfak6RQTVhnHh0/ +0ea9c86fac8974cd74dc12ea681c8986eb6da6c4 0 iQJJBAABCgAzFiEE64UTlbQiPuL3ugso2lR0C/CHMroFAl78z0gVHDc4OTVwdWxraXRAZ21haWwuY29tAAoJENpUdAvwhzK6IrkP/2m/DJ93BR/SljCFe7KnExrDTzDI/i69x+ljomRZJmMRa86zRkclgd5L49woExDd1ZGebUY650V16adKNmVpz2rS6bQOgEr2NBD5fL+GiTX6UJ1VMgmQ8x1m8DYuI8pfBWbqQuZIl1vCEc0RmT3tHLZ7T8XgG9RXa4XielI2uhyimJPyZsE1K7c8Fa6UakH++DhYFBj+3QYbwS2fFDdA29L/4N5JLUzHkIbF7tPg7P1RBk+vhopKz9MMIu4S95LU+Gk7eQ3FfE8Jnv959hX2o/B2sdT2tEPIuDRSxZhSKLdlGbMy5IZvc/bZ+a5jlb2w23tlpfgzQxNarFqpX/weiJCtsxzeMXQHEVFG/+VuIOIYbfILWzySFcnSvcAtmNXExxH2F9j+XmQkLysnsgIfplNVEEIgZDBPGAkAQ+lH7UrEdw31ciSrCDsjXDaPQWcmk4zkfrXlwN7R9zJguJ+OuZ/Ga7NXWdZAC+YkPSKAfCesdUefcesyiresO8GEk9DyRNQsX/gl5BjEeuqYyUsve5541IMqscvdosg6HrU/RrmeR7sM7tZrDwCWdOWu/GdFatQ+k6zArSrMTKUBztzV93MIwUHDrnd+7OOYDfAuqGy7oM2KoW0Jp8sS2hotIJZ9a+VGwQcxCJ93I5sVT6ePBdmBoIAFW+rbncnD+E/RvVpl diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -210,3 +210,4 @@ 8fca7e8449a847e3cf1054f2c07b51237699fad3 26ce8e7515036d3431a03aaeb7bc72dd96cb1112 5.4rc0 cf3e07d7648a4371ce584d15dd692e7a6845792f 5.4 065704cbdbdbb05dcd6bb814eb9bbdd982211b28 5.4.1 +0ea9c86fac8974cd74dc12ea681c8986eb6da6c4 5.4.2 diff --git a/hgext/convert/subversion.py b/hgext/convert/subversion.py --- 
a/hgext/convert/subversion.py
+++ b/hgext/convert/subversion.py
@@ -3,6 +3,8 @@
 # Copyright(C) 2007 Daniel Holth et al
 from __future__ import absolute_import
 
+import codecs
+import locale
 import os
 import re
 import xml.dom.minidom
@@ -63,6 +65,40 @@ except ImportError:
     svn = None
 
 
+# In Subversion, paths and URLs are Unicode (encoded as UTF-8), which
+# Subversion converts from / to native strings when interfacing with the OS.
+# When passing paths and URLs to Subversion, we have to recode them such that
+# it roundtrips with what Subversion is doing.
+
+fsencoding = None
+
+
+def init_fsencoding():
+    """Initialize `fsencoding` once; later calls are no-ops."""
+    global fsencoding, fsencoding_is_utf8
+    if fsencoding is not None:
+        return
+    if pycompat.iswindows:
+        # On Windows, filenames are Unicode, but we store them using the MBCS
+        # encoding.
+        fsencoding = 'mbcs'
+    else:
+        # This is the encoding used to convert UTF-8 back to natively-encoded
+        # strings in Subversion 1.14.0 or earlier with APR 1.7.0 or earlier.
+        with util.with_lc_ctype():
+            fsencoding = locale.nl_langinfo(locale.CODESET) or 'ISO-8859-1'
+    fsencoding = codecs.lookup(fsencoding).name
+    fsencoding_is_utf8 = fsencoding == codecs.lookup('utf-8').name
+
+
+def fs2svn(s):
+    """Recode a string from `fsencoding` to UTF-8, as Subversion expects."""
+    if fsencoding_is_utf8:
+        return s
+    else:
+        return s.decode(fsencoding).encode('utf-8')
+
+
 class SvnPathNotFound(Exception):
     pass
 
@@ -106,8 +142,15 @@ def quote(s):
 
 
 def geturl(path):
+    """Convert path or URL to a SVN URL, encoded in UTF-8.
+
+    This can raise UnicodeDecodeError if the path or URL can't be converted to
+    unicode using `fsencoding`.
+    """
     try:
-        return svn.client.url_from_path(svn.core.svn_path_canonicalize(path))
+        return svn.client.url_from_path(
+            svn.core.svn_path_canonicalize(fs2svn(path))
+        )
     except svn.core.SubversionException:
         # svn.client.url_from_path() fails with local repositories
         pass
@@ -117,7 +160,7 @@ def geturl(path):
         path = b'/' + util.normpath(path)
     # Module URL is later compared with the repository URL returned
     # by svn API, which is UTF-8.
-    path = encoding.tolocal(path)
+    path = fs2svn(path)
     path = b'file://%s' % quote(path)
     return svn.core.svn_path_canonicalize(path)
 
@@ -284,7 +327,9 @@ def filecheck(ui, path, proto):
 def httpcheck(ui, path, proto):
     try:
         opener = urlreq.buildopener()
-        rsp = opener.open(b'%s://%s/!svn/ver/0/.svn' % (proto, path), b'rb')
+        rsp = opener.open(
+            pycompat.strurl(b'%s://%s/!svn/ver/0/.svn' % (proto, path)), b'rb'
+        )
         data = rsp.read()
     except urlerr.httperror as inst:
         if inst.code != 404:
@@ -311,6 +356,32 @@ protomap = {
 }
 
 
+class NonUtf8PercentEncodedBytes(Exception):
+    pass
+
+
+# Subversion paths are Unicode. Since the percent-decoding is done on
+# UTF-8-encoded strings, percent-encoded bytes are interpreted as UTF-8.
+def url2pathname_like_subversion(unicodepath):
+    if pycompat.ispy3:
+        # On Python 3, we have to pass unicode to urlreq.url2pathname().
+        # Percent-decoded bytes get decoded using UTF-8 and the 'replace' error
+        # handler.
+        unicodepath = urlreq.url2pathname(unicodepath)
+        if u'\N{REPLACEMENT CHARACTER}' in unicodepath:
+            raise NonUtf8PercentEncodedBytes
+        else:
+            return unicodepath
+    else:
+        # If we passed unicode on Python 2, it would be converted using the
+        # latin-1 encoding. Therefore, we pass UTF-8-encoded bytes.
+        unicodepath = urlreq.url2pathname(unicodepath.encode('utf-8'))
+        try:
+            return unicodepath.decode('utf-8')
+        except UnicodeDecodeError:
+            raise NonUtf8PercentEncodedBytes
+
+
 def issvnurl(ui, url):
     try:
         proto, path = url.split(b'://', 1)
@@ -322,31 +393,58 @@ def issvnurl(ui, url):
                 and path[2:6].lower() == b'%3a/'
             ):
                 path = path[:2] + b':/' + path[6:]
-            # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
-            # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
-            # py3 will decode percent-encoded bytes using the utf-8 encoding
-            # and the "replace" error handler. This means that it will not
-            # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
-            # url.open() uses the reverse function (urlreq.pathname2url()) and
-            # has a similar problem
-            # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
-            # sense to solve both problems together and handle all file URLs
-            # consistently. For now, we warn.
-            unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
-            if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
+            try:
+                unicodepath = path.decode(fsencoding)
+            except UnicodeDecodeError:
                 ui.warn(
                     _(
-                        b'on Python 3, we currently do not support non-UTF-8 '
-                        b'percent-encoded bytes in file URLs for Subversion '
-                        b'repositories\n'
+                        b'Subversion requires that file URLs can be converted '
+                        b'to Unicode using the current locale encoding (%s)\n'
+                    )
+                    % pycompat.sysbytes(fsencoding)
+                )
+                return False
+            try:
+                unicodepath = url2pathname_like_subversion(unicodepath)
+            except NonUtf8PercentEncodedBytes:
+                ui.warn(
+                    _(
+                        b'Subversion does not support non-UTF-8 '
+                        b'percent-encoded bytes in file URLs\n'
                     )
                 )
-            path = pycompat.fsencode(unicodepath)
+                return False
+            # Below, we approximate how Subversion checks the path. On Unix, we
+            # should therefore convert the path to bytes using `fsencoding`
+            # (like Subversion does). On Windows, the right thing would
+            # actually be to leave the path as unicode. For now, we restrict
+            # the path to MBCS.
+            path = unicodepath.encode(fsencoding)
     except ValueError:
         proto = b'file'
         path = os.path.abspath(url)
+        try:
+            path.decode(fsencoding)
+        except UnicodeDecodeError:
+            ui.warn(
+                _(
+                    b'Subversion requires that paths can be converted to '
+                    b'Unicode using the current locale encoding (%s)\n'
+                )
+                % pycompat.sysbytes(fsencoding)
+            )
+            return False
     if proto == b'file':
         path = util.pconvert(path)
+    elif proto in (b'http', b'https'):
+        if not encoding.isasciistr(path):
+            ui.warn(
+                _(
+                    b"Subversion sources don't support non-ASCII characters in "
+                    b"HTTP(S) URLs. Please percent-encode them.\n"
+                )
+            )
+            return False
     check = protomap.get(proto, lambda *args: False)
     while b'/' in path:
         if check(ui, path, proto):
@@ -373,6 +471,7 @@ class svn_source(converter_source):
     def __init__(self, ui, repotype, url, revs=None):
         super(svn_source, self).__init__(ui, repotype, url, revs=revs)
 
+        init_fsencoding()
         if not (
             url.startswith(b'svn://')
             or url.startswith(b'svn+ssh://')
diff --git a/tests/test-convert-svn-encoding.t b/tests/test-convert-svn-encoding.t --- a/tests/test-convert-svn-encoding.t +++ b/tests/test-convert-svn-encoding.t @@ -153,22 +153,65 @@ Check tags are in UTF-8 $ cd .. -#if py3 -For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded -bytes in a filename. +Subversion sources don't support non-ASCII characters in HTTP(S) URLs. + + $ XFF=$($PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")') + $ hg convert --source-type=svn http://localhost:$HGPORT/$XFF test + initializing destination test repository + Subversion sources don't support non-ASCII characters in HTTP(S) URLs. Please percent-encode them. + http://localhost:$HGPORT/\xff does not look like a Subversion repository (esc) + abort: http://localhost:$HGPORT/\xff: missing or unsupported repository (esc) + [255] + +In Subversion, paths are Unicode (encoded as UTF-8). Therefore paths that can't +be converted between UTF-8 and the locale encoding (which is always ASCII in +tests) don't work. 
- $ hg convert file:///%ff test + $ cp -R svn-repo $XFF + $ hg convert $XFF test + initializing destination test repository + Subversion requires that paths can be converted to Unicode using the current locale encoding (ascii) + \xff does not look like a CVS checkout (glob) (esc) + $TESTTMP/\xff does not look like a Git repository (esc) + \xff does not look like a Subversion repository (glob) (esc) + \xff is not a local Mercurial repository (glob) (esc) + \xff does not look like a darcs repository (glob) (esc) + \xff does not look like a monotone repository (glob) (esc) + \xff does not look like a GNU Arch repository (glob) (esc) + \xff does not look like a Bazaar repository (glob) (esc) + cannot find required "p4" tool + abort: \xff: missing or unsupported repository (glob) (esc) + [255] + $ hg convert file://$TESTTMP/$XFF test initializing destination test repository - on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories - file:///%ff does not look like a CVS checkout - $TESTTMP/file:/%ff does not look like a Git repository - file:///%ff does not look like a Subversion repository - file:///%ff is not a local Mercurial repository - file:///%ff does not look like a darcs repository - file:///%ff does not look like a monotone repository - file:///%ff does not look like a GNU Arch repository - file:///%ff does not look like a Bazaar repository - file:///%ff does not look like a P4 repository - abort: file:///%ff: missing or unsupported repository + Subversion requires that file URLs can be converted to Unicode using the current locale encoding (ascii) + file:/*/$TESTTMP/\xff does not look like a CVS checkout (glob) (esc) + $TESTTMP/file:$TESTTMP/\xff does not look like a Git repository (esc) + file:/*/$TESTTMP/\xff does not look like a Subversion repository (glob) (esc) + file:/*/$TESTTMP/\xff is not a local Mercurial repository (glob) (esc) + file:/*/$TESTTMP/\xff does not look like a darcs repository 
(glob) (esc) + file:/*/$TESTTMP/\xff does not look like a monotone repository (glob) (esc) + file:/*/$TESTTMP/\xff does not look like a GNU Arch repository (glob) (esc) + file:/*/$TESTTMP/\xff does not look like a Bazaar repository (glob) (esc) + file:/*/$TESTTMP/\xff does not look like a P4 repository (glob) (esc) + abort: file:/*/$TESTTMP/\xff: missing or unsupported repository (glob) (esc) [255] -#endif + +Subversion decodes percent-encoded bytes on the converted, UTF-8-encoded +string. Therefore, if the percent-encoded bytes aren't valid UTF-8, Subversion +would choke on them when converting them to the locale encoding. + + $ hg convert file://$TESTTMP/%FF test + initializing destination test repository + Subversion does not support non-UTF-8 percent-encoded bytes in file URLs + file:/*/$TESTTMP/%FF does not look like a CVS checkout (glob) + $TESTTMP/file:$TESTTMP/%FF does not look like a Git repository + file:/*/$TESTTMP/%FF does not look like a Subversion repository (glob) + file:/*/$TESTTMP/%FF is not a local Mercurial repository (glob) + file:/*/$TESTTMP/%FF does not look like a darcs repository (glob) + file:/*/$TESTTMP/%FF does not look like a monotone repository (glob) + file:/*/$TESTTMP/%FF does not look like a GNU Arch repository (glob) + file:/*/$TESTTMP/%FF does not look like a Bazaar repository (glob) + file:/*/$TESTTMP/%FF does not look like a P4 repository (glob) + abort: file:/*/$TESTTMP/%FF: missing or unsupported repository (glob) + [255]