# HG changeset patch # User Manuel Jacob # Date 2020-06-30 02:55:52 # Node ID 697212a830fbac295d7270684bf232f87584cbdd # Parent bd0f122f3f51939b2adc7d6f9d8611692b1ebdce convert: bail out in Subversion source if encountering non-ASCII HTTP(S) URL Before this patch, in the tested case, urllib raised `httplib.InvalidURL: URL can't contain control characters. '/\xff/!svn/ver/0/.svn' (found at least '\xff')`, with the result that the URL was never recognized as a Subversion repository. This patch adds a check that bails out if the URL contains non-ASCII characters. The warning is not overly user-friendly, but giving the user something to type into a search engine is definitely better than not explaining why the repository was not recognized. We could support non-ASCII characters by quoting them before passing them to urllib. However, we would want to be compatible with what the `svn` command does, which converts the URL from the locale encoding to UTF-8, percent-encodes it and sends it to the server. If the locale encoding is not UTF-8, the behavior is IMHO not very intuitive, as the `svn` command may send different (percent-encoded) octets than what was passed on the console. Instead of copying this behavior, we had better leave it forbidden. diff --git a/hgext/convert/subversion.py b/hgext/convert/subversion.py --- a/hgext/convert/subversion.py +++ b/hgext/convert/subversion.py @@ -347,6 +347,15 @@ def issvnurl(ui, url): path = os.path.abspath(url) if proto == b'file': path = util.pconvert(path) + elif proto in (b'http', 'https'): + if not encoding.isasciistr(path): + ui.warn( + _( + b"Subversion sources don't support non-ASCII characters in " + b"HTTP(S) URLs. 
Please percent-encode them.\n" + ) + ) + return False check = protomap.get(proto, lambda *args: False) while b'/' in path: if check(ui, path, proto): diff --git a/tests/test-convert-svn-encoding.t b/tests/test-convert-svn-encoding.t --- a/tests/test-convert-svn-encoding.t +++ b/tests/test-convert-svn-encoding.t @@ -153,6 +153,16 @@ Check tags are in UTF-8 $ cd .. +Subversion sources don't support non-ASCII characters in HTTP(S) URLs. + + $ XFF=$($PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")') + $ hg convert --source-type=svn http://localhost:$HGPORT/$XFF test + initializing destination test repository + Subversion sources don't support non-ASCII characters in HTTP(S) URLs. Please percent-encode them. + http://localhost:$HGPORT/\xff does not look like a Subversion repository (esc) + abort: http://localhost:$HGPORT/\xff: missing or unsupported repository (esc) + [255] + #if py3 For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded bytes in a filename.