upstream/mercurial-mirror Commit - r45951:0ea9c86f

convert: handle percent-encoded bytes in file URLs like Subversion...

Manuel Jacob -

r45951:0ea9c86f 5.4.2 stable

parent child

hgext/convert/subversion.py

0 +39 -19

@@ -354,6 +354,32 b' protomap = {'
354	}	354	}
355		355
356		356
		357	class NonUtf8PercentEncodedBytes(Exception):
		358	pass
		359
		360
		361	# Subversion paths are Unicode. Since the percent-decoding is done on
		362	# UTF-8-encoded strings, percent-encoded bytes are interpreted as UTF-8.
		363	def url2pathname_like_subversion(unicodepath):
		364	if pycompat.ispy3:
		365	# On Python 3, we have to pass unicode to urlreq.url2pathname().
		366	# Percent-decoded bytes get decoded using UTF-8 and the 'replace' error
		367	# handler.
		368	unicodepath = urlreq.url2pathname(unicodepath)
		369	if u'\N{REPLACEMENT CHARACTER}' in unicodepath:
		370	raise NonUtf8PercentEncodedBytes
		371	else:
		372	return unicodepath
		373	else:
		374	# If we passed unicode on Python 2, it would be converted using the
		375	# latin-1 encoding. Therefore, we pass UTF-8-encoded bytes.
		376	unicodepath = urlreq.url2pathname(unicodepath.encode('utf-8'))
		377	try:
		378	return unicodepath.decode('utf-8')
		379	except UnicodeDecodeError:
		380	raise NonUtf8PercentEncodedBytes
		381
		382
357	def issvnurl(ui, url):	383	def issvnurl(ui, url):
358	try:	384	try:
359	proto, path = url.split(b'://', 1)	385	proto, path = url.split(b'://', 1)
@@ -366,7 +392,7 b' def issvnurl(ui, url):'
366	):	392	):
367	path = path[:2] + b':/' + path[6:]	393	path = path[:2] + b':/' + path[6:]
368	try:	394	try:
369	path.decode(fsencoding)	395	unicodepath = path.decode(fsencoding)
370	except UnicodeDecodeError:	396	except UnicodeDecodeError:
371	ui.warn(	397	ui.warn(
372	_(	398	_(
@@ -376,28 +402,22 b' def issvnurl(ui, url):'
376	% pycompat.sysbytes(fsencoding)	402	% pycompat.sysbytes(fsencoding)
377	)	403	)
378	return False	404	return False
379	# FIXME: The following reasoning and logic is wrong and will be	405	try:
380	# fixed in a following changeset.	406	unicodepath = url2pathname_like_subversion(unicodepath)
381	# pycompat.fsdecode() / pycompat.fsencode() are used so that bytes	407	except NonUtf8PercentEncodedBytes:
382	# in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
383	# py3 will decode percent-encoded bytes using the utf-8 encoding
384	# and the "replace" error handler. This means that it will not
385	# preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
386	# url.open() uses the reverse function (urlreq.pathname2url()) and
387	# has a similar problem
388	# (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
389	# sense to solve both problems together and handle all file URLs
390	# consistently. For now, we warn.
391	unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
392	if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
393	ui.warn(	408	ui.warn(
394	_(	409	_(
395	b'on Python 3, we currently do not support non-UTF-8 '	410	b'Subversion does not support non-UTF-8 '
396	b'percent-encoded bytes in file URLs for Subversion '	411	b'percent-encoded bytes in file URLs\n'
397	b'repositories\n'
398	)	412	)
399	)	413	)
400	path = pycompat.fsencode(unicodepath)	414	return False
		415	# Below, we approximate how Subversion checks the path. On Unix, we
		416	# should therefore convert the path to bytes using `fsencoding`
		417	# (like Subversion does). On Windows, the right thing would
		418	# actually be to leave the path as unicode. For now, we restrict
		419	# the path to MBCS.
		420	path = unicodepath.encode(fsencoding)
401	except ValueError:	421	except ValueError:
402	proto = b'file'	422	proto = b'file'
403	path = os.path.abspath(url)	423	path = os.path.abspath(url)

tests/test-convert-svn-encoding.t

0 +4 -5

               abort: file:/*/$TESTTMP/\xff: missing or unsupported repository (glob) (esc)
               [255]
-            #if py3
+            Subversion decodes percent-encoded bytes on the converted, UTF-8-encoded
-            For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded
+            string. Therefore, if the percent-encoded bytes aren't valid UTF-8, Subversion
-            bytes in a filename.
+            would choke on them when converting them to the locale encoding.
               $ hg convert file://$TESTTMP/%FF test
               initializing destination test repository
-              on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories
+              Subversion does not support non-UTF-8 percent-encoded bytes in file URLs
               file:/*/$TESTTMP/%FF does not look like a CVS checkout (glob)
               $TESTTMP/file:$TESTTMP/%FF does not look like a Git repository
               file:/*/$TESTTMP/%FF does not look like a Subversion repository (glob)
               file:/*/$TESTTMP/%FF does not look like a P4 repository (glob)
               abort: file:/*/$TESTTMP/%FF: missing or unsupported repository (glob)
               [255]
-            #endif

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages