##// END OF EJS Templates
convert: handle percent-encoded bytes in file URLs like Subversion...
Manuel Jacob -
r45566:0ea9c86f 5.4.2 stable
parent child Browse files
Show More
@@ -354,6 +354,32 b' protomap = {'
354 }
354 }
355
355
356
356
class NonUtf8PercentEncodedBytes(Exception):
    """Signal that percent-encoded bytes in a file URL are not valid UTF-8."""
359
360
# Subversion paths are Unicode. Since the percent-decoding is done on
# UTF-8-encoded strings, percent-encoded bytes are interpreted as UTF-8.
def url2pathname_like_subversion(unicodepath):
    """Percent-decode the path part of a file URL the way Subversion would.

    ``unicodepath`` is the (unicode) path portion of the URL. The decoded
    path is returned as unicode.

    Raises NonUtf8PercentEncodedBytes when the percent-decoded bytes do not
    form valid UTF-8.
    """
    if not pycompat.ispy3:
        # If we passed unicode on Python 2, it would be converted using the
        # latin-1 encoding. Therefore, we pass UTF-8-encoded bytes and decode
        # the result strictly afterwards.
        decodedbytes = urlreq.url2pathname(unicodepath.encode('utf-8'))
        try:
            return decodedbytes.decode('utf-8')
        except UnicodeDecodeError:
            raise NonUtf8PercentEncodedBytes

    # On Python 3, we have to pass unicode to urlreq.url2pathname().
    # Percent-decoded bytes get decoded using UTF-8 and the 'replace' error
    # handler, so invalid sequences surface as U+FFFD in the result.
    # NOTE: a path that itself contains U+FFFD is rejected by this check too.
    decodedpath = urlreq.url2pathname(unicodepath)
    if u'\N{REPLACEMENT CHARACTER}' in decodedpath:
        raise NonUtf8PercentEncodedBytes
    return decodedpath
381
382
357 def issvnurl(ui, url):
383 def issvnurl(ui, url):
358 try:
384 try:
359 proto, path = url.split(b'://', 1)
385 proto, path = url.split(b'://', 1)
@@ -366,7 +392,7 b' def issvnurl(ui, url):'
366 ):
392 ):
367 path = path[:2] + b':/' + path[6:]
393 path = path[:2] + b':/' + path[6:]
368 try:
394 try:
369 path.decode(fsencoding)
395 unicodepath = path.decode(fsencoding)
370 except UnicodeDecodeError:
396 except UnicodeDecodeError:
371 ui.warn(
397 ui.warn(
372 _(
398 _(
@@ -376,28 +402,22 b' def issvnurl(ui, url):'
376 % pycompat.sysbytes(fsencoding)
402 % pycompat.sysbytes(fsencoding)
377 )
403 )
378 return False
404 return False
379 # FIXME: The following reasoning and logic is wrong and will be
405 try:
380 # fixed in a following changeset.
406 unicodepath = url2pathname_like_subversion(unicodepath)
381 # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
407 except NonUtf8PercentEncodedBytes:
382 # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
383 # py3 will decode percent-encoded bytes using the utf-8 encoding
384 # and the "replace" error handler. This means that it will not
385 # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
386 # url.open() uses the reverse function (urlreq.pathname2url()) and
387 # has a similar problem
388 # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
389 # sense to solve both problems together and handle all file URLs
390 # consistently. For now, we warn.
391 unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
392 if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
393 ui.warn(
408 ui.warn(
394 _(
409 _(
395 b'on Python 3, we currently do not support non-UTF-8 '
410 b'Subversion does not support non-UTF-8 '
396 b'percent-encoded bytes in file URLs for Subversion '
411 b'percent-encoded bytes in file URLs\n'
397 b'repositories\n'
398 )
412 )
399 )
413 )
400 path = pycompat.fsencode(unicodepath)
414 return False
415 # Below, we approximate how Subversion checks the path. On Unix, we
416 # should therefore convert the path to bytes using `fsencoding`
417 # (like Subversion does). On Windows, the right thing would
418 # actually be to leave the path as unicode. For now, we restrict
419 # the path to MBCS.
420 path = unicodepath.encode(fsencoding)
401 except ValueError:
421 except ValueError:
402 proto = b'file'
422 proto = b'file'
403 path = os.path.abspath(url)
423 path = os.path.abspath(url)
@@ -197,13 +197,13 b" tests) don't work."
197 abort: file:/*/$TESTTMP/\xff: missing or unsupported repository (glob) (esc)
197 abort: file:/*/$TESTTMP/\xff: missing or unsupported repository (glob) (esc)
198 [255]
198 [255]
199
199
200 #if py3
200 Subversion decodes percent-encoded bytes on the converted, UTF-8-encoded
201 For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded
201 string. Therefore, if the percent-encoded bytes aren't valid UTF-8, Subversion
202 bytes in a filename.
202 would choke on them when converting them to the locale encoding.
203
203
204 $ hg convert file://$TESTTMP/%FF test
204 $ hg convert file://$TESTTMP/%FF test
205 initializing destination test repository
205 initializing destination test repository
206 on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories
206 Subversion does not support non-UTF-8 percent-encoded bytes in file URLs
207 file:/*/$TESTTMP/%FF does not look like a CVS checkout (glob)
207 file:/*/$TESTTMP/%FF does not look like a CVS checkout (glob)
208 $TESTTMP/file:$TESTTMP/%FF does not look like a Git repository
208 $TESTTMP/file:$TESTTMP/%FF does not look like a Git repository
209 file:/*/$TESTTMP/%FF does not look like a Subversion repository (glob)
209 file:/*/$TESTTMP/%FF does not look like a Subversion repository (glob)
@@ -215,4 +215,3 b' bytes in a filename.'
215 file:/*/$TESTTMP/%FF does not look like a P4 repository (glob)
215 file:/*/$TESTTMP/%FF does not look like a P4 repository (glob)
216 abort: file:/*/$TESTTMP/%FF: missing or unsupported repository (glob)
216 abort: file:/*/$TESTTMP/%FF: missing or unsupported repository (glob)
217 [255]
217 [255]
218 #endif
General Comments 0
You need to be logged in to leave comments. Login now