##// END OF EJS Templates
convert: handle percent-encoded bytes in file URLs like Subversion...
Manuel Jacob -
r45951:0ea9c86f 5.4.2 stable
parent child Browse files
Show More
@@ -354,6 +354,32 b' protomap = {'
354 354 }
355 355
356 356
class NonUtf8PercentEncodedBytes(Exception):
    """Raised when percent-encoded bytes in a URL path are not valid UTF-8."""
359
360
def url2pathname_like_subversion(unicodepath):
    """Percent-decode the path of a file URL the way Subversion does.

    Subversion paths are Unicode. Since the percent-decoding is done on
    UTF-8-encoded strings, percent-encoded bytes are interpreted as UTF-8.

    ``unicodepath`` is the (unicode) path portion of the URL. The decoded
    path is returned as unicode. NonUtf8PercentEncodedBytes is raised if
    the percent-encoded bytes do not form valid UTF-8.
    """
    if pycompat.ispy3:
        # On Python 3, we have to pass unicode to urlreq.url2pathname().
        # Percent-decoded bytes get decoded using UTF-8 and the 'replace'
        # error handler, so a replacement character in the result is how
        # invalid UTF-8 is detected.
        # NOTE(review): a literal U+FFFD already present in the input is
        # indistinguishable from a decoding failure here and is rejected
        # as well.
        decoded = urlreq.url2pathname(unicodepath)
        if u'\N{REPLACEMENT CHARACTER}' in decoded:
            raise NonUtf8PercentEncodedBytes
        return decoded

    # If we passed unicode on Python 2, it would be converted using the
    # latin-1 encoding. Therefore, we pass UTF-8-encoded bytes.
    rawpath = urlreq.url2pathname(unicodepath.encode('utf-8'))
    try:
        return rawpath.decode('utf-8')
    except UnicodeDecodeError:
        raise NonUtf8PercentEncodedBytes
381
382
357 383 def issvnurl(ui, url):
358 384 try:
359 385 proto, path = url.split(b'://', 1)
@@ -366,7 +392,7 b' def issvnurl(ui, url):'
366 392 ):
367 393 path = path[:2] + b':/' + path[6:]
368 394 try:
369 path.decode(fsencoding)
395 unicodepath = path.decode(fsencoding)
370 396 except UnicodeDecodeError:
371 397 ui.warn(
372 398 _(
@@ -376,28 +402,22 b' def issvnurl(ui, url):'
376 402 % pycompat.sysbytes(fsencoding)
377 403 )
378 404 return False
379 # FIXME: The following reasoning and logic is wrong and will be
380 # fixed in a following changeset.
381 # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes
382 # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on
383 # py3 will decode percent-encoded bytes using the utf-8 encoding
384 # and the "replace" error handler. This means that it will not
385 # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983).
386 # url.open() uses the reverse function (urlreq.pathname2url()) and
387 # has a similar problem
388 # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes
389 # sense to solve both problems together and handle all file URLs
390 # consistently. For now, we warn.
391 unicodepath = urlreq.url2pathname(pycompat.fsdecode(path))
392 if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath:
405 try:
406 unicodepath = url2pathname_like_subversion(unicodepath)
407 except NonUtf8PercentEncodedBytes:
393 408 ui.warn(
394 409 _(
395 b'on Python 3, we currently do not support non-UTF-8 '
396 b'percent-encoded bytes in file URLs for Subversion '
397 b'repositories\n'
410 b'Subversion does not support non-UTF-8 '
411 b'percent-encoded bytes in file URLs\n'
398 412 )
399 413 )
400 path = pycompat.fsencode(unicodepath)
414 return False
415 # Below, we approximate how Subversion checks the path. On Unix, we
416 # should therefore convert the path to bytes using `fsencoding`
417 # (like Subversion does). On Windows, the right thing would
418 # actually be to leave the path as unicode. For now, we restrict
419 # the path to MBCS.
420 path = unicodepath.encode(fsencoding)
401 421 except ValueError:
402 422 proto = b'file'
403 423 path = os.path.abspath(url)
@@ -197,13 +197,13 b" tests) don't work."
197 197 abort: file:/*/$TESTTMP/\xff: missing or unsupported repository (glob) (esc)
198 198 [255]
199 199
200 #if py3
201 For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded
202 bytes in a filename.
200 Subversion decodes percent-encoded bytes on the converted, UTF-8-encoded
201 string. Therefore, if the percent-encoded bytes aren't valid UTF-8, Subversion
202 would choke on them when converting them to the locale encoding.
203 203
204 204 $ hg convert file://$TESTTMP/%FF test
205 205 initializing destination test repository
206 on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories
206 Subversion does not support non-UTF-8 percent-encoded bytes in file URLs
207 207 file:/*/$TESTTMP/%FF does not look like a CVS checkout (glob)
208 208 $TESTTMP/file:$TESTTMP/%FF does not look like a Git repository
209 209 file:/*/$TESTTMP/%FF does not look like a Subversion repository (glob)
@@ -215,4 +215,3 b' bytes in a filename.'
215 215 file:/*/$TESTTMP/%FF does not look like a P4 repository (glob)
216 216 abort: file:/*/$TESTTMP/%FF: missing or unsupported repository (glob)
217 217 [255]
218 #endif
General Comments 0
You need to be logged in to leave comments. Login now