Show More
@@ -354,6 +354,32 b' protomap = {' | |||
|
354 | 354 | } |
|
355 | 355 | |
|
356 | 356 | |
|
class NonUtf8PercentEncodedBytes(Exception):
    """Signal that percent-encoded bytes in a path are not valid UTF-8."""
|
359 | ||
|
360 | ||
|
# Subversion paths are Unicode. Percent-decoding is carried out on the
# UTF-8-encoded form of the path, so percent-encoded bytes are interpreted
# as UTF-8.
def url2pathname_like_subversion(unicodepath):
    """Percent-decode the path part of a URL, treating the bytes as UTF-8.

    ``unicodepath`` is a unicode string. The return value is the unicode
    string produced by ``urlreq.url2pathname()``.

    Raises ``NonUtf8PercentEncodedBytes`` when the percent-encoded bytes do
    not form valid UTF-8.
    """
    if not pycompat.ispy3:
        # Handing unicode to url2pathname() on Python 2 would convert it
        # using latin-1, so UTF-8-encoded bytes are passed instead and the
        # result is decoded strictly afterwards.
        rawpath = urlreq.url2pathname(unicodepath.encode('utf-8'))
        try:
            return rawpath.decode('utf-8')
        except UnicodeDecodeError:
            raise NonUtf8PercentEncodedBytes

    # Python 3: url2pathname() must be given unicode. It decodes the
    # percent-decoded bytes as UTF-8 with the 'replace' error handler, so
    # the presence of a replacement character is used to detect bytes that
    # were not valid UTF-8.
    decodedpath = urlreq.url2pathname(unicodepath)
    if u'\N{REPLACEMENT CHARACTER}' in decodedpath:
        raise NonUtf8PercentEncodedBytes
    return decodedpath
|
381 | ||
|
382 | ||
|
357 | 383 | def issvnurl(ui, url): |
|
358 | 384 | try: |
|
359 | 385 | proto, path = url.split(b'://', 1) |
@@ -366,7 +392,7 b' def issvnurl(ui, url):' | |||
|
366 | 392 | ): |
|
367 | 393 | path = path[:2] + b':/' + path[6:] |
|
368 | 394 | try: |
|
369 | path.decode(fsencoding) | |
|
395 | unicodepath = path.decode(fsencoding) | |
|
370 | 396 | except UnicodeDecodeError: |
|
371 | 397 | ui.warn( |
|
372 | 398 | _( |
@@ -376,28 +402,22 b' def issvnurl(ui, url):' | |||
|
376 | 402 | % pycompat.sysbytes(fsencoding) |
|
377 | 403 | ) |
|
378 | 404 | return False |
|
379 | # FIXME: The following reasoning and logic is wrong and will be | |
|
380 | # fixed in a following changeset. | |
|
381 | # pycompat.fsdecode() / pycompat.fsencode() are used so that bytes | |
|
382 | # in the URL roundtrip correctly on Unix. urlreq.url2pathname() on | |
|
383 | # py3 will decode percent-encoded bytes using the utf-8 encoding | |
|
384 | # and the "replace" error handler. This means that it will not | |
|
385 | # preserve non-UTF-8 bytes (https://bugs.python.org/issue40983). | |
|
386 | # url.open() uses the reverse function (urlreq.pathname2url()) and | |
|
387 | # has a similar problem | |
|
388 | # (https://bz.mercurial-scm.org/show_bug.cgi?id=6357). It makes | |
|
389 | # sense to solve both problems together and handle all file URLs | |
|
390 | # consistently. For now, we warn. | |
|
391 | unicodepath = urlreq.url2pathname(pycompat.fsdecode(path)) | |
|
392 | if pycompat.ispy3 and u'\N{REPLACEMENT CHARACTER}' in unicodepath: | |
|
405 | try: | |
|
406 | unicodepath = url2pathname_like_subversion(unicodepath) | |
|
407 | except NonUtf8PercentEncodedBytes: | |
|
393 | 408 | ui.warn( |
|
394 | 409 | _( |
|
395 | b'on Python 3, we currently do not support non-UTF-8 ' | |
|
396 | b'percent-encoded bytes in file URLs for Subversion ' | |
|
397 | b'repositories\n' | |
|
410 | b'Subversion does not support non-UTF-8 ' | |
|
411 | b'percent-encoded bytes in file URLs\n' | |
|
398 | 412 | ) |
|
399 | 413 | ) |
|
400 | path = pycompat.fsencode(unicodepath) | |
|
414 | return False | |
|
415 | # Below, we approximate how Subversion checks the path. On Unix, we | |
|
416 | # should therefore convert the path to bytes using `fsencoding` | |
|
417 | # (like Subversion does). On Windows, the right thing would | |
|
418 | # actually be to leave the path as unicode. For now, we restrict | |
|
419 | # the path to MBCS. | |
|
420 | path = unicodepath.encode(fsencoding) | |
|
401 | 421 | except ValueError: |
|
402 | 422 | proto = b'file' |
|
403 | 423 | path = os.path.abspath(url) |
@@ -197,13 +197,13 b" tests) don't work." | |||
|
197 | 197 | abort: file:/*/$TESTTMP/\xff: missing or unsupported repository (glob) (esc) |
|
198 | 198 | [255] |
|
199 | 199 | |
|
200 | #if py3 | |
|
201 | For now, on Python 3, we abort when encountering non-UTF-8 percent-encoded | |
|
202 | bytes in a filename. | |
|
200 | Subversion decodes percent-encoded bytes on the converted, UTF-8-encoded | |
|
201 | string. Therefore, if the percent-encoded bytes aren't valid UTF-8, Subversion | |
|
202 | would choke on them when converting them to the locale encoding. | |
|
203 | 203 | |
|
204 | 204 | $ hg convert file://$TESTTMP/%FF test |
|
205 | 205 | initializing destination test repository |
|
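206 | on Python 3, we currently do not support non-UTF-8 percent-encoded bytes in file URLs for Subversion repositories | |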
|
206 | Subversion does not support non-UTF-8 percent-encoded bytes in file URLs | |
|
207 | 207 | file:/*/$TESTTMP/%FF does not look like a CVS checkout (glob) |
|
208 | 208 | $TESTTMP/file:$TESTTMP/%FF does not look like a Git repository |
|
209 | 209 | file:/*/$TESTTMP/%FF does not look like a Subversion repository (glob) |
@@ -215,4 +215,3 b' bytes in a filename.' | |||
|
215 | 215 | file:/*/$TESTTMP/%FF does not look like a P4 repository (glob) |
|
216 | 216 | abort: file:/*/$TESTTMP/%FF: missing or unsupported repository (glob) |
|
217 | 217 | [255] |
|
218 | #endif |
General Comments 0
You need to be logged in to leave comments.
Login now