Show More
@@ -448,6 +448,13 b' def jsonescape(s, paranoid=False):' | |||
|
448 | 448 | pass |
|
449 | 449 | return charencodepure.jsonescapeu8fallback(u8chars, paranoid) |
|
450 | 450 | |
|
451 | # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 | |
|
452 | # bytes are mapped to that range. | |
|
453 | if pycompat.ispy3: | |
|
454 | _utf8strict = r'surrogatepass' | |
|
455 | else: | |
|
456 | _utf8strict = r'strict' | |
|
457 | ||
|
451 | 458 | _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
|
452 | 459 | |
|
453 | 460 | def getutf8char(s, pos): |
@@ -464,7 +471,7 b' def getutf8char(s, pos):' | |||
|
464 | 471 | |
|
465 | 472 | c = s[pos:pos + l] |
|
466 | 473 | # validate with attempted decode |
|
467 | c.decode("utf-8") | |
|
474 | c.decode("utf-8", _utf8strict) | |
|
468 | 475 | return c |
|
469 | 476 | |
|
470 | 477 | def toutf8b(s): |
@@ -503,7 +510,7 b' def toutf8b(s):' | |||
|
503 | 510 | if isinstance(s, localstr): |
|
504 | 511 | return s._utf8 |
|
505 | 512 | try: |
|
506 | s.decode('utf-8') | |
|
513 | s.decode('utf-8', _utf8strict) | |
|
507 | 514 | return s |
|
508 | 515 | except UnicodeDecodeError: |
|
509 | 516 | pass |
@@ -517,12 +524,12 b' def toutf8b(s):' | |||
|
517 | 524 | c = getutf8char(s, pos) |
|
518 | 525 | if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": |
|
519 | 526 | # have to re-escape existing U+DCxx characters |
|
520 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') | |
|
527 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8', _utf8strict) | |
|
521 | 528 | pos += 1 |
|
522 | 529 | else: |
|
523 | 530 | pos += len(c) |
|
524 | 531 | except UnicodeDecodeError: |
|
525 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') | |
|
532 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8', _utf8strict) | |
|
526 | 533 | pos += 1 |
|
527 | 534 | r += c |
|
528 | 535 | return r |
@@ -570,7 +577,7 b' def fromutf8b(s):' | |||
|
570 | 577 | pos += len(c) |
|
571 | 578 | # unescape U+DCxx characters |
|
572 | 579 | if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": |
|
573 | c = chr(ord(c.decode("utf-8")) & 0xff) | |
|
580 | c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xff) | |
|
574 | 581 | r += c |
|
575 | 582 | return r |
|
576 | 583 |
@@ -64,6 +64,11 b' def jsonescapeu8fast(u8chars, paranoid):' | |||
|
64 | 64 | except IndexError: |
|
65 | 65 | raise ValueError |
|
66 | 66 | |
|
67 | if pycompat.ispy3: | |
|
68 | _utf8strict = r'surrogatepass' | |
|
69 | else: | |
|
70 | _utf8strict = r'strict' | |
|
71 | ||
|
67 | 72 | def jsonescapeu8fallback(u8chars, paranoid): |
|
68 | 73 | """Convert a UTF-8 byte string to JSON-escaped form (slow path) |
|
69 | 74 | |
@@ -74,6 +79,7 b' def jsonescapeu8fallback(u8chars, parano' | |||
|
74 | 79 | else: |
|
75 | 80 | jm = _jsonmap |
|
76 | 81 | # non-BMP char is represented as UTF-16 surrogate pair |
|
77 | u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16')) | |
|
82 | u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict) | |
|
83 | u16codes = array.array(r'H', u16b) | |
|
78 | 84 | u16codes.pop(0) # drop BOM |
|
79 | 85 | return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes) |
@@ -50,7 +50,7 b" testmod('mercurial.config')" | |||
|
50 | 50 | testmod('mercurial.context') |
|
51 | 51 | testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE) |
|
52 | 52 | testmod('mercurial.dispatch') |
|
53 | testmod('mercurial.encoding', py3=False) # py3: multiple encoding issues | |
|
53 | testmod('mercurial.encoding') | |
|
54 | 54 | testmod('mercurial.formatter', py3=False) # py3: write bytes to stdout |
|
55 | 55 | testmod('mercurial.hg') |
|
56 | 56 | testmod('mercurial.hgweb.hgwebdir_mod', py3=False) # py3: repr(bytes) ? |
General Comments 0
You need to be logged in to leave comments.
Login now