Show More
@@ -463,14 +463,14 b' def toutf8b(s):' | |||||
463 | internal surrogate encoding as a UTF-8 string.) |
|
463 | internal surrogate encoding as a UTF-8 string.) | |
464 | ''' |
|
464 | ''' | |
465 |
|
465 | |||
466 | if isinstance(s, localstr): |
|
466 | if "\xed" not in s: | |
467 | return s._utf8 |
|
467 | if isinstance(s, localstr): | |
468 |
|
468 | return s._utf8 | ||
469 | try: |
|
469 | try: | |
470 | s.decode('utf-8') |
|
470 | s.decode('utf-8') | |
471 | return s |
|
471 | return s | |
472 | except UnicodeDecodeError: |
|
472 | except UnicodeDecodeError: | |
473 | pass |
|
473 | pass | |
474 |
|
474 | |||
475 | r = "" |
|
475 | r = "" | |
476 | pos = 0 |
|
476 | pos = 0 | |
@@ -478,7 +478,12 b' def toutf8b(s):' | |||||
478 | while pos < l: |
|
478 | while pos < l: | |
479 | try: |
|
479 | try: | |
480 | c = getutf8char(s, pos) |
|
480 | c = getutf8char(s, pos) | |
481 | pos += len(c) |
|
481 | if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": | |
|
482 | # have to re-escape existing U+DCxx characters | |||
|
483 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') | |||
|
484 | pos += 1 | |||
|
485 | else: | |||
|
486 | pos += len(c) | |||
482 | except UnicodeDecodeError: |
|
487 | except UnicodeDecodeError: | |
483 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') |
|
488 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') | |
484 | pos += 1 |
|
489 | pos += 1 |
General Comments 0
You need to be logged in to leave comments.
Login now