Show More
@@ -470,17 +470,20 b' def toutf8b(s):' | |||
|
470 | 470 | s.decode('utf-8') |
|
471 | 471 | return s |
|
472 | 472 | except UnicodeDecodeError: |
|
473 | # surrogate-encode any characters that don't round-trip | |
|
474 | s2 = s.decode('utf-8', 'ignore').encode('utf-8') | |
|
475 | r = "" | |

476 | pos = 0 | |
|
477 | for c in s: | |
|
478 | if s2[pos:pos + 1] == c: | |
|
479 | r += c | |
|
480 | pos += 1 | |

481 | else: | |
|
482 | r += unichr(0xdc00 + ord(c)).encode('utf-8') | |
|
483 | return r | |
|
473 | pass | |
|
474 | ||
|
475 | r = "" | |
|
476 | pos = 0 | |
|
477 | l = len(s) | |
|
478 | while pos < l: | |
|
479 | try: | |
|
480 | c = getutf8char(s, pos) | |
|
481 | pos += len(c) | |
|
482 | except UnicodeDecodeError: | |
|
483 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') | |
|
484 | pos += 1 | |
|
485 | r += c | |
|
486 | return r | |
|
484 | 487 | |
|
485 | 488 | def fromutf8b(s): |
|
486 | 489 | '''Given a UTF-8b string, return a local, possibly-binary string. |
General Comments 0
You need to be logged in to leave comments.
Login now