Show More
@@ -470,17 +470,20 b' def toutf8b(s):' | |||||
470 | s.decode('utf-8') |
|
470 | s.decode('utf-8') | |
471 | return s |
|
471 | return s | |
472 | except UnicodeDecodeError: |
|
472 | except UnicodeDecodeError: | |
473 | # surrogate-encode any characters that don't round-trip |
|
473 | pass | |
474 | s2 = s.decode('utf-8', 'ignore').encode('utf-8') |
|
474 | ||
475 |
|
|
475 | r = "" | |
476 |
|
|
476 | pos = 0 | |
477 | for c in s: |
|
477 | l = len(s) | |
478 | if s2[pos:pos + 1] == c: |
|
478 | while pos < l: | |
479 | r += c |
|
479 | try: | |
480 |
|
|
480 | c = getutf8char(s, pos) | |
481 |
|
|
481 | pos += len(c) | |
482 | r += unichr(0xdc00 + ord(c)).encode('utf-8') |
|
482 | except UnicodeDecodeError: | |
483 | return r |
|
483 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') | |
|
484 | pos += 1 | |||
|
485 | r += c | |||
|
486 | return r | |||
484 |
|
487 | |||
485 | def fromutf8b(s): |
|
488 | def fromutf8b(s): | |
486 | '''Given a UTF-8b string, return a local, possibly-binary string. |
|
489 | '''Given a UTF-8b string, return a local, possibly-binary string. |
General Comments 0
You need to be logged in to leave comments.
Login now