Show More
@@ -470,16 +470,19 def toutf8b(s): | |||
|
470 | 470 | s.decode('utf-8') |
|
471 | 471 | return s |
|
472 | 472 | except UnicodeDecodeError: |
|
473 | # surrogate-encode any characters that don't round-trip | |
|
474 | s2 = s.decode('utf-8', 'ignore').encode('utf-8') | |
|
473 | pass | |
|
474 | ||
|
475 | 475 |
|
|
476 | 476 |
|
|
477 | for c in s: | |
|
478 | if s2[pos:pos + 1] == c: | |
|
477 | l = len(s) | |
|
478 | while pos < l: | |
|
479 | try: | |
|
480 | c = getutf8char(s, pos) | |
|
481 | pos += len(c) | |
|
482 | except UnicodeDecodeError: | |
|
483 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') | |
|
484 | pos += 1 | |
|
479 | 485 |
|
|
480 | pos += 1 | |
|
481 | else: | |
|
482 | r += unichr(0xdc00 + ord(c)).encode('utf-8') | |
|
483 | 486 |
|
|
484 | 487 | |
|
485 | 488 | def fromutf8b(s): |
General Comments 0
You need to be logged in to leave comments.
Login now