Show More
@@ -470,16 +470,19 @@ def toutf8b(s): | |||||
470 | s.decode('utf-8') |
|
470 | s.decode('utf-8') | |
471 | return s |
|
471 | return s | |
472 | except UnicodeDecodeError: |
|
472 | except UnicodeDecodeError: | |
473 | # surrogate-encode any characters that don't round-trip |
|
473 | pass | |
474 | s2 = s.decode('utf-8', 'ignore').encode('utf-8') |
|
474 | ||
475 |
|
|
475 | r = "" | |
476 |
|
|
476 | pos = 0 | |
477 | for c in s: |
|
477 | l = len(s) | |
478 | if s2[pos:pos + 1] == c: |
|
478 | while pos < l: | |
|
479 | try: | |||
|
480 | c = getutf8char(s, pos) | |||
|
481 | pos += len(c) | |||
|
482 | except UnicodeDecodeError: | |||
|
483 | c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') | |||
|
484 | pos += 1 | |||
479 |
|
|
485 | r += c | |
480 | pos += 1 |
|
|||
481 | else: |
|
|||
482 | r += unichr(0xdc00 + ord(c)).encode('utf-8') |
|
|||
483 |
|
|
486 | return r | |
484 |
|
487 | |||
485 | def fromutf8b(s): |
|
488 | def fromutf8b(s): |
General Comments 0
You need to be logged in to leave comments.
Login now