##// END OF EJS Templates
encoding: use getutf8char in toutf8b...
Matt Mackall -
r26878:d7e83f10 default
parent child Browse files
Show More
@@ -470,17 +470,20 @@ def toutf8b(s):
470 s.decode('utf-8')
470 s.decode('utf-8')
471 return s
471 return s
472 except UnicodeDecodeError:
472 except UnicodeDecodeError:
473 # surrogate-encode any characters that don't round-trip
473 pass
474 s2 = s.decode('utf-8', 'ignore').encode('utf-8')
474
475 r = ""
475 r = ""
476 pos = 0
476 pos = 0
477 for c in s:
477 l = len(s)
478 if s2[pos:pos + 1] == c:
478 while pos < l:
479 r += c
479 try:
480 pos += 1
480 c = getutf8char(s, pos)
481 else:
481 pos += len(c)
482 r += unichr(0xdc00 + ord(c)).encode('utf-8')
482 except UnicodeDecodeError:
483 return r
483 c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
484 pos += 1
485 r += c
486 return r
484
487
485 def fromutf8b(s):
488 def fromutf8b(s):
486 '''Given a UTF-8b string, return a local, possibly-binary string.
489 '''Given a UTF-8b string, return a local, possibly-binary string.
General Comments 0
You need to be logged in to leave comments. Login now