##// END OF EJS Templates
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" is in it...
Yuya Nishihara -
r37965:57b0c722 default
parent child Browse files
Show More
@@ -504,11 +504,13 b' def toutf8b(s):'
504 internal surrogate encoding as a UTF-8 string.)
504 internal surrogate encoding as a UTF-8 string.)
505 '''
505 '''
506
506
507 if not isinstance(s, localstr) and isasciistr(s):
507 if isinstance(s, localstr):
508 # assume that the original UTF-8 sequence would never contain
509 # invalid characters in U+DCxx range
510 return s._utf8
511 elif isasciistr(s):
508 return s
512 return s
509 if "\xed" not in s:
513 if "\xed" not in s:
510 if isinstance(s, localstr):
511 return s._utf8
512 try:
514 try:
513 s.decode('utf-8', _utf8strict)
515 s.decode('utf-8', _utf8strict)
514 return s
516 return s
@@ -35,11 +35,32 b' class LocalEncodingTest(unittest.TestCas'
35 self.assertTrue(s is encoding.fromlocal(s))
35 self.assertTrue(s is encoding.fromlocal(s))
36
36
37 class Utf8bEncodingTest(unittest.TestCase):
37 class Utf8bEncodingTest(unittest.TestCase):
38 def setUp(self):
39 self.origencoding = encoding.encoding
40
41 def tearDown(self):
42 encoding.encoding = self.origencoding
43
38 def testasciifastpath(self):
44 def testasciifastpath(self):
39 s = b'\0' * 100
45 s = b'\0' * 100
40 self.assertTrue(s is encoding.toutf8b(s))
46 self.assertTrue(s is encoding.toutf8b(s))
41 self.assertTrue(s is encoding.fromutf8b(s))
47 self.assertTrue(s is encoding.fromutf8b(s))
42
48
49 def testlossylatin(self):
50 encoding.encoding = b'ascii'
51 s = u'\xc0'.encode('utf-8')
52 l = encoding.tolocal(s)
53 self.assertEqual(l, b'?') # lossy
54 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved
55
56 def testlossy0xed(self):
57 encoding.encoding = b'euc-kr' # U+Dxxx Hangul
58 s = u'\ud1bc\xc0'.encode('utf-8')
59 l = encoding.tolocal(s)
60 self.assertIn(b'\xed', l)
61 self.assertTrue(l.endswith(b'?')) # lossy
62 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved
63
43 if __name__ == '__main__':
64 if __name__ == '__main__':
44 import silenttestrunner
65 import silenttestrunner
45 silenttestrunner.main(__name__)
66 silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now