Show More
@@ -504,11 +504,13 b' def toutf8b(s):' | |||||
504 | internal surrogate encoding as a UTF-8 string.) |
|
504 | internal surrogate encoding as a UTF-8 string.) | |
505 | ''' |
|
505 | ''' | |
506 |
|
506 | |||
507 | if isasciistr(s): |
|
507 | if isinstance(s, localstr): | |
|
508 | # assume that the original UTF-8 sequence would never contain | |||
|
509 | # invalid characters in U+DCxx range | |||
|
510 | return s._utf8 | |||
|
511 | elif isasciistr(s): | |||
508 | return s |
|
512 | return s | |
509 | if "\xed" not in s: |
|
513 | if "\xed" not in s: | |
510 | if isinstance(s, localstr): |
|
|||
511 | return s._utf8 |
|
|||
512 | try: |
|
514 | try: | |
513 | s.decode('utf-8', _utf8strict) |
|
515 | s.decode('utf-8', _utf8strict) | |
514 | return s |
|
516 | return s |
@@ -35,11 +35,32 b' class LocalEncodingTest(unittest.TestCas' | |||||
35 | self.assertTrue(s is encoding.fromlocal(s)) |
|
35 | self.assertTrue(s is encoding.fromlocal(s)) | |
36 |
|
36 | |||
37 | class Utf8bEncodingTest(unittest.TestCase): |
|
37 | class Utf8bEncodingTest(unittest.TestCase): | |
|
38 | def setUp(self): | |||
|
39 | self.origencoding = encoding.encoding | |||
|
40 | ||||
|
41 | def tearDown(self): | |||
|
42 | encoding.encoding = self.origencoding | |||
|
43 | ||||
38 | def testasciifastpath(self): |
|
44 | def testasciifastpath(self): | |
39 | s = b'\0' * 100 |
|
45 | s = b'\0' * 100 | |
40 | self.assertTrue(s is encoding.toutf8b(s)) |
|
46 | self.assertTrue(s is encoding.toutf8b(s)) | |
41 | self.assertTrue(s is encoding.fromutf8b(s)) |
|
47 | self.assertTrue(s is encoding.fromutf8b(s)) | |
42 |
|
48 | |||
|
49 | def testlossylatin(self): | |||
|
50 | encoding.encoding = b'ascii' | |||
|
51 | s = u'\xc0'.encode('utf-8') | |||
|
52 | l = encoding.tolocal(s) | |||
|
53 | self.assertEqual(l, b'?') # lossy | |||
|
54 | self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved | |||
|
55 | ||||
|
56 | def testlossy0xed(self): | |||
|
57 | encoding.encoding = b'euc-kr' # U+Dxxx Hangul | |||
|
58 | s = u'\ud1bc\xc0'.encode('utf-8') | |||
|
59 | l = encoding.tolocal(s) | |||
|
60 | self.assertIn(b'\xed', l) | |||
|
61 | self.assertTrue(l.endswith(b'?')) # lossy | |||
|
62 | self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved | |||
|
63 | ||||
43 | if __name__ == '__main__': |
|
64 | if __name__ == '__main__': | |
44 | import silenttestrunner |
|
65 | import silenttestrunner | |
45 | silenttestrunner.main(__name__) |
|
66 | silenttestrunner.main(__name__) |
General Comments 0
You need to be logged in to leave comments.
Login now