##// END OF EJS Templates
charencode: remove Python 2 support code...
Gregory Szorc -
r49762:b677bccf default
parent child Browse files
Show More
@@ -1,89 +1,86 b''
1 # charencode.py - miscellaneous character encoding
1 # charencode.py - miscellaneous character encoding
2 #
2 #
3 # Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import array
9 import array
10
10
11 from .. import pycompat
11 from .. import pycompat
12
12
13
13
def isasciistr(s):
    """Tell whether the byte string ``s`` contains only ASCII characters.

    Returns True when ``s`` decodes as ASCII, False when decoding raises
    UnicodeDecodeError.
    """
    try:
        # The decoded text is thrown away; decoding is only a validity probe.
        s.decode('ascii')
    except UnicodeDecodeError:
        return False
    else:
        return True
20
20
21
21
def asciilower(s):
    """Return a lowercased copy of ``s``, which must be pure ASCII.

    Raises UnicodeDecodeError if non-ASCII characters are found.
    """
    # Decoding is done purely for validation; its result is discarded.
    s.decode('ascii')
    lowered = s.lower()
    return lowered
28
28
29
29
def asciiupper(s):
    """Return an uppercased copy of ``s``, which must be pure ASCII.

    Raises UnicodeDecodeError if non-ASCII characters are found.
    """
    # Decoding is done purely for validation; its result is discarded.
    s.decode('ascii')
    uppered = s.upper()
    return uppered
36
36
37
37
# Per-byte JSON escape tables: index with a byte value, get back the bytes
# to emit for it.
#
# Control characters default to a \uXXXX escape, printable ASCII maps to
# itself, and DEL (0x7f) is escaped as well.
_jsonmap = [b"\\u%04x" % code for code in range(32)]
_jsonmap += [pycompat.bytechr(code) for code in range(32, 127)]
_jsonmap.append(b'\\u007f')

# Short escapes override the generic entries.
_jsonmap[0x08] = b'\\b'
_jsonmap[0x09] = b'\\t'
_jsonmap[0x0A] = b'\\n'
_jsonmap[0x0C] = b'\\f'
_jsonmap[0x0D] = b'\\r'
_jsonmap[0x22] = b'\\"'
_jsonmap[0x5C] = b'\\\\'

# The paranoid table is copied while only the 128 ASCII entries exist, so
# indexing it with a byte >= 0x80 raises IndexError.
_paranoidjsonmap = list(_jsonmap)
_paranoidjsonmap[0x3C] = b'\\u003c'  # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3E] = b'\\u003e'  # '>'

# Only the regular table passes bytes 0x80-0xff through unchanged.
_jsonmap += [pycompat.bytechr(code) for code in range(128, 256)]
53
53
54
54
def jsonescapeu8fast(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)

    Every input byte is replaced by its entry in a per-byte lookup table;
    ``paranoid`` selects the table that is used.

    Raises ValueError if non-ASCII characters have to be escaped.
    """
    table = _paranoidjsonmap if paranoid else _jsonmap
    try:
        escaped = [table[byte] for byte in bytearray(u8chars)]
    except IndexError:
        # The byte value lies beyond the end of the selected table.
        raise ValueError
    return b''.join(escaped)
68
68
69
69
70 if pycompat.ispy3:
71 _utf8strict = r'surrogatepass'
70 _utf8strict = r'surrogatepass'
72 else:
73 _utf8strict = r'strict'
74
71
75
72
def jsonescapeu8fallback(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)

    The input is re-encoded as UTF-16 and processed one 16-bit code unit at
    a time.  Escapes all non-ASCII characters no matter if paranoid is False.
    """
    table = _paranoidjsonmap if paranoid else _jsonmap
    text = u8chars.decode('utf-8', _utf8strict)
    # non-BMP char is represented as UTF-16 surrogate pair
    units = array.array('H', text.encode('utf-16', _utf8strict))
    units.pop(0)  # drop BOM
    pieces = []
    for unit in units:
        if unit < 128:
            # ASCII code unit: reuse the per-byte escape table.
            pieces.append(table[unit])
        else:
            pieces.append(b'\\u%04x' % unit)
    return b''.join(pieces)
General Comments 0
You need to be logged in to leave comments. Login now