Show More
@@ -1,382 +1,392 | |||||
/*
 charencode.c - miscellaneous character encoding

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/
9 |
|
9 | |||
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <assert.h>
#include <string.h>

#include "charencode.h"
#include "compat.h"
#include "util.h"
17 |
|
17 | |||
18 | #ifdef IS_PY3K |
|
18 | #ifdef IS_PY3K | |
19 | /* The mapping of Python types is meant to be temporary to get Python |
|
19 | /* The mapping of Python types is meant to be temporary to get Python | |
20 | * 3 to compile. We should remove this once Python 3 support is fully |
|
20 | * 3 to compile. We should remove this once Python 3 support is fully | |
21 | * supported and proper types are used in the extensions themselves. */ |
|
21 | * supported and proper types are used in the extensions themselves. */ | |
22 | #define PyInt_Type PyLong_Type |
|
22 | #define PyInt_Type PyLong_Type | |
23 | #define PyInt_AS_LONG PyLong_AS_LONG |
|
23 | #define PyInt_AS_LONG PyLong_AS_LONG | |
24 | #endif |
|
24 | #endif | |
25 |
|
25 | |||
/* ASCII lower-casing map, indexed by a 7-bit byte value: 'A'-'Z' map to
 * 'a'-'z' and every other value maps to itself. */
static const char lowertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40',
	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
	'\x78', '\x79', '\x7a',                                         /* X-Z */
	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
46 |
|
46 | |||
/* ASCII upper-casing map, indexed by a 7-bit byte value: 'a'-'z' map to
 * 'A'-'Z' and every other value maps to itself. */
static const char uppertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60',
	        '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
	'\x58', '\x59', '\x5a',                                         /* x-z */
	                        '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
67 |
|
67 | |||
/* Escaped length of each byte value in JSON output:
 * 1: no escape, 2: \<c>, 6: \u<x> */
static const uint8_t jsonlentable[256] = {
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
87 |
|
87 | |||
/* Escaped length of each 7-bit byte value in "paranoid" JSON output.
 * Same 1/2/6 encoding as the non-paranoid table, except that '<' and '>'
 * also take the 6-byte form.  Only covers 0x00-0x7f. */
static const uint8_t jsonparanoidlentable[128] = {
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
};
98 |
|
98 | |||
/* Lower-case hex digit for each nibble value 0-15. */
static const char hexchartable[16] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
};
103 |
|
103 | |||
104 | /* |
|
104 | /* | |
105 | * Turn a hex-encoded string into binary. |
|
105 | * Turn a hex-encoded string into binary. | |
106 | */ |
|
106 | */ | |
107 | PyObject *unhexlify(const char *str, Py_ssize_t len) |
|
107 | PyObject *unhexlify(const char *str, Py_ssize_t len) | |
108 | { |
|
108 | { | |
109 | PyObject *ret; |
|
109 | PyObject *ret; | |
110 | char *d; |
|
110 | char *d; | |
111 | Py_ssize_t i; |
|
111 | Py_ssize_t i; | |
112 |
|
112 | |||
113 | ret = PyBytes_FromStringAndSize(NULL, len / 2); |
|
113 | ret = PyBytes_FromStringAndSize(NULL, len / 2); | |
114 |
|
114 | |||
115 | if (!ret) |
|
115 | if (!ret) | |
116 | return NULL; |
|
116 | return NULL; | |
117 |
|
117 | |||
118 | d = PyBytes_AsString(ret); |
|
118 | d = PyBytes_AsString(ret); | |
119 |
|
119 | |||
120 | for (i = 0; i < len;) { |
|
120 | for (i = 0; i < len;) { | |
121 | int hi = hexdigit(str, i++); |
|
121 | int hi = hexdigit(str, i++); | |
122 | int lo = hexdigit(str, i++); |
|
122 | int lo = hexdigit(str, i++); | |
123 | *d++ = (hi << 4) | lo; |
|
123 | *d++ = (hi << 4) | lo; | |
124 | } |
|
124 | } | |
125 |
|
125 | |||
126 | return ret; |
|
126 | return ret; | |
127 | } |
|
127 | } | |
128 |
|
128 | |||
129 | PyObject *isasciistr(PyObject *self, PyObject *args) |
|
129 | PyObject *isasciistr(PyObject *self, PyObject *args) | |
130 | { |
|
130 | { | |
131 | const char *buf; |
|
131 | const char *buf; | |
132 | Py_ssize_t i, len; |
|
132 | Py_ssize_t i, len; | |
133 | if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len)) |
|
133 | if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len)) | |
134 | return NULL; |
|
134 | return NULL; | |
135 | i = 0; |
|
135 | i = 0; | |
136 | /* char array in PyStringObject should be at least 4-byte aligned */ |
|
136 | /* char array in PyStringObject should be at least 4-byte aligned */ | |
137 | if (((uintptr_t)buf & 3) == 0) { |
|
137 | if (((uintptr_t)buf & 3) == 0) { | |
138 | const uint32_t *p = (const uint32_t *)buf; |
|
138 | const uint32_t *p = (const uint32_t *)buf; | |
139 | for (; i < len / 4; i++) { |
|
139 | for (; i < len / 4; i++) { | |
140 | if (p[i] & 0x80808080U) |
|
140 | if (p[i] & 0x80808080U) | |
141 | Py_RETURN_FALSE; |
|
141 | Py_RETURN_FALSE; | |
142 | } |
|
142 | } | |
143 | i *= 4; |
|
143 | i *= 4; | |
144 | } |
|
144 | } | |
145 | for (; i < len; i++) { |
|
145 | for (; i < len; i++) { | |
146 | if (buf[i] & 0x80) |
|
146 | if (buf[i] & 0x80) | |
147 | Py_RETURN_FALSE; |
|
147 | Py_RETURN_FALSE; | |
148 | } |
|
148 | } | |
149 | Py_RETURN_TRUE; |
|
149 | Py_RETURN_TRUE; | |
150 | } |
|
150 | } | |
151 |
|
151 | |||
152 | static inline PyObject *_asciitransform(PyObject *str_obj, |
|
152 | static inline PyObject *_asciitransform(PyObject *str_obj, | |
153 | const char table[128], |
|
153 | const char table[128], | |
154 | PyObject *fallback_fn) |
|
154 | PyObject *fallback_fn) | |
155 | { |
|
155 | { | |
156 | char *str, *newstr; |
|
156 | char *str, *newstr; | |
157 | Py_ssize_t i, len; |
|
157 | Py_ssize_t i, len; | |
158 | PyObject *newobj = NULL; |
|
158 | PyObject *newobj = NULL; | |
159 | PyObject *ret = NULL; |
|
159 | PyObject *ret = NULL; | |
160 |
|
160 | |||
161 | str = PyBytes_AS_STRING(str_obj); |
|
161 | str = PyBytes_AS_STRING(str_obj); | |
162 | len = PyBytes_GET_SIZE(str_obj); |
|
162 | len = PyBytes_GET_SIZE(str_obj); | |
163 |
|
163 | |||
164 | newobj = PyBytes_FromStringAndSize(NULL, len); |
|
164 | newobj = PyBytes_FromStringAndSize(NULL, len); | |
165 | if (!newobj) |
|
165 | if (!newobj) | |
166 | goto quit; |
|
166 | goto quit; | |
167 |
|
167 | |||
168 | newstr = PyBytes_AS_STRING(newobj); |
|
168 | newstr = PyBytes_AS_STRING(newobj); | |
169 |
|
169 | |||
170 | for (i = 0; i < len; i++) { |
|
170 | for (i = 0; i < len; i++) { | |
171 | char c = str[i]; |
|
171 | char c = str[i]; | |
172 | if (c & 0x80) { |
|
172 | if (c & 0x80) { | |
173 | if (fallback_fn != NULL) { |
|
173 | if (fallback_fn != NULL) { | |
174 | ret = PyObject_CallFunctionObjArgs(fallback_fn, |
|
174 | ret = PyObject_CallFunctionObjArgs(fallback_fn, | |
175 | str_obj, NULL); |
|
175 | str_obj, NULL); | |
176 | } else { |
|
176 | } else { | |
177 | PyObject *err = PyUnicodeDecodeError_Create( |
|
177 | PyObject *err = PyUnicodeDecodeError_Create( | |
178 | "ascii", str, len, i, (i + 1), |
|
178 | "ascii", str, len, i, (i + 1), | |
179 | "unexpected code byte"); |
|
179 | "unexpected code byte"); | |
180 | PyErr_SetObject(PyExc_UnicodeDecodeError, err); |
|
180 | PyErr_SetObject(PyExc_UnicodeDecodeError, err); | |
181 | Py_XDECREF(err); |
|
181 | Py_XDECREF(err); | |
182 | } |
|
182 | } | |
183 | goto quit; |
|
183 | goto quit; | |
184 | } |
|
184 | } | |
185 | newstr[i] = table[(unsigned char)c]; |
|
185 | newstr[i] = table[(unsigned char)c]; | |
186 | } |
|
186 | } | |
187 |
|
187 | |||
188 | ret = newobj; |
|
188 | ret = newobj; | |
189 | Py_INCREF(ret); |
|
189 | Py_INCREF(ret); | |
190 | quit: |
|
190 | quit: | |
191 | Py_XDECREF(newobj); |
|
191 | Py_XDECREF(newobj); | |
192 | return ret; |
|
192 | return ret; | |
193 | } |
|
193 | } | |
194 |
|
194 | |||
195 | PyObject *asciilower(PyObject *self, PyObject *args) |
|
195 | PyObject *asciilower(PyObject *self, PyObject *args) | |
196 | { |
|
196 | { | |
197 | PyObject *str_obj; |
|
197 | PyObject *str_obj; | |
198 | if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) |
|
198 | if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) | |
199 | return NULL; |
|
199 | return NULL; | |
200 | return _asciitransform(str_obj, lowertable, NULL); |
|
200 | return _asciitransform(str_obj, lowertable, NULL); | |
201 | } |
|
201 | } | |
202 |
|
202 | |||
203 | PyObject *asciiupper(PyObject *self, PyObject *args) |
|
203 | PyObject *asciiupper(PyObject *self, PyObject *args) | |
204 | { |
|
204 | { | |
205 | PyObject *str_obj; |
|
205 | PyObject *str_obj; | |
206 | if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) |
|
206 | if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) | |
207 | return NULL; |
|
207 | return NULL; | |
208 | return _asciitransform(str_obj, uppertable, NULL); |
|
208 | return _asciitransform(str_obj, uppertable, NULL); | |
209 | } |
|
209 | } | |
210 |
|
210 | |||
211 | PyObject *make_file_foldmap(PyObject *self, PyObject *args) |
|
211 | PyObject *make_file_foldmap(PyObject *self, PyObject *args) | |
212 | { |
|
212 | { | |
213 | PyObject *dmap, *spec_obj, *normcase_fallback; |
|
213 | PyObject *dmap, *spec_obj, *normcase_fallback; | |
214 | PyObject *file_foldmap = NULL; |
|
214 | PyObject *file_foldmap = NULL; | |
215 | enum normcase_spec spec; |
|
215 | enum normcase_spec spec; | |
216 | PyObject *k, *v; |
|
216 | PyObject *k, *v; | |
217 | dirstateTupleObject *tuple; |
|
217 | dirstateTupleObject *tuple; | |
218 | Py_ssize_t pos = 0; |
|
218 | Py_ssize_t pos = 0; | |
219 | const char *table; |
|
219 | const char *table; | |
220 |
|
220 | |||
221 | if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", |
|
221 | if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", | |
222 | &PyDict_Type, &dmap, |
|
222 | &PyDict_Type, &dmap, | |
223 | &PyInt_Type, &spec_obj, |
|
223 | &PyInt_Type, &spec_obj, | |
224 | &PyFunction_Type, &normcase_fallback)) |
|
224 | &PyFunction_Type, &normcase_fallback)) | |
225 | goto quit; |
|
225 | goto quit; | |
226 |
|
226 | |||
227 | spec = (int)PyInt_AS_LONG(spec_obj); |
|
227 | spec = (int)PyInt_AS_LONG(spec_obj); | |
228 | switch (spec) { |
|
228 | switch (spec) { | |
229 | case NORMCASE_LOWER: |
|
229 | case NORMCASE_LOWER: | |
230 | table = lowertable; |
|
230 | table = lowertable; | |
231 | break; |
|
231 | break; | |
232 | case NORMCASE_UPPER: |
|
232 | case NORMCASE_UPPER: | |
233 | table = uppertable; |
|
233 | table = uppertable; | |
234 | break; |
|
234 | break; | |
235 | case NORMCASE_OTHER: |
|
235 | case NORMCASE_OTHER: | |
236 | table = NULL; |
|
236 | table = NULL; | |
237 | break; |
|
237 | break; | |
238 | default: |
|
238 | default: | |
239 | PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); |
|
239 | PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); | |
240 | goto quit; |
|
240 | goto quit; | |
241 | } |
|
241 | } | |
242 |
|
242 | |||
243 | /* Add some more entries to deal with additions outside this |
|
243 | /* Add some more entries to deal with additions outside this | |
244 | function. */ |
|
244 | function. */ | |
245 | file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); |
|
245 | file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); | |
246 | if (file_foldmap == NULL) |
|
246 | if (file_foldmap == NULL) | |
247 | goto quit; |
|
247 | goto quit; | |
248 |
|
248 | |||
249 | while (PyDict_Next(dmap, &pos, &k, &v)) { |
|
249 | while (PyDict_Next(dmap, &pos, &k, &v)) { | |
250 | if (!dirstate_tuple_check(v)) { |
|
250 | if (!dirstate_tuple_check(v)) { | |
251 | PyErr_SetString(PyExc_TypeError, |
|
251 | PyErr_SetString(PyExc_TypeError, | |
252 | "expected a dirstate tuple"); |
|
252 | "expected a dirstate tuple"); | |
253 | goto quit; |
|
253 | goto quit; | |
254 | } |
|
254 | } | |
255 |
|
255 | |||
256 | tuple = (dirstateTupleObject *)v; |
|
256 | tuple = (dirstateTupleObject *)v; | |
257 | if (tuple->state != 'r') { |
|
257 | if (tuple->state != 'r') { | |
258 | PyObject *normed; |
|
258 | PyObject *normed; | |
259 | if (table != NULL) { |
|
259 | if (table != NULL) { | |
260 | normed = _asciitransform(k, table, |
|
260 | normed = _asciitransform(k, table, | |
261 | normcase_fallback); |
|
261 | normcase_fallback); | |
262 | } else { |
|
262 | } else { | |
263 | normed = PyObject_CallFunctionObjArgs( |
|
263 | normed = PyObject_CallFunctionObjArgs( | |
264 | normcase_fallback, k, NULL); |
|
264 | normcase_fallback, k, NULL); | |
265 | } |
|
265 | } | |
266 |
|
266 | |||
267 | if (normed == NULL) |
|
267 | if (normed == NULL) | |
268 | goto quit; |
|
268 | goto quit; | |
269 | if (PyDict_SetItem(file_foldmap, normed, k) == -1) { |
|
269 | if (PyDict_SetItem(file_foldmap, normed, k) == -1) { | |
270 | Py_DECREF(normed); |
|
270 | Py_DECREF(normed); | |
271 | goto quit; |
|
271 | goto quit; | |
272 | } |
|
272 | } | |
273 | Py_DECREF(normed); |
|
273 | Py_DECREF(normed); | |
274 | } |
|
274 | } | |
275 | } |
|
275 | } | |
276 | return file_foldmap; |
|
276 | return file_foldmap; | |
277 | quit: |
|
277 | quit: | |
278 | Py_XDECREF(file_foldmap); |
|
278 | Py_XDECREF(file_foldmap); | |
279 | return NULL; |
|
279 | return NULL; | |
280 | } |
|
280 | } | |
281 |
|
281 | |||
282 | /* calculate length of JSON-escaped string; returns -1 if unsupported */ |
|
282 | /* calculate length of JSON-escaped string; returns -1 if unsupported */ | |
283 | static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid) |
|
283 | static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid) | |
284 | { |
|
284 | { | |
285 | Py_ssize_t i, esclen = 0; |
|
285 | Py_ssize_t i, esclen = 0; | |
286 |
|
286 | |||
287 | if (paranoid) { |
|
287 | if (paranoid) { | |
288 | /* don't want to process multi-byte escapes in C */ |
|
288 | /* don't want to process multi-byte escapes in C */ | |
289 | for (i = 0; i < len; i++) { |
|
289 | for (i = 0; i < len; i++) { | |
290 | char c = buf[i]; |
|
290 | char c = buf[i]; | |
291 | if (c & 0x80) { |
|
291 | if (c & 0x80) { | |
292 | PyErr_SetString(PyExc_ValueError, |
|
292 | PyErr_SetString(PyExc_ValueError, | |
293 | "cannot process non-ascii str"); |
|
293 | "cannot process non-ascii str"); | |
294 | return -1; |
|
294 | return -1; | |
295 | } |
|
295 | } | |
296 | esclen += jsonparanoidlentable[(unsigned char)c]; |
|
296 | esclen += jsonparanoidlentable[(unsigned char)c]; | |
|
297 | if (esclen < 0) { | |||
|
298 | PyErr_SetString(PyExc_MemoryError, | |||
|
299 | "overflow in jsonescapelen"); | |||
|
300 | return -1; | |||
|
301 | } | |||
297 | } |
|
302 | } | |
298 | } else { |
|
303 | } else { | |
299 | for (i = 0; i < len; i++) { |
|
304 | for (i = 0; i < len; i++) { | |
300 | char c = buf[i]; |
|
305 | char c = buf[i]; | |
301 | esclen += jsonlentable[(unsigned char)c]; |
|
306 | esclen += jsonlentable[(unsigned char)c]; | |
|
307 | if (esclen < 0) { | |||
|
308 | PyErr_SetString(PyExc_MemoryError, | |||
|
309 | "overflow in jsonescapelen"); | |||
|
310 | return -1; | |||
|
311 | } | |||
302 | } |
|
312 | } | |
303 | } |
|
313 | } | |
304 |
|
314 | |||
305 | return esclen; |
|
315 | return esclen; | |
306 | } |
|
316 | } | |
307 |
|
317 | |||
/* map '\<c>' escape character
 *
 * Returns the letter that follows the backslash in the two-character JSON
 * escape of 'c' (e.g. '\n' -> 'n', '"' -> '"').  Returns '\0' for any
 * character that has no two-character escape. */
static char jsonescapechar2(char c)
{
	switch (c) {
	case '\b':
		return 'b';
	case '\t':
		return 't';
	case '\n':
		return 'n';
	case '\f':
		return 'f';
	case '\r':
		return 'r';
	case '"':
		return '"';
	case '\\':
		return '\\';
	default:
		return '\0'; /* should not happen */
	}
}
322 |
|
332 | |||
323 | /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only |
|
333 | /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only | |
324 | include characters mappable by json(paranoid)lentable */ |
|
334 | include characters mappable by json(paranoid)lentable */ | |
325 | static void encodejsonescape(char *escbuf, Py_ssize_t esclen, |
|
335 | static void encodejsonescape(char *escbuf, Py_ssize_t esclen, | |
326 | const char *origbuf, Py_ssize_t origlen, |
|
336 | const char *origbuf, Py_ssize_t origlen, | |
327 | bool paranoid) |
|
337 | bool paranoid) | |
328 | { |
|
338 | { | |
329 | const uint8_t *lentable = |
|
339 | const uint8_t *lentable = | |
330 | (paranoid) ? jsonparanoidlentable : jsonlentable; |
|
340 | (paranoid) ? jsonparanoidlentable : jsonlentable; | |
331 | Py_ssize_t i, j; |
|
341 | Py_ssize_t i, j; | |
332 |
|
342 | |||
333 | for (i = 0, j = 0; i < origlen; i++) { |
|
343 | for (i = 0, j = 0; i < origlen; i++) { | |
334 | char c = origbuf[i]; |
|
344 | char c = origbuf[i]; | |
335 | uint8_t l = lentable[(unsigned char)c]; |
|
345 | uint8_t l = lentable[(unsigned char)c]; | |
336 | assert(j + l <= esclen); |
|
346 | assert(j + l <= esclen); | |
337 | switch (l) { |
|
347 | switch (l) { | |
338 | case 1: |
|
348 | case 1: | |
339 | escbuf[j] = c; |
|
349 | escbuf[j] = c; | |
340 | break; |
|
350 | break; | |
341 | case 2: |
|
351 | case 2: | |
342 | escbuf[j] = '\\'; |
|
352 | escbuf[j] = '\\'; | |
343 | escbuf[j + 1] = jsonescapechar2(c); |
|
353 | escbuf[j + 1] = jsonescapechar2(c); | |
344 | break; |
|
354 | break; | |
345 | case 6: |
|
355 | case 6: | |
346 | memcpy(escbuf + j, "\\u00", 4); |
|
356 | memcpy(escbuf + j, "\\u00", 4); | |
347 | escbuf[j + 4] = hexchartable[(unsigned char)c >> 4]; |
|
357 | escbuf[j + 4] = hexchartable[(unsigned char)c >> 4]; | |
348 | escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf]; |
|
358 | escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf]; | |
349 | break; |
|
359 | break; | |
350 | } |
|
360 | } | |
351 | j += l; |
|
361 | j += l; | |
352 | } |
|
362 | } | |
353 | } |
|
363 | } | |
354 |
|
364 | |||
355 | PyObject *jsonescapeu8fast(PyObject *self, PyObject *args) |
|
365 | PyObject *jsonescapeu8fast(PyObject *self, PyObject *args) | |
356 | { |
|
366 | { | |
357 | PyObject *origstr, *escstr; |
|
367 | PyObject *origstr, *escstr; | |
358 | const char *origbuf; |
|
368 | const char *origbuf; | |
359 | Py_ssize_t origlen, esclen; |
|
369 | Py_ssize_t origlen, esclen; | |
360 | int paranoid; |
|
370 | int paranoid; | |
361 | if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast", |
|
371 | if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast", | |
362 | &PyBytes_Type, &origstr, ¶noid)) |
|
372 | &PyBytes_Type, &origstr, ¶noid)) | |
363 | return NULL; |
|
373 | return NULL; | |
364 |
|
374 | |||
365 | origbuf = PyBytes_AS_STRING(origstr); |
|
375 | origbuf = PyBytes_AS_STRING(origstr); | |
366 | origlen = PyBytes_GET_SIZE(origstr); |
|
376 | origlen = PyBytes_GET_SIZE(origstr); | |
367 | esclen = jsonescapelen(origbuf, origlen, paranoid); |
|
377 | esclen = jsonescapelen(origbuf, origlen, paranoid); | |
368 | if (esclen < 0) |
|
378 | if (esclen < 0) | |
369 | return NULL; /* unsupported char found */ |
|
379 | return NULL; /* unsupported char found or overflow */ | |
370 | if (origlen == esclen) { |
|
380 | if (origlen == esclen) { | |
371 | Py_INCREF(origstr); |
|
381 | Py_INCREF(origstr); | |
372 | return origstr; |
|
382 | return origstr; | |
373 | } |
|
383 | } | |
374 |
|
384 | |||
375 | escstr = PyBytes_FromStringAndSize(NULL, esclen); |
|
385 | escstr = PyBytes_FromStringAndSize(NULL, esclen); | |
376 | if (!escstr) |
|
386 | if (!escstr) | |
377 | return NULL; |
|
387 | return NULL; | |
378 | encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen, |
|
388 | encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen, | |
379 | paranoid); |
|
389 | paranoid); | |
380 |
|
390 | |||
381 | return escstr; |
|
391 | return escstr; | |
382 | } |
|
392 | } |
General Comments 0
You need to be logged in to leave comments.
Login now