Show More
@@ -1,401 +1,401 | |||
|
1 | 1 | /* |
|
2 | 2 | charencode.c - miscellaneous character encoding |
|
3 | 3 | |
|
4 | 4 | Copyright 2008 Matt Mackall <mpm@selenic.com> and others |
|
5 | 5 | |
|
6 | 6 | This software may be used and distributed according to the terms of |
|
7 | 7 | the GNU General Public License, incorporated herein by reference. |
|
8 | 8 | */ |
|
9 | 9 | |
|
10 | 10 | #define PY_SSIZE_T_CLEAN |
|
11 | 11 | #include <Python.h> |
|
12 | 12 | #include <assert.h> |
|
13 | 13 | |
|
14 | 14 | #include "charencode.h" |
|
15 | 15 | #include "compat.h" |
|
16 | 16 | #include "util.h" |
|
17 | 17 | |
|
18 | 18 | #ifdef IS_PY3K |
|
19 | 19 | /* The mapping of Python types is meant to be temporary to get Python |
|
20 | 20 | * 3 to compile. We should remove this once Python 3 support is fully |
|
21 | 21 | * supported and proper types are used in the extensions themselves. */ |
|
22 | 22 | #define PyInt_Type PyLong_Type |
|
23 | 23 | #define PyInt_AS_LONG PyLong_AS_LONG |
|
24 | 24 | #endif |
|
25 | 25 | |
|
26 | 26 | /* clang-format off */ |
|
/* ASCII lowercase map indexed by a 7-bit code: 'A'-'Z' (0x41-0x5a) map to
   'a'-'z' (0x61-0x7a); every other code maps to itself. */
27 | 27 | static const char lowertable[128] = { |

28 | 28 | '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', |

29 | 29 | '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', |

30 | 30 | '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', |

31 | 31 | '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', |

32 | 32 | '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', |

33 | 33 | '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', |

34 | 34 | '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', |

35 | 35 | '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', |

36 | 36 | '\x40', |

37 | 37 | '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */ |

38 | 38 | '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */ |

39 | 39 | '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */ |

40 | 40 | '\x78', '\x79', '\x7a', /* X-Z */ |

41 | 41 | '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', |

42 | 42 | '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', |

43 | 43 | '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', |

44 | 44 | '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', |

45 | 45 | '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' |

46 | 46 | }; |
|
47 | 47 | |
|
/* ASCII uppercase map indexed by a 7-bit code: 'a'-'z' (0x61-0x7a) map to
   'A'-'Z' (0x41-0x5a); every other code maps to itself. */
48 | 48 | static const char uppertable[128] = { |

49 | 49 | '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', |

50 | 50 | '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', |

51 | 51 | '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', |

52 | 52 | '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', |

53 | 53 | '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', |

54 | 54 | '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', |

55 | 55 | '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', |

56 | 56 | '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', |

57 | 57 | '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', |

58 | 58 | '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', |

59 | 59 | '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', |

60 | 60 | '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', |

61 | 61 | '\x60', |

62 | 62 | '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */ |

63 | 63 | '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */ |

64 | 64 | '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */ |

65 | 65 | '\x58', '\x59', '\x5a', /* x-z */ |

66 | 66 | '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' |

67 | 67 | }; |
|
68 | /* clang-format on */ | |
|
69 | 68 | |
|
/* Escaped length of every byte value in the non-paranoid JSON encoding:
   \b \t \n \f \r, '"' and '\\' take two bytes, the remaining control
   codes and DEL (0x7f) take six, and everything else (including bytes
   0x80-0xff) is copied through as one byte. */
70 | 69 | /* 1: no escape, 2: \<c>, 6: \u<x> */ |

71 | 70 | static const uint8_t jsonlentable[256] = { |

72 | 71 | 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */ |

73 | 72 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |

74 | 73 | 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */ |

75 | 74 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

76 | 75 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

77 | 76 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */ |

78 | 77 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

79 | 78 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */ |

80 | 79 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

81 | 80 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

82 | 81 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

83 | 82 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

84 | 83 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

85 | 84 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

86 | 85 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

87 | 86 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

88 | 87 | }; |
|
89 | 88 | |
|
/* Escaped length of each 7-bit code in the paranoid JSON encoding. Same as
   the non-paranoid lengths except that '<' and '>' also take the six-byte
   \u form. Only 128 entries: callers must not index it with bytes >= 0x80. */
90 | 89 | static const uint8_t jsonparanoidlentable[128] = { |

91 | 90 | 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */ |

92 | 91 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |

93 | 92 | 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */ |

94 | 93 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */ |

95 | 94 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

96 | 95 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */ |

97 | 96 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |

98 | 97 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */ |

99 | 98 | }; |
|
100 | 99 | |
|
/* Lowercase hex digit for each nibble value 0-15. */
101 | 100 | static const char hexchartable[16] = { |

102 | 101 | '0', '1', '2', '3', '4', '5', '6', '7', |

103 | 102 | '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', |

104 | 103 | }; |
|
104 | /* clang-format on */ | |
|
105 | 105 | |
|
106 | 106 | /* |
|
107 | 107 | * Turn a hex-encoded string into binary. |
|
108 | 108 | */ |
|
109 | 109 | PyObject *unhexlify(const char *str, Py_ssize_t len) |
|
110 | 110 | { |
|
111 | 111 | PyObject *ret; |
|
112 | 112 | char *d; |
|
113 | 113 | Py_ssize_t i; |
|
114 | 114 | |
|
115 | 115 | ret = PyBytes_FromStringAndSize(NULL, len / 2); |
|
116 | 116 | |
|
117 | 117 | if (!ret) |
|
118 | 118 | return NULL; |
|
119 | 119 | |
|
120 | 120 | d = PyBytes_AsString(ret); |
|
121 | 121 | |
|
122 | 122 | for (i = 0; i < len;) { |
|
123 | 123 | int hi = hexdigit(str, i++); |
|
124 | 124 | int lo = hexdigit(str, i++); |
|
125 | 125 | *d++ = (hi << 4) | lo; |
|
126 | 126 | } |
|
127 | 127 | |
|
128 | 128 | return ret; |
|
129 | 129 | } |
|
130 | 130 | |
|
131 | 131 | PyObject *isasciistr(PyObject *self, PyObject *args) |
|
132 | 132 | { |
|
133 | 133 | const char *buf; |
|
134 | 134 | Py_ssize_t i, len; |
|
135 | 135 | if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len)) |
|
136 | 136 | return NULL; |
|
137 | 137 | i = 0; |
|
138 | 138 | /* char array in PyStringObject should be at least 4-byte aligned */ |
|
139 | 139 | if (((uintptr_t)buf & 3) == 0) { |
|
140 | 140 | const uint32_t *p = (const uint32_t *)buf; |
|
141 | 141 | for (; i < len / 4; i++) { |
|
142 | 142 | if (p[i] & 0x80808080U) |
|
143 | 143 | Py_RETURN_FALSE; |
|
144 | 144 | } |
|
145 | 145 | i *= 4; |
|
146 | 146 | } |
|
147 | 147 | for (; i < len; i++) { |
|
148 | 148 | if (buf[i] & 0x80) |
|
149 | 149 | Py_RETURN_FALSE; |
|
150 | 150 | } |
|
151 | 151 | Py_RETURN_TRUE; |
|
152 | 152 | } |
|
153 | 153 | |
|
154 | 154 | static inline PyObject *_asciitransform(PyObject *str_obj, |
|
155 | 155 | const char table[128], |
|
156 | 156 | PyObject *fallback_fn) |
|
157 | 157 | { |
|
158 | 158 | char *str, *newstr; |
|
159 | 159 | Py_ssize_t i, len; |
|
160 | 160 | PyObject *newobj = NULL; |
|
161 | 161 | PyObject *ret = NULL; |
|
162 | 162 | |
|
163 | 163 | str = PyBytes_AS_STRING(str_obj); |
|
164 | 164 | len = PyBytes_GET_SIZE(str_obj); |
|
165 | 165 | |
|
166 | 166 | newobj = PyBytes_FromStringAndSize(NULL, len); |
|
167 | 167 | if (!newobj) |
|
168 | 168 | goto quit; |
|
169 | 169 | |
|
170 | 170 | newstr = PyBytes_AS_STRING(newobj); |
|
171 | 171 | |
|
172 | 172 | for (i = 0; i < len; i++) { |
|
173 | 173 | char c = str[i]; |
|
174 | 174 | if (c & 0x80) { |
|
175 | 175 | if (fallback_fn != NULL) { |
|
176 | 176 | ret = PyObject_CallFunctionObjArgs(fallback_fn, |
|
177 | 177 | str_obj, NULL); |
|
178 | 178 | } else { |
|
179 | 179 | PyObject *err = PyUnicodeDecodeError_Create( |
|
180 | 180 | "ascii", str, len, i, (i + 1), |
|
181 | 181 | "unexpected code byte"); |
|
182 | 182 | PyErr_SetObject(PyExc_UnicodeDecodeError, err); |
|
183 | 183 | Py_XDECREF(err); |
|
184 | 184 | } |
|
185 | 185 | goto quit; |
|
186 | 186 | } |
|
187 | 187 | newstr[i] = table[(unsigned char)c]; |
|
188 | 188 | } |
|
189 | 189 | |
|
190 | 190 | ret = newobj; |
|
191 | 191 | Py_INCREF(ret); |
|
192 | 192 | quit: |
|
193 | 193 | Py_XDECREF(newobj); |
|
194 | 194 | return ret; |
|
195 | 195 | } |
|
196 | 196 | |
|
197 | 197 | PyObject *asciilower(PyObject *self, PyObject *args) |
|
198 | 198 | { |
|
199 | 199 | PyObject *str_obj; |
|
200 | 200 | if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) |
|
201 | 201 | return NULL; |
|
202 | 202 | return _asciitransform(str_obj, lowertable, NULL); |
|
203 | 203 | } |
|
204 | 204 | |
|
205 | 205 | PyObject *asciiupper(PyObject *self, PyObject *args) |
|
206 | 206 | { |
|
207 | 207 | PyObject *str_obj; |
|
208 | 208 | if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) |
|
209 | 209 | return NULL; |
|
210 | 210 | return _asciitransform(str_obj, uppertable, NULL); |
|
211 | 211 | } |
|
212 | 212 | |
|
213 | 213 | PyObject *make_file_foldmap(PyObject *self, PyObject *args) |
|
214 | 214 | { |
|
215 | 215 | PyObject *dmap, *spec_obj, *normcase_fallback; |
|
216 | 216 | PyObject *file_foldmap = NULL; |
|
217 | 217 | enum normcase_spec spec; |
|
218 | 218 | PyObject *k, *v; |
|
219 | 219 | dirstateTupleObject *tuple; |
|
220 | 220 | Py_ssize_t pos = 0; |
|
221 | 221 | const char *table; |
|
222 | 222 | |
|
223 | 223 | if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", |
|
224 | 224 | &PyDict_Type, &dmap, |
|
225 | 225 | &PyInt_Type, &spec_obj, |
|
226 | 226 | &PyFunction_Type, &normcase_fallback)) |
|
227 | 227 | goto quit; |
|
228 | 228 | |
|
229 | 229 | spec = (int)PyInt_AS_LONG(spec_obj); |
|
230 | 230 | switch (spec) { |
|
231 | 231 | case NORMCASE_LOWER: |
|
232 | 232 | table = lowertable; |
|
233 | 233 | break; |
|
234 | 234 | case NORMCASE_UPPER: |
|
235 | 235 | table = uppertable; |
|
236 | 236 | break; |
|
237 | 237 | case NORMCASE_OTHER: |
|
238 | 238 | table = NULL; |
|
239 | 239 | break; |
|
240 | 240 | default: |
|
241 | 241 | PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); |
|
242 | 242 | goto quit; |
|
243 | 243 | } |
|
244 | 244 | |
|
245 | 245 | /* Add some more entries to deal with additions outside this |
|
246 | 246 | function. */ |
|
247 | 247 | file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); |
|
248 | 248 | if (file_foldmap == NULL) |
|
249 | 249 | goto quit; |
|
250 | 250 | |
|
251 | 251 | while (PyDict_Next(dmap, &pos, &k, &v)) { |
|
252 | 252 | if (!dirstate_tuple_check(v)) { |
|
253 | 253 | PyErr_SetString(PyExc_TypeError, |
|
254 | 254 | "expected a dirstate tuple"); |
|
255 | 255 | goto quit; |
|
256 | 256 | } |
|
257 | 257 | |
|
258 | 258 | tuple = (dirstateTupleObject *)v; |
|
259 | 259 | if (tuple->state != 'r') { |
|
260 | 260 | PyObject *normed; |
|
261 | 261 | if (table != NULL) { |
|
262 | 262 | normed = _asciitransform(k, table, |
|
263 | 263 | normcase_fallback); |
|
264 | 264 | } else { |
|
265 | 265 | normed = PyObject_CallFunctionObjArgs( |
|
266 | 266 | normcase_fallback, k, NULL); |
|
267 | 267 | } |
|
268 | 268 | |
|
269 | 269 | if (normed == NULL) |
|
270 | 270 | goto quit; |
|
271 | 271 | if (PyDict_SetItem(file_foldmap, normed, k) == -1) { |
|
272 | 272 | Py_DECREF(normed); |
|
273 | 273 | goto quit; |
|
274 | 274 | } |
|
275 | 275 | Py_DECREF(normed); |
|
276 | 276 | } |
|
277 | 277 | } |
|
278 | 278 | return file_foldmap; |
|
279 | 279 | quit: |
|
280 | 280 | Py_XDECREF(file_foldmap); |
|
281 | 281 | return NULL; |
|
282 | 282 | } |
|
283 | 283 | |
|
284 | 284 | /* calculate length of JSON-escaped string; returns -1 if unsupported */ |
|
285 | 285 | static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid) |
|
286 | 286 | { |
|
287 | 287 | Py_ssize_t i, esclen = 0; |
|
288 | 288 | |
|
289 | 289 | if (paranoid) { |
|
290 | 290 | /* don't want to process multi-byte escapes in C */ |
|
291 | 291 | for (i = 0; i < len; i++) { |
|
292 | 292 | char c = buf[i]; |
|
293 | 293 | if (c & 0x80) { |
|
294 | 294 | PyErr_SetString(PyExc_ValueError, |
|
295 | 295 | "cannot process non-ascii str"); |
|
296 | 296 | return -1; |
|
297 | 297 | } |
|
298 | 298 | esclen += jsonparanoidlentable[(unsigned char)c]; |
|
299 | 299 | if (esclen < 0) { |
|
300 | 300 | PyErr_SetString(PyExc_MemoryError, |
|
301 | 301 | "overflow in jsonescapelen"); |
|
302 | 302 | return -1; |
|
303 | 303 | } |
|
304 | 304 | } |
|
305 | 305 | } else { |
|
306 | 306 | for (i = 0; i < len; i++) { |
|
307 | 307 | char c = buf[i]; |
|
308 | 308 | esclen += jsonlentable[(unsigned char)c]; |
|
309 | 309 | if (esclen < 0) { |
|
310 | 310 | PyErr_SetString(PyExc_MemoryError, |
|
311 | 311 | "overflow in jsonescapelen"); |
|
312 | 312 | return -1; |
|
313 | 313 | } |
|
314 | 314 | } |
|
315 | 315 | } |
|
316 | 316 | |
|
317 | 317 | return esclen; |
|
318 | 318 | } |
|
319 | 319 | |
|
/* Return the character that follows the backslash in the two-byte escape
   '\<c>' for 'c', or '\0' if 'c' has no such escape. */
static char jsonescapechar2(char c)
{
	/* raw[k] is escaped as a backslash followed by letter[k] */
	static const char raw[] = {'\b', '\t', '\n', '\f', '\r', '"', '\\'};
	static const char letter[] = {'b', 't', 'n', 'f', 'r', '"', '\\'};
	unsigned int k;

	for (k = 0; k < sizeof(raw); k++) {
		if (raw[k] == c) {
			return letter[k];
		}
	}
	return '\0'; /* should not happen */
}
|
341 | 341 | |
|
342 | 342 | /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only |
|
343 | 343 | include characters mappable by json(paranoid)lentable */ |
|
344 | 344 | static void encodejsonescape(char *escbuf, Py_ssize_t esclen, |
|
345 | 345 | const char *origbuf, Py_ssize_t origlen, |
|
346 | 346 | bool paranoid) |
|
347 | 347 | { |
|
348 | 348 | const uint8_t *lentable = |
|
349 | 349 | (paranoid) ? jsonparanoidlentable : jsonlentable; |
|
350 | 350 | Py_ssize_t i, j; |
|
351 | 351 | |
|
352 | 352 | for (i = 0, j = 0; i < origlen; i++) { |
|
353 | 353 | char c = origbuf[i]; |
|
354 | 354 | uint8_t l = lentable[(unsigned char)c]; |
|
355 | 355 | assert(j + l <= esclen); |
|
356 | 356 | switch (l) { |
|
357 | 357 | case 1: |
|
358 | 358 | escbuf[j] = c; |
|
359 | 359 | break; |
|
360 | 360 | case 2: |
|
361 | 361 | escbuf[j] = '\\'; |
|
362 | 362 | escbuf[j + 1] = jsonescapechar2(c); |
|
363 | 363 | break; |
|
364 | 364 | case 6: |
|
365 | 365 | memcpy(escbuf + j, "\\u00", 4); |
|
366 | 366 | escbuf[j + 4] = hexchartable[(unsigned char)c >> 4]; |
|
367 | 367 | escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf]; |
|
368 | 368 | break; |
|
369 | 369 | } |
|
370 | 370 | j += l; |
|
371 | 371 | } |
|
372 | 372 | } |
|
373 | 373 | |
|
374 | 374 | PyObject *jsonescapeu8fast(PyObject *self, PyObject *args) |
|
375 | 375 | { |
|
376 | 376 | PyObject *origstr, *escstr; |
|
377 | 377 | const char *origbuf; |
|
378 | 378 | Py_ssize_t origlen, esclen; |
|
379 | 379 | int paranoid; |
|
380 | 380 | if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast", |
|
381 | 381 | &PyBytes_Type, &origstr, ¶noid)) |
|
382 | 382 | return NULL; |
|
383 | 383 | |
|
384 | 384 | origbuf = PyBytes_AS_STRING(origstr); |
|
385 | 385 | origlen = PyBytes_GET_SIZE(origstr); |
|
386 | 386 | esclen = jsonescapelen(origbuf, origlen, paranoid); |
|
387 | 387 | if (esclen < 0) |
|
388 | 388 | return NULL; /* unsupported char found or overflow */ |
|
389 | 389 | if (origlen == esclen) { |
|
390 | 390 | Py_INCREF(origstr); |
|
391 | 391 | return origstr; |
|
392 | 392 | } |
|
393 | 393 | |
|
394 | 394 | escstr = PyBytes_FromStringAndSize(NULL, esclen); |
|
395 | 395 | if (!escstr) |
|
396 | 396 | return NULL; |
|
397 | 397 | encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen, |
|
398 | 398 | paranoid); |
|
399 | 399 | |
|
400 | 400 | return escstr; |
|
401 | 401 | } |
@@ -1,59 +1,61 | |||
|
1 | 1 | /* |
|
2 | 2 | charencode.h - miscellaneous character encoding |
|
3 | 3 | |
|
4 | 4 | This software may be used and distributed according to the terms of |
|
5 | 5 | the GNU General Public License, incorporated herein by reference. |
|
6 | 6 | */ |
|
7 | 7 | |
|
8 | 8 | #ifndef _HG_CHARENCODE_H_ |
|
9 | 9 | #define _HG_CHARENCODE_H_ |
|
10 | 10 | |
|
11 | 11 | #include <Python.h> |
|
12 | 12 | #include "compat.h" |
|
13 | 13 | |
|
/* Case-normalization modes. The numeric values should be kept in sync with
   normcasespecs in encoding.py. */
enum normcase_spec {
	NORMCASE_LOWER = -1,
	NORMCASE_OTHER = 0,
	NORMCASE_UPPER = 1
};
|
20 | 20 | |
|
21 | 21 | PyObject *unhexlify(const char *str, Py_ssize_t len); |
|
22 | 22 | PyObject *isasciistr(PyObject *self, PyObject *args); |
|
23 | 23 | PyObject *asciilower(PyObject *self, PyObject *args); |
|
24 | 24 | PyObject *asciiupper(PyObject *self, PyObject *args); |
|
25 | 25 | PyObject *make_file_foldmap(PyObject *self, PyObject *args); |
|
26 | 26 | PyObject *jsonescapeu8fast(PyObject *self, PyObject *args); |
|
27 | 27 | |
|
28 | /* clang-format off */ | |
|
/* Value of each byte when read as a hex digit: '0'-'9' give 0-9, 'A'-'F'
   and 'a'-'f' give 10-15, and every other byte gives -1. */
28 | 29 | static const int8_t hextable[256] = { |

29 | 30 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

30 | 31 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

31 | 32 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

32 | 33 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */ |

33 | 34 | -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */ |

34 | 35 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

35 | 36 | -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */ |

36 | 37 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

37 | 38 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

38 | 39 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

39 | 40 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

40 | 41 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

41 | 42 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

42 | 43 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

43 | 44 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |

44 | 45 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 |

45 | 46 | }; |
|
47 | /* clang-format on */ | |
|
46 | 48 | |
|
47 | 49 | static inline int hexdigit(const char *p, Py_ssize_t off) |
|
48 | 50 | { |
|
49 | 51 | int8_t val = hextable[(unsigned char)p[off]]; |
|
50 | 52 | |
|
51 | 53 | if (val >= 0) { |
|
52 | 54 | return val; |
|
53 | 55 | } |
|
54 | 56 | |
|
55 | 57 | PyErr_SetString(PyExc_ValueError, "input contains non-hex character"); |
|
56 | 58 | return 0; |
|
57 | 59 | } |
|
58 | 60 | |
|
59 | 61 | #endif /* _HG_CHARENCODE_H_ */ |
General Comments 0
You need to be logged in to leave comments.
Login now