##// END OF EJS Templates
encoding: check overflow while calculating size of JSON escape buffer...
Yuya Nishihara -
r34032:e97be042 default
parent child Browse files
Show More
@@ -1,382 +1,392
1 1 /*
2 2 charencode.c - miscellaneous character encoding
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13
14 14 #include "charencode.h"
15 15 #include "compat.h"
16 16 #include "util.h"
17 17
18 18 #ifdef IS_PY3K
19 19 /* The mapping of Python types is meant to be temporary to get Python
20 20 * 3 to compile. We should remove this once Python 3 support is fully
21 21 * supported and proper types are used in the extensions themselves. */
22 22 #define PyInt_Type PyLong_Type
23 23 #define PyInt_AS_LONG PyLong_AS_LONG
24 24 #endif
25 25
/* ASCII lower-casing map indexed by byte value (0x00-0x7f): 'A'-'Z' map to
 * 'a'-'z', every other entry maps to itself. */
static const char lowertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40',
	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
	'\x78', '\x79', '\x7a',                                         /* X-Z */
	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
46 46
/* ASCII upper-casing map indexed by byte value (0x00-0x7f): 'a'-'z' map to
 * 'A'-'Z', every other entry maps to itself. */
static const char uppertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60',
	        '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
	'\x58', '\x59', '\x5a',                                         /* x-z */
	                        '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
67 67
/* Number of output bytes each input byte occupies once JSON-escaped:
 * 1: no escape, 2: \<c>, 6: \u<x> */
static const uint8_t jsonlentable[256] = {
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
87 87
/* Escaped width per ASCII byte in "paranoid" mode. Same encoding as
 * jsonlentable (1: no escape, 2: \<c>, 6: \u<x>) except that '<' and '>'
 * are also written as \u<x>. Covers 0x00-0x7f only. */
static const uint8_t jsonparanoidlentable[128] = {
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
};
98 98
/* Lower-case hex digit for each nibble value 0-15. */
static const char hexchartable[16] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
};
103 103
104 104 /*
105 105 * Turn a hex-encoded string into binary.
106 106 */
107 107 PyObject *unhexlify(const char *str, Py_ssize_t len)
108 108 {
109 109 PyObject *ret;
110 110 char *d;
111 111 Py_ssize_t i;
112 112
113 113 ret = PyBytes_FromStringAndSize(NULL, len / 2);
114 114
115 115 if (!ret)
116 116 return NULL;
117 117
118 118 d = PyBytes_AsString(ret);
119 119
120 120 for (i = 0; i < len;) {
121 121 int hi = hexdigit(str, i++);
122 122 int lo = hexdigit(str, i++);
123 123 *d++ = (hi << 4) | lo;
124 124 }
125 125
126 126 return ret;
127 127 }
128 128
129 129 PyObject *isasciistr(PyObject *self, PyObject *args)
130 130 {
131 131 const char *buf;
132 132 Py_ssize_t i, len;
133 133 if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
134 134 return NULL;
135 135 i = 0;
136 136 /* char array in PyStringObject should be at least 4-byte aligned */
137 137 if (((uintptr_t)buf & 3) == 0) {
138 138 const uint32_t *p = (const uint32_t *)buf;
139 139 for (; i < len / 4; i++) {
140 140 if (p[i] & 0x80808080U)
141 141 Py_RETURN_FALSE;
142 142 }
143 143 i *= 4;
144 144 }
145 145 for (; i < len; i++) {
146 146 if (buf[i] & 0x80)
147 147 Py_RETURN_FALSE;
148 148 }
149 149 Py_RETURN_TRUE;
150 150 }
151 151
152 152 static inline PyObject *_asciitransform(PyObject *str_obj,
153 153 const char table[128],
154 154 PyObject *fallback_fn)
155 155 {
156 156 char *str, *newstr;
157 157 Py_ssize_t i, len;
158 158 PyObject *newobj = NULL;
159 159 PyObject *ret = NULL;
160 160
161 161 str = PyBytes_AS_STRING(str_obj);
162 162 len = PyBytes_GET_SIZE(str_obj);
163 163
164 164 newobj = PyBytes_FromStringAndSize(NULL, len);
165 165 if (!newobj)
166 166 goto quit;
167 167
168 168 newstr = PyBytes_AS_STRING(newobj);
169 169
170 170 for (i = 0; i < len; i++) {
171 171 char c = str[i];
172 172 if (c & 0x80) {
173 173 if (fallback_fn != NULL) {
174 174 ret = PyObject_CallFunctionObjArgs(fallback_fn,
175 175 str_obj, NULL);
176 176 } else {
177 177 PyObject *err = PyUnicodeDecodeError_Create(
178 178 "ascii", str, len, i, (i + 1),
179 179 "unexpected code byte");
180 180 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
181 181 Py_XDECREF(err);
182 182 }
183 183 goto quit;
184 184 }
185 185 newstr[i] = table[(unsigned char)c];
186 186 }
187 187
188 188 ret = newobj;
189 189 Py_INCREF(ret);
190 190 quit:
191 191 Py_XDECREF(newobj);
192 192 return ret;
193 193 }
194 194
195 195 PyObject *asciilower(PyObject *self, PyObject *args)
196 196 {
197 197 PyObject *str_obj;
198 198 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
199 199 return NULL;
200 200 return _asciitransform(str_obj, lowertable, NULL);
201 201 }
202 202
203 203 PyObject *asciiupper(PyObject *self, PyObject *args)
204 204 {
205 205 PyObject *str_obj;
206 206 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
207 207 return NULL;
208 208 return _asciitransform(str_obj, uppertable, NULL);
209 209 }
210 210
211 211 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
212 212 {
213 213 PyObject *dmap, *spec_obj, *normcase_fallback;
214 214 PyObject *file_foldmap = NULL;
215 215 enum normcase_spec spec;
216 216 PyObject *k, *v;
217 217 dirstateTupleObject *tuple;
218 218 Py_ssize_t pos = 0;
219 219 const char *table;
220 220
221 221 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
222 222 &PyDict_Type, &dmap,
223 223 &PyInt_Type, &spec_obj,
224 224 &PyFunction_Type, &normcase_fallback))
225 225 goto quit;
226 226
227 227 spec = (int)PyInt_AS_LONG(spec_obj);
228 228 switch (spec) {
229 229 case NORMCASE_LOWER:
230 230 table = lowertable;
231 231 break;
232 232 case NORMCASE_UPPER:
233 233 table = uppertable;
234 234 break;
235 235 case NORMCASE_OTHER:
236 236 table = NULL;
237 237 break;
238 238 default:
239 239 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
240 240 goto quit;
241 241 }
242 242
243 243 /* Add some more entries to deal with additions outside this
244 244 function. */
245 245 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
246 246 if (file_foldmap == NULL)
247 247 goto quit;
248 248
249 249 while (PyDict_Next(dmap, &pos, &k, &v)) {
250 250 if (!dirstate_tuple_check(v)) {
251 251 PyErr_SetString(PyExc_TypeError,
252 252 "expected a dirstate tuple");
253 253 goto quit;
254 254 }
255 255
256 256 tuple = (dirstateTupleObject *)v;
257 257 if (tuple->state != 'r') {
258 258 PyObject *normed;
259 259 if (table != NULL) {
260 260 normed = _asciitransform(k, table,
261 261 normcase_fallback);
262 262 } else {
263 263 normed = PyObject_CallFunctionObjArgs(
264 264 normcase_fallback, k, NULL);
265 265 }
266 266
267 267 if (normed == NULL)
268 268 goto quit;
269 269 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
270 270 Py_DECREF(normed);
271 271 goto quit;
272 272 }
273 273 Py_DECREF(normed);
274 274 }
275 275 }
276 276 return file_foldmap;
277 277 quit:
278 278 Py_XDECREF(file_foldmap);
279 279 return NULL;
280 280 }
281 281
282 282 /* calculate length of JSON-escaped string; returns -1 if unsupported */
283 283 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
284 284 {
285 285 Py_ssize_t i, esclen = 0;
286 286
287 287 if (paranoid) {
288 288 /* don't want to process multi-byte escapes in C */
289 289 for (i = 0; i < len; i++) {
290 290 char c = buf[i];
291 291 if (c & 0x80) {
292 292 PyErr_SetString(PyExc_ValueError,
293 293 "cannot process non-ascii str");
294 294 return -1;
295 295 }
296 296 esclen += jsonparanoidlentable[(unsigned char)c];
297 if (esclen < 0) {
298 PyErr_SetString(PyExc_MemoryError,
299 "overflow in jsonescapelen");
300 return -1;
301 }
297 302 }
298 303 } else {
299 304 for (i = 0; i < len; i++) {
300 305 char c = buf[i];
301 306 esclen += jsonlentable[(unsigned char)c];
307 if (esclen < 0) {
308 PyErr_SetString(PyExc_MemoryError,
309 "overflow in jsonescapelen");
310 return -1;
311 }
302 312 }
303 313 }
304 314
305 315 return esclen;
306 316 }
307 317
/* map '\<c>' escape character: return the letter that follows the
   backslash in the two-character JSON escape for 'c', or '\0' if 'c' has
   no such escape */
static char jsonescapechar2(char c)
{
	switch (c) {
	case '\b': return 'b';
	case '\t': return 't';
	case '\n': return 'n';
	case '\f': return 'f';
	case '\r': return 'r';
	case '"':  return '"';
	case '\\': return '\\';
	default:
		return '\0'; /* should not happen */
	}
}
322 332
323 333 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
324 334 include characters mappable by json(paranoid)lentable */
325 335 static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
326 336 const char *origbuf, Py_ssize_t origlen,
327 337 bool paranoid)
328 338 {
329 339 const uint8_t *lentable =
330 340 (paranoid) ? jsonparanoidlentable : jsonlentable;
331 341 Py_ssize_t i, j;
332 342
333 343 for (i = 0, j = 0; i < origlen; i++) {
334 344 char c = origbuf[i];
335 345 uint8_t l = lentable[(unsigned char)c];
336 346 assert(j + l <= esclen);
337 347 switch (l) {
338 348 case 1:
339 349 escbuf[j] = c;
340 350 break;
341 351 case 2:
342 352 escbuf[j] = '\\';
343 353 escbuf[j + 1] = jsonescapechar2(c);
344 354 break;
345 355 case 6:
346 356 memcpy(escbuf + j, "\\u00", 4);
347 357 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
348 358 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
349 359 break;
350 360 }
351 361 j += l;
352 362 }
353 363 }
354 364
355 365 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
356 366 {
357 367 PyObject *origstr, *escstr;
358 368 const char *origbuf;
359 369 Py_ssize_t origlen, esclen;
360 370 int paranoid;
361 371 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast",
362 372 &PyBytes_Type, &origstr, &paranoid))
363 373 return NULL;
364 374
365 375 origbuf = PyBytes_AS_STRING(origstr);
366 376 origlen = PyBytes_GET_SIZE(origstr);
367 377 esclen = jsonescapelen(origbuf, origlen, paranoid);
368 378 if (esclen < 0)
369 return NULL; /* unsupported char found */
379 return NULL; /* unsupported char found or overflow */
370 380 if (origlen == esclen) {
371 381 Py_INCREF(origstr);
372 382 return origstr;
373 383 }
374 384
375 385 escstr = PyBytes_FromStringAndSize(NULL, esclen);
376 386 if (!escstr)
377 387 return NULL;
378 388 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
379 389 paranoid);
380 390
381 391 return escstr;
382 392 }
General Comments 0
You need to be logged in to leave comments. Login now