##// END OF EJS Templates
cext: put case statements on separate line...
Gregory Szorc -
r34440:88e83a61 default
parent child Browse files
Show More
@@ -1,392 +1,399 b''
1 1 /*
2 2 charencode.c - miscellaneous character encoding
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13
14 14 #include "charencode.h"
15 15 #include "compat.h"
16 16 #include "util.h"
17 17
18 18 #ifdef IS_PY3K
19 19 /* The mapping of Python types is meant to be temporary to get Python
20 20 * 3 to compile. We should remove this once Python 3 support is fully
21 21 * supported and proper types are used in the extensions themselves. */
22 22 #define PyInt_Type PyLong_Type
23 23 #define PyInt_AS_LONG PyLong_AS_LONG
24 24 #endif
25 25
/* ASCII lowercasing table, indexed by byte value 0x00-0x7f: entries for
 * 'A'-'Z' (0x41-0x5a) hold 'a'-'z' (0x61-0x7a); every other entry holds
 * its own index. */
26 26 static const char lowertable[128] = {
27 27 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
28 28 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
29 29 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
30 30 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
31 31 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
32 32 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
33 33 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
34 34 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
35 35 '\x40',
36 36 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
37 37 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
38 38 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
39 39 '\x78', '\x79', '\x7a', /* X-Z */
40 40 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
41 41 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
42 42 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
43 43 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
44 44 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
45 45 };
46 46
/* ASCII uppercasing table, indexed by byte value 0x00-0x7f: entries for
 * 'a'-'z' (0x61-0x7a) hold 'A'-'Z' (0x41-0x5a); every other entry holds
 * its own index. */
47 47 static const char uppertable[128] = {
48 48 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
49 49 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
50 50 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
51 51 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
52 52 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
53 53 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
54 54 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
55 55 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
56 56 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
57 57 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
58 58 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
59 59 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
60 60 '\x60',
61 61 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
62 62 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
63 63 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
64 64 '\x58', '\x59', '\x5a', /* x-z */
65 65 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
66 66 };
67 67
/* Length in bytes of the JSON-escaped form of each byte value 0x00-0xff.
 * Control characters and DEL need 2 or 6 bytes, '"' and '\\' need 2, and
 * every other byte (including 0x80-0xff) is emitted unchanged. */
68 68 /* 1: no escape, 2: \<c>, 6: \u<x> */
69 69 static const uint8_t jsonlentable[256] = {
70 70 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
71 71 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
72 72 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
73 73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
74 74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
75 75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
76 76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
77 77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
78 78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
79 79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
80 80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
81 81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
82 82 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
83 83 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
84 84 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
85 85 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
86 86 };
87 87
/* "Paranoid" variant of the escaped-length table. It covers only byte
 * values 0x00-0x7f, and additionally gives '<' and '>' the 6-byte escape
 * length. Callers must not index it with bytes >= 0x80. */
88 88 static const uint8_t jsonparanoidlentable[128] = {
89 89 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
90 90 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
91 91 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
92 92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
93 93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
95 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
97 97 };
98 98
/* Lowercase hexadecimal digit for each nibble value 0-15. */
99 99 static const char hexchartable[16] = {
100 100 '0', '1', '2', '3', '4', '5', '6', '7',
101 101 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
102 102 };
103 103
104 104 /*
105 105 * Turn a hex-encoded string into binary.
106 106 */
107 107 PyObject *unhexlify(const char *str, Py_ssize_t len)
108 108 {
109 109 PyObject *ret;
110 110 char *d;
111 111 Py_ssize_t i;
112 112
113 113 ret = PyBytes_FromStringAndSize(NULL, len / 2);
114 114
115 115 if (!ret)
116 116 return NULL;
117 117
118 118 d = PyBytes_AsString(ret);
119 119
120 120 for (i = 0; i < len;) {
121 121 int hi = hexdigit(str, i++);
122 122 int lo = hexdigit(str, i++);
123 123 *d++ = (hi << 4) | lo;
124 124 }
125 125
126 126 return ret;
127 127 }
128 128
129 129 PyObject *isasciistr(PyObject *self, PyObject *args)
130 130 {
131 131 const char *buf;
132 132 Py_ssize_t i, len;
133 133 if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
134 134 return NULL;
135 135 i = 0;
136 136 /* char array in PyStringObject should be at least 4-byte aligned */
137 137 if (((uintptr_t)buf & 3) == 0) {
138 138 const uint32_t *p = (const uint32_t *)buf;
139 139 for (; i < len / 4; i++) {
140 140 if (p[i] & 0x80808080U)
141 141 Py_RETURN_FALSE;
142 142 }
143 143 i *= 4;
144 144 }
145 145 for (; i < len; i++) {
146 146 if (buf[i] & 0x80)
147 147 Py_RETURN_FALSE;
148 148 }
149 149 Py_RETURN_TRUE;
150 150 }
151 151
152 152 static inline PyObject *_asciitransform(PyObject *str_obj,
153 153 const char table[128],
154 154 PyObject *fallback_fn)
155 155 {
156 156 char *str, *newstr;
157 157 Py_ssize_t i, len;
158 158 PyObject *newobj = NULL;
159 159 PyObject *ret = NULL;
160 160
161 161 str = PyBytes_AS_STRING(str_obj);
162 162 len = PyBytes_GET_SIZE(str_obj);
163 163
164 164 newobj = PyBytes_FromStringAndSize(NULL, len);
165 165 if (!newobj)
166 166 goto quit;
167 167
168 168 newstr = PyBytes_AS_STRING(newobj);
169 169
170 170 for (i = 0; i < len; i++) {
171 171 char c = str[i];
172 172 if (c & 0x80) {
173 173 if (fallback_fn != NULL) {
174 174 ret = PyObject_CallFunctionObjArgs(fallback_fn,
175 175 str_obj, NULL);
176 176 } else {
177 177 PyObject *err = PyUnicodeDecodeError_Create(
178 178 "ascii", str, len, i, (i + 1),
179 179 "unexpected code byte");
180 180 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
181 181 Py_XDECREF(err);
182 182 }
183 183 goto quit;
184 184 }
185 185 newstr[i] = table[(unsigned char)c];
186 186 }
187 187
188 188 ret = newobj;
189 189 Py_INCREF(ret);
190 190 quit:
191 191 Py_XDECREF(newobj);
192 192 return ret;
193 193 }
194 194
195 195 PyObject *asciilower(PyObject *self, PyObject *args)
196 196 {
197 197 PyObject *str_obj;
198 198 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
199 199 return NULL;
200 200 return _asciitransform(str_obj, lowertable, NULL);
201 201 }
202 202
203 203 PyObject *asciiupper(PyObject *self, PyObject *args)
204 204 {
205 205 PyObject *str_obj;
206 206 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
207 207 return NULL;
208 208 return _asciitransform(str_obj, uppertable, NULL);
209 209 }
210 210
211 211 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
212 212 {
213 213 PyObject *dmap, *spec_obj, *normcase_fallback;
214 214 PyObject *file_foldmap = NULL;
215 215 enum normcase_spec spec;
216 216 PyObject *k, *v;
217 217 dirstateTupleObject *tuple;
218 218 Py_ssize_t pos = 0;
219 219 const char *table;
220 220
221 221 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
222 222 &PyDict_Type, &dmap,
223 223 &PyInt_Type, &spec_obj,
224 224 &PyFunction_Type, &normcase_fallback))
225 225 goto quit;
226 226
227 227 spec = (int)PyInt_AS_LONG(spec_obj);
228 228 switch (spec) {
229 229 case NORMCASE_LOWER:
230 230 table = lowertable;
231 231 break;
232 232 case NORMCASE_UPPER:
233 233 table = uppertable;
234 234 break;
235 235 case NORMCASE_OTHER:
236 236 table = NULL;
237 237 break;
238 238 default:
239 239 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
240 240 goto quit;
241 241 }
242 242
243 243 /* Add some more entries to deal with additions outside this
244 244 function. */
245 245 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
246 246 if (file_foldmap == NULL)
247 247 goto quit;
248 248
249 249 while (PyDict_Next(dmap, &pos, &k, &v)) {
250 250 if (!dirstate_tuple_check(v)) {
251 251 PyErr_SetString(PyExc_TypeError,
252 252 "expected a dirstate tuple");
253 253 goto quit;
254 254 }
255 255
256 256 tuple = (dirstateTupleObject *)v;
257 257 if (tuple->state != 'r') {
258 258 PyObject *normed;
259 259 if (table != NULL) {
260 260 normed = _asciitransform(k, table,
261 261 normcase_fallback);
262 262 } else {
263 263 normed = PyObject_CallFunctionObjArgs(
264 264 normcase_fallback, k, NULL);
265 265 }
266 266
267 267 if (normed == NULL)
268 268 goto quit;
269 269 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
270 270 Py_DECREF(normed);
271 271 goto quit;
272 272 }
273 273 Py_DECREF(normed);
274 274 }
275 275 }
276 276 return file_foldmap;
277 277 quit:
278 278 Py_XDECREF(file_foldmap);
279 279 return NULL;
280 280 }
281 281
282 282 /* calculate length of JSON-escaped string; returns -1 if unsupported */
283 283 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
284 284 {
285 285 Py_ssize_t i, esclen = 0;
286 286
287 287 if (paranoid) {
288 288 /* don't want to process multi-byte escapes in C */
289 289 for (i = 0; i < len; i++) {
290 290 char c = buf[i];
291 291 if (c & 0x80) {
292 292 PyErr_SetString(PyExc_ValueError,
293 293 "cannot process non-ascii str");
294 294 return -1;
295 295 }
296 296 esclen += jsonparanoidlentable[(unsigned char)c];
297 297 if (esclen < 0) {
298 298 PyErr_SetString(PyExc_MemoryError,
299 299 "overflow in jsonescapelen");
300 300 return -1;
301 301 }
302 302 }
303 303 } else {
304 304 for (i = 0; i < len; i++) {
305 305 char c = buf[i];
306 306 esclen += jsonlentable[(unsigned char)c];
307 307 if (esclen < 0) {
308 308 PyErr_SetString(PyExc_MemoryError,
309 309 "overflow in jsonescapelen");
310 310 return -1;
311 311 }
312 312 }
313 313 }
314 314
315 315 return esclen;
316 316 }
317 317
/*
 * Map a character that has a two-byte JSON escape to the character that
 * follows the backslash (e.g. '\n' -> 'n', '"' -> '"').
 *
 * Returns '\0' for any character without a two-byte escape; callers only
 * pass characters whose table length is 2, so that should not happen.
 */
static char jsonescapechar2(char c)
{
	switch (c) {
	case '\b':
		return 'b';
	case '\t':
		return 't';
	case '\n':
		return 'n';
	case '\f':
		return 'f';
	case '\r':
		return 'r';
	case '"':
		return '"';
	case '\\':
		return '\\';
	default:
		break;
	}
	return '\0'; /* should not happen */
}
332 339
333 340 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
334 341 include characters mappable by json(paranoid)lentable */
335 342 static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
336 343 const char *origbuf, Py_ssize_t origlen,
337 344 bool paranoid)
338 345 {
339 346 const uint8_t *lentable =
340 347 (paranoid) ? jsonparanoidlentable : jsonlentable;
341 348 Py_ssize_t i, j;
342 349
343 350 for (i = 0, j = 0; i < origlen; i++) {
344 351 char c = origbuf[i];
345 352 uint8_t l = lentable[(unsigned char)c];
346 353 assert(j + l <= esclen);
347 354 switch (l) {
348 355 case 1:
349 356 escbuf[j] = c;
350 357 break;
351 358 case 2:
352 359 escbuf[j] = '\\';
353 360 escbuf[j + 1] = jsonescapechar2(c);
354 361 break;
355 362 case 6:
356 363 memcpy(escbuf + j, "\\u00", 4);
357 364 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
358 365 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
359 366 break;
360 367 }
361 368 j += l;
362 369 }
363 370 }
364 371
365 372 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
366 373 {
367 374 PyObject *origstr, *escstr;
368 375 const char *origbuf;
369 376 Py_ssize_t origlen, esclen;
370 377 int paranoid;
371 378 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast",
372 379 &PyBytes_Type, &origstr, &paranoid))
373 380 return NULL;
374 381
375 382 origbuf = PyBytes_AS_STRING(origstr);
376 383 origlen = PyBytes_GET_SIZE(origstr);
377 384 esclen = jsonescapelen(origbuf, origlen, paranoid);
378 385 if (esclen < 0)
379 386 return NULL; /* unsupported char found or overflow */
380 387 if (origlen == esclen) {
381 388 Py_INCREF(origstr);
382 389 return origstr;
383 390 }
384 391
385 392 escstr = PyBytes_FromStringAndSize(NULL, esclen);
386 393 if (!escstr)
387 394 return NULL;
388 395 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
389 396 paranoid);
390 397
391 398 return escstr;
392 399 }
General Comments 0
You need to be logged in to leave comments. Login now