##// END OF EJS Templates
charencode: adjust clang-format enable/disable comments...
Augie Fackler -
r36075:81199632 default
parent child Browse files
Show More
@@ -1,401 +1,401
1 1 /*
2 2 charencode.c - miscellaneous character encoding
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13
14 14 #include "charencode.h"
15 15 #include "compat.h"
16 16 #include "util.h"
17 17
18 18 #ifdef IS_PY3K
19 19 /* The mapping of Python types is meant to be temporary to get Python
20 20 * 3 to compile. We should remove this once Python 3 support is fully
21 21 * supported and proper types are used in the extensions themselves. */
22 22 #define PyInt_Type PyLong_Type
23 23 #define PyInt_AS_LONG PyLong_AS_LONG
24 24 #endif
25 25
26 26 /* clang-format off */
/* ASCII lowercasing table, indexed by byte value 0x00-0x7f.
 * 'A'-'Z' (0x41-0x5a) map to 'a'-'z' (0x61-0x7a); every other entry
 * maps to itself. */
27 27 static const char lowertable[128] = {
28 28 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
29 29 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
30 30 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
31 31 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
32 32 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
33 33 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
34 34 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
35 35 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
36 36 '\x40',
37 37 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
38 38 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
39 39 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
40 40 '\x78', '\x79', '\x7a', /* X-Z */
41 41 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
42 42 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
43 43 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
44 44 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
45 45 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
46 46 };
47 47
/* ASCII uppercasing table, indexed by byte value 0x00-0x7f.
 * 'a'-'z' (0x61-0x7a) map to 'A'-'Z' (0x41-0x5a); every other entry
 * maps to itself. */
48 48 static const char uppertable[128] = {
49 49 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
50 50 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
51 51 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
52 52 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
53 53 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
54 54 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
55 55 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
56 56 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
57 57 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
58 58 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
59 59 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
60 60 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
61 61 '\x60',
62 62 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
63 63 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
64 64 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
65 65 '\x58', '\x59', '\x5a', /* x-z */
66 66 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
67 67 };
68 /* clang-format on */
69 68
/* Number of output bytes each input byte occupies once JSON-escaped,
 * indexed by the unsigned byte value (0-255). Control characters without
 * a short escape and DEL take the 6-byte form; bytes 0x80-0xff are copied
 * through unchanged (length 1). */
70 69 /* 1: no escape, 2: \<c>, 6: \u<x> */
71 70 static const uint8_t jsonlentable[256] = {
72 71 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
73 72 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
74 73 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
75 74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
76 75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
77 76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
78 77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
79 78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
80 79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
81 80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
82 81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
83 82 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
84 83 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
85 84 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
86 85 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
87 86 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
88 87 };
89 88
/* Escaped length per input byte for "paranoid" mode, indexed by byte
 * value 0x00-0x7f only. Same encoding as jsonlentable (1: no escape,
 * 2: two-character backslash escape, 6: \u00XX escape), except that
 * '<' and '>' also take the 6-byte form. Callers must reject bytes
 * >= 0x80 before indexing. */
90 89 static const uint8_t jsonparanoidlentable[128] = {
91 90 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
92 91 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
93 92 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
94 93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
95 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
97 96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
99 98 };
100 99
/* Lowercase hexadecimal digit character for each 4-bit value (0-15). */
101 100 static const char hexchartable[16] = {
102 101 '0', '1', '2', '3', '4', '5', '6', '7',
103 102 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
104 103 };
104 /* clang-format on */
105 105
106 106 /*
107 107 * Turn a hex-encoded string into binary.
108 108 */
109 109 PyObject *unhexlify(const char *str, Py_ssize_t len)
110 110 {
111 111 PyObject *ret;
112 112 char *d;
113 113 Py_ssize_t i;
114 114
115 115 ret = PyBytes_FromStringAndSize(NULL, len / 2);
116 116
117 117 if (!ret)
118 118 return NULL;
119 119
120 120 d = PyBytes_AsString(ret);
121 121
122 122 for (i = 0; i < len;) {
123 123 int hi = hexdigit(str, i++);
124 124 int lo = hexdigit(str, i++);
125 125 *d++ = (hi << 4) | lo;
126 126 }
127 127
128 128 return ret;
129 129 }
130 130
131 131 PyObject *isasciistr(PyObject *self, PyObject *args)
132 132 {
133 133 const char *buf;
134 134 Py_ssize_t i, len;
135 135 if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
136 136 return NULL;
137 137 i = 0;
138 138 /* char array in PyStringObject should be at least 4-byte aligned */
139 139 if (((uintptr_t)buf & 3) == 0) {
140 140 const uint32_t *p = (const uint32_t *)buf;
141 141 for (; i < len / 4; i++) {
142 142 if (p[i] & 0x80808080U)
143 143 Py_RETURN_FALSE;
144 144 }
145 145 i *= 4;
146 146 }
147 147 for (; i < len; i++) {
148 148 if (buf[i] & 0x80)
149 149 Py_RETURN_FALSE;
150 150 }
151 151 Py_RETURN_TRUE;
152 152 }
153 153
154 154 static inline PyObject *_asciitransform(PyObject *str_obj,
155 155 const char table[128],
156 156 PyObject *fallback_fn)
157 157 {
158 158 char *str, *newstr;
159 159 Py_ssize_t i, len;
160 160 PyObject *newobj = NULL;
161 161 PyObject *ret = NULL;
162 162
163 163 str = PyBytes_AS_STRING(str_obj);
164 164 len = PyBytes_GET_SIZE(str_obj);
165 165
166 166 newobj = PyBytes_FromStringAndSize(NULL, len);
167 167 if (!newobj)
168 168 goto quit;
169 169
170 170 newstr = PyBytes_AS_STRING(newobj);
171 171
172 172 for (i = 0; i < len; i++) {
173 173 char c = str[i];
174 174 if (c & 0x80) {
175 175 if (fallback_fn != NULL) {
176 176 ret = PyObject_CallFunctionObjArgs(fallback_fn,
177 177 str_obj, NULL);
178 178 } else {
179 179 PyObject *err = PyUnicodeDecodeError_Create(
180 180 "ascii", str, len, i, (i + 1),
181 181 "unexpected code byte");
182 182 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
183 183 Py_XDECREF(err);
184 184 }
185 185 goto quit;
186 186 }
187 187 newstr[i] = table[(unsigned char)c];
188 188 }
189 189
190 190 ret = newobj;
191 191 Py_INCREF(ret);
192 192 quit:
193 193 Py_XDECREF(newobj);
194 194 return ret;
195 195 }
196 196
197 197 PyObject *asciilower(PyObject *self, PyObject *args)
198 198 {
199 199 PyObject *str_obj;
200 200 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
201 201 return NULL;
202 202 return _asciitransform(str_obj, lowertable, NULL);
203 203 }
204 204
205 205 PyObject *asciiupper(PyObject *self, PyObject *args)
206 206 {
207 207 PyObject *str_obj;
208 208 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
209 209 return NULL;
210 210 return _asciitransform(str_obj, uppertable, NULL);
211 211 }
212 212
213 213 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
214 214 {
215 215 PyObject *dmap, *spec_obj, *normcase_fallback;
216 216 PyObject *file_foldmap = NULL;
217 217 enum normcase_spec spec;
218 218 PyObject *k, *v;
219 219 dirstateTupleObject *tuple;
220 220 Py_ssize_t pos = 0;
221 221 const char *table;
222 222
223 223 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
224 224 &PyDict_Type, &dmap,
225 225 &PyInt_Type, &spec_obj,
226 226 &PyFunction_Type, &normcase_fallback))
227 227 goto quit;
228 228
229 229 spec = (int)PyInt_AS_LONG(spec_obj);
230 230 switch (spec) {
231 231 case NORMCASE_LOWER:
232 232 table = lowertable;
233 233 break;
234 234 case NORMCASE_UPPER:
235 235 table = uppertable;
236 236 break;
237 237 case NORMCASE_OTHER:
238 238 table = NULL;
239 239 break;
240 240 default:
241 241 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
242 242 goto quit;
243 243 }
244 244
245 245 /* Add some more entries to deal with additions outside this
246 246 function. */
247 247 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
248 248 if (file_foldmap == NULL)
249 249 goto quit;
250 250
251 251 while (PyDict_Next(dmap, &pos, &k, &v)) {
252 252 if (!dirstate_tuple_check(v)) {
253 253 PyErr_SetString(PyExc_TypeError,
254 254 "expected a dirstate tuple");
255 255 goto quit;
256 256 }
257 257
258 258 tuple = (dirstateTupleObject *)v;
259 259 if (tuple->state != 'r') {
260 260 PyObject *normed;
261 261 if (table != NULL) {
262 262 normed = _asciitransform(k, table,
263 263 normcase_fallback);
264 264 } else {
265 265 normed = PyObject_CallFunctionObjArgs(
266 266 normcase_fallback, k, NULL);
267 267 }
268 268
269 269 if (normed == NULL)
270 270 goto quit;
271 271 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
272 272 Py_DECREF(normed);
273 273 goto quit;
274 274 }
275 275 Py_DECREF(normed);
276 276 }
277 277 }
278 278 return file_foldmap;
279 279 quit:
280 280 Py_XDECREF(file_foldmap);
281 281 return NULL;
282 282 }
283 283
284 284 /* calculate length of JSON-escaped string; returns -1 if unsupported */
285 285 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
286 286 {
287 287 Py_ssize_t i, esclen = 0;
288 288
289 289 if (paranoid) {
290 290 /* don't want to process multi-byte escapes in C */
291 291 for (i = 0; i < len; i++) {
292 292 char c = buf[i];
293 293 if (c & 0x80) {
294 294 PyErr_SetString(PyExc_ValueError,
295 295 "cannot process non-ascii str");
296 296 return -1;
297 297 }
298 298 esclen += jsonparanoidlentable[(unsigned char)c];
299 299 if (esclen < 0) {
300 300 PyErr_SetString(PyExc_MemoryError,
301 301 "overflow in jsonescapelen");
302 302 return -1;
303 303 }
304 304 }
305 305 } else {
306 306 for (i = 0; i < len; i++) {
307 307 char c = buf[i];
308 308 esclen += jsonlentable[(unsigned char)c];
309 309 if (esclen < 0) {
310 310 PyErr_SetString(PyExc_MemoryError,
311 311 "overflow in jsonescapelen");
312 312 return -1;
313 313 }
314 314 }
315 315 }
316 316
317 317 return esclen;
318 318 }
319 319
/* map '\<c>' escape character
 *
 * Returns the letter that follows the backslash in the two-character
 * JSON escape for 'c', or '\0' if 'c' has no such escape. */
static char jsonescapechar2(char c)
{
	static const char raw[] = {'\b', '\t', '\n', '\f', '\r', '"', '\\'};
	static const char esc[] = {'b', 't', 'n', 'f', 'r', '"', '\\'};
	unsigned int k;

	for (k = 0; k < sizeof(raw); k++) {
		if (raw[k] == c)
			return esc[k];
	}
	return '\0'; /* should not happen */
}
341 341
342 342 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
343 343 include characters mappable by json(paranoid)lentable */
344 344 static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
345 345 const char *origbuf, Py_ssize_t origlen,
346 346 bool paranoid)
347 347 {
348 348 const uint8_t *lentable =
349 349 (paranoid) ? jsonparanoidlentable : jsonlentable;
350 350 Py_ssize_t i, j;
351 351
352 352 for (i = 0, j = 0; i < origlen; i++) {
353 353 char c = origbuf[i];
354 354 uint8_t l = lentable[(unsigned char)c];
355 355 assert(j + l <= esclen);
356 356 switch (l) {
357 357 case 1:
358 358 escbuf[j] = c;
359 359 break;
360 360 case 2:
361 361 escbuf[j] = '\\';
362 362 escbuf[j + 1] = jsonescapechar2(c);
363 363 break;
364 364 case 6:
365 365 memcpy(escbuf + j, "\\u00", 4);
366 366 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
367 367 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
368 368 break;
369 369 }
370 370 j += l;
371 371 }
372 372 }
373 373
374 374 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
375 375 {
376 376 PyObject *origstr, *escstr;
377 377 const char *origbuf;
378 378 Py_ssize_t origlen, esclen;
379 379 int paranoid;
380 380 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast",
381 381 &PyBytes_Type, &origstr, &paranoid))
382 382 return NULL;
383 383
384 384 origbuf = PyBytes_AS_STRING(origstr);
385 385 origlen = PyBytes_GET_SIZE(origstr);
386 386 esclen = jsonescapelen(origbuf, origlen, paranoid);
387 387 if (esclen < 0)
388 388 return NULL; /* unsupported char found or overflow */
389 389 if (origlen == esclen) {
390 390 Py_INCREF(origstr);
391 391 return origstr;
392 392 }
393 393
394 394 escstr = PyBytes_FromStringAndSize(NULL, esclen);
395 395 if (!escstr)
396 396 return NULL;
397 397 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
398 398 paranoid);
399 399
400 400 return escstr;
401 401 }
@@ -1,59 +1,61
1 1 /*
2 2 charencode.h - miscellaneous character encoding
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_CHARENCODE_H_
9 9 #define _HG_CHARENCODE_H_
10 10
11 11 #include <Python.h>
12 12 #include "compat.h"
13 13
/* Selects how make_file_foldmap() normalizes file names:
 * NORMCASE_LOWER uses the ASCII lowercase table, NORMCASE_UPPER the ASCII
 * uppercase table, and NORMCASE_OTHER always calls the Python fallback. */
14 14 /* This should be kept in sync with normcasespecs in encoding.py. */
15 15 enum normcase_spec {
16 16 NORMCASE_LOWER = -1,
17 17 NORMCASE_UPPER = 1,
18 18 NORMCASE_OTHER = 0
19 19 };
20 20
21 21 PyObject *unhexlify(const char *str, Py_ssize_t len);
22 22 PyObject *isasciistr(PyObject *self, PyObject *args);
23 23 PyObject *asciilower(PyObject *self, PyObject *args);
24 24 PyObject *asciiupper(PyObject *self, PyObject *args);
25 25 PyObject *make_file_foldmap(PyObject *self, PyObject *args);
26 26 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args);
27 27
28 /* clang-format off */
/* Value (0-15) of each byte interpreted as a hexadecimal digit, indexed
 * by the unsigned byte value; -1 marks bytes that are not hex digits.
 * Both uppercase and lowercase letters are accepted. */
28 29 static const int8_t hextable[256] = {
29 30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 33 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
33 34 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
34 35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
35 36 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
36 37 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
37 38 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
38 39 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
39 40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
40 41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41 42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
42 43 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
43 44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
44 45 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
45 46 };
47 /* clang-format on */
46 48
47 49 static inline int hexdigit(const char *p, Py_ssize_t off)
48 50 {
49 51 int8_t val = hextable[(unsigned char)p[off]];
50 52
51 53 if (val >= 0) {
52 54 return val;
53 55 }
54 56
55 57 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
56 58 return 0;
57 59 }
58 60
59 61 #endif /* _HG_CHARENCODE_H_ */
General Comments 0
You need to be logged in to leave comments. Login now