##// END OF EJS Templates
cext: move _dict_new_presized() to header...
Yuya Nishihara -
r33751:5866ba5e default
parent child Browse files
Show More
@@ -1,1009 +1,997
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16 #include "bitmanipulation.h"
17 17
18 18 #ifdef IS_PY3K
19 19 /* The mapping of Python types is meant to be temporary to get Python
20 20 * 3 to compile. We should remove this once Python 3 support is fully
21 21 * supported and proper types are used in the extensions themselves. */
22 22 #define PyInt_Type PyLong_Type
23 23 #define PyInt_Check PyLong_Check
24 24 #define PyInt_FromLong PyLong_FromLong
25 25 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 26 #define PyInt_AS_LONG PyLong_AS_LONG
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
/* ASCII lowercasing table, indexed by a 7-bit byte value: 'A'-'Z'
   (0x41-0x5a) map to 'a'-'z' (0x61-0x7a), every other entry maps to
   itself. */
32 32 static const char lowertable[128] = {
33 33 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
34 34 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
35 35 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
36 36 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
37 37 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
38 38 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
39 39 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
40 40 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
41 41 '\x40',
42 42 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
43 43 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
44 44 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
45 45 '\x78', '\x79', '\x7a', /* X-Z */
46 46 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
47 47 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
48 48 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
49 49 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
50 50 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
51 51 };
52 52
/* ASCII uppercasing table, indexed by a 7-bit byte value: 'a'-'z'
   (0x61-0x7a) map to 'A'-'Z' (0x41-0x5a), every other entry maps to
   itself. */
53 53 static const char uppertable[128] = {
54 54 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
55 55 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
56 56 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
57 57 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
58 58 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
59 59 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
60 60 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
61 61 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
62 62 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
63 63 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
64 64 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
65 65 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
66 66 '\x60',
67 67 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
68 68 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
69 69 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
70 70 '\x58', '\x59', '\x5a', /* x-z */
71 71 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
72 72 };
73 73
74 74 /*
75 75 * Turn a hex-encoded string into binary.
76 76 */
77 77 PyObject *unhexlify(const char *str, int len)
78 78 {
79 79 PyObject *ret;
80 80 char *d;
81 81 int i;
82 82
83 83 ret = PyBytes_FromStringAndSize(NULL, len / 2);
84 84
85 85 if (!ret)
86 86 return NULL;
87 87
88 88 d = PyBytes_AsString(ret);
89 89
90 90 for (i = 0; i < len;) {
91 91 int hi = hexdigit(str, i++);
92 92 int lo = hexdigit(str, i++);
93 93 *d++ = (hi << 4) | lo;
94 94 }
95 95
96 96 return ret;
97 97 }
98 98
99 99 static inline PyObject *_asciitransform(PyObject *str_obj,
100 100 const char table[128],
101 101 PyObject *fallback_fn)
102 102 {
103 103 char *str, *newstr;
104 104 Py_ssize_t i, len;
105 105 PyObject *newobj = NULL;
106 106 PyObject *ret = NULL;
107 107
108 108 str = PyBytes_AS_STRING(str_obj);
109 109 len = PyBytes_GET_SIZE(str_obj);
110 110
111 111 newobj = PyBytes_FromStringAndSize(NULL, len);
112 112 if (!newobj)
113 113 goto quit;
114 114
115 115 newstr = PyBytes_AS_STRING(newobj);
116 116
117 117 for (i = 0; i < len; i++) {
118 118 char c = str[i];
119 119 if (c & 0x80) {
120 120 if (fallback_fn != NULL) {
121 121 ret = PyObject_CallFunctionObjArgs(fallback_fn,
122 122 str_obj, NULL);
123 123 } else {
124 124 PyObject *err = PyUnicodeDecodeError_Create(
125 125 "ascii", str, len, i, (i + 1),
126 126 "unexpected code byte");
127 127 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
128 128 Py_XDECREF(err);
129 129 }
130 130 goto quit;
131 131 }
132 132 newstr[i] = table[(unsigned char)c];
133 133 }
134 134
135 135 ret = newobj;
136 136 Py_INCREF(ret);
137 137 quit:
138 138 Py_XDECREF(newobj);
139 139 return ret;
140 140 }
141 141
142 142 static PyObject *asciilower(PyObject *self, PyObject *args)
143 143 {
144 144 PyObject *str_obj;
145 145 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
146 146 return NULL;
147 147 return _asciitransform(str_obj, lowertable, NULL);
148 148 }
149 149
150 150 static PyObject *asciiupper(PyObject *self, PyObject *args)
151 151 {
152 152 PyObject *str_obj;
153 153 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
154 154 return NULL;
155 155 return _asciitransform(str_obj, uppertable, NULL);
156 156 }
157 157
158 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
159 {
160 /* _PyDict_NewPresized expects a minused parameter, but it actually
161 creates a dictionary that's the nearest power of two bigger than the
162 parameter. For example, with the initial minused = 1000, the
163 dictionary created has size 1024. Of course in a lot of cases that
164 can be greater than the maximum load factor Python's dict object
165 expects (= 2/3), so as soon as we cross the threshold we'll resize
166 anyway. So create a dictionary that's at least 3/2 the size. */
167 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
168 }
169
170 158 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
171 159 {
172 160 Py_ssize_t expected_size;
173 161
174 162 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
175 163 return NULL;
176 164
177 165 return _dict_new_presized(expected_size);
178 166 }
179 167
180 168 static PyObject *make_file_foldmap(PyObject *self, PyObject *args)
181 169 {
182 170 PyObject *dmap, *spec_obj, *normcase_fallback;
183 171 PyObject *file_foldmap = NULL;
184 172 enum normcase_spec spec;
185 173 PyObject *k, *v;
186 174 dirstateTupleObject *tuple;
187 175 Py_ssize_t pos = 0;
188 176 const char *table;
189 177
190 178 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
191 179 &PyDict_Type, &dmap,
192 180 &PyInt_Type, &spec_obj,
193 181 &PyFunction_Type, &normcase_fallback))
194 182 goto quit;
195 183
196 184 spec = (int)PyInt_AS_LONG(spec_obj);
197 185 switch (spec) {
198 186 case NORMCASE_LOWER:
199 187 table = lowertable;
200 188 break;
201 189 case NORMCASE_UPPER:
202 190 table = uppertable;
203 191 break;
204 192 case NORMCASE_OTHER:
205 193 table = NULL;
206 194 break;
207 195 default:
208 196 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
209 197 goto quit;
210 198 }
211 199
212 200 /* Add some more entries to deal with additions outside this
213 201 function. */
214 202 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
215 203 if (file_foldmap == NULL)
216 204 goto quit;
217 205
218 206 while (PyDict_Next(dmap, &pos, &k, &v)) {
219 207 if (!dirstate_tuple_check(v)) {
220 208 PyErr_SetString(PyExc_TypeError,
221 209 "expected a dirstate tuple");
222 210 goto quit;
223 211 }
224 212
225 213 tuple = (dirstateTupleObject *)v;
226 214 if (tuple->state != 'r') {
227 215 PyObject *normed;
228 216 if (table != NULL) {
229 217 normed = _asciitransform(k, table,
230 218 normcase_fallback);
231 219 } else {
232 220 normed = PyObject_CallFunctionObjArgs(
233 221 normcase_fallback, k, NULL);
234 222 }
235 223
236 224 if (normed == NULL)
237 225 goto quit;
238 226 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
239 227 Py_DECREF(normed);
240 228 goto quit;
241 229 }
242 230 Py_DECREF(normed);
243 231 }
244 232 }
245 233 return file_foldmap;
246 234 quit:
247 235 Py_XDECREF(file_foldmap);
248 236 return NULL;
249 237 }
250 238
251 239 /*
252 240 * This code assumes that a manifest is stitched together with newline
253 241 * ('\n') characters.
254 242 */
255 243 static PyObject *parse_manifest(PyObject *self, PyObject *args)
256 244 {
257 245 PyObject *mfdict, *fdict;
258 246 char *str, *start, *end;
259 247 int len;
260 248
261 249 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
262 250 &PyDict_Type, &mfdict,
263 251 &PyDict_Type, &fdict,
264 252 &str, &len))
265 253 goto quit;
266 254
267 255 start = str;
268 256 end = str + len;
269 257 while (start < end) {
270 258 PyObject *file = NULL, *node = NULL;
271 259 PyObject *flags = NULL;
272 260 char *zero = NULL, *newline = NULL;
273 261 ptrdiff_t nlen;
274 262
275 263 zero = memchr(start, '\0', end - start);
276 264 if (!zero) {
277 265 PyErr_SetString(PyExc_ValueError,
278 266 "manifest entry has no separator");
279 267 goto quit;
280 268 }
281 269
282 270 newline = memchr(zero + 1, '\n', end - (zero + 1));
283 271 if (!newline) {
284 272 PyErr_SetString(PyExc_ValueError,
285 273 "manifest contains trailing garbage");
286 274 goto quit;
287 275 }
288 276
289 277 file = PyBytes_FromStringAndSize(start, zero - start);
290 278
291 279 if (!file)
292 280 goto bail;
293 281
294 282 nlen = newline - zero - 1;
295 283
296 284 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
297 285 if (!node)
298 286 goto bail;
299 287
300 288 if (nlen > 40) {
301 289 flags = PyBytes_FromStringAndSize(zero + 41,
302 290 nlen - 40);
303 291 if (!flags)
304 292 goto bail;
305 293
306 294 if (PyDict_SetItem(fdict, file, flags) == -1)
307 295 goto bail;
308 296 }
309 297
310 298 if (PyDict_SetItem(mfdict, file, node) == -1)
311 299 goto bail;
312 300
313 301 start = newline + 1;
314 302
315 303 Py_XDECREF(flags);
316 304 Py_XDECREF(node);
317 305 Py_XDECREF(file);
318 306 continue;
319 307 bail:
320 308 Py_XDECREF(flags);
321 309 Py_XDECREF(node);
322 310 Py_XDECREF(file);
323 311 goto quit;
324 312 }
325 313
326 314 Py_INCREF(Py_None);
327 315 return Py_None;
328 316 quit:
329 317 return NULL;
330 318 }
331 319
332 320 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
333 321 int size, int mtime)
334 322 {
335 323 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
336 324 &dirstateTupleType);
337 325 if (!t)
338 326 return NULL;
339 327 t->state = state;
340 328 t->mode = mode;
341 329 t->size = size;
342 330 t->mtime = mtime;
343 331 return t;
344 332 }
345 333
346 334 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
347 335 PyObject *kwds)
348 336 {
349 337 /* We do all the initialization here and not a tp_init function because
350 338 * dirstate_tuple is immutable. */
351 339 dirstateTupleObject *t;
352 340 char state;
353 341 int size, mode, mtime;
354 342 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
355 343 return NULL;
356 344
357 345 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
358 346 if (!t)
359 347 return NULL;
360 348 t->state = state;
361 349 t->mode = mode;
362 350 t->size = size;
363 351 t->mtime = mtime;
364 352
365 353 return (PyObject *)t;
366 354 }
367 355
368 356 static void dirstate_tuple_dealloc(PyObject *o)
369 357 {
370 358 PyObject_Del(o);
371 359 }
372 360
373 361 static Py_ssize_t dirstate_tuple_length(PyObject *o)
374 362 {
375 363 return 4;
376 364 }
377 365
378 366 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
379 367 {
380 368 dirstateTupleObject *t = (dirstateTupleObject *)o;
381 369 switch (i) {
382 370 case 0:
383 371 return PyBytes_FromStringAndSize(&t->state, 1);
384 372 case 1:
385 373 return PyInt_FromLong(t->mode);
386 374 case 2:
387 375 return PyInt_FromLong(t->size);
388 376 case 3:
389 377 return PyInt_FromLong(t->mtime);
390 378 default:
391 379 PyErr_SetString(PyExc_IndexError, "index out of range");
392 380 return NULL;
393 381 }
394 382 }
395 383
/* Sequence protocol of dirstate tuples: only the length and item-read
   slots are filled in. */
396 384 static PySequenceMethods dirstate_tuple_sq = {
397 385 dirstate_tuple_length, /* sq_length */
398 386 0, /* sq_concat */
399 387 0, /* sq_repeat */
400 388 dirstate_tuple_item, /* sq_item */
401 389 0, /* sq_ass_item */
402 390 0, /* sq_contains */
403 391 0, /* sq_inplace_concat */
404 392 0 /* sq_inplace_repeat */
405 393 };
406 394
/* Type object for dirstate tuples. Instances are created by
   dirstate_tuple_new, freed by dirstate_tuple_dealloc, and expose their
   fields through the dirstate_tuple_sq sequence slots. */
407 395 PyTypeObject dirstateTupleType = {
408 396 PyVarObject_HEAD_INIT(NULL, 0)
409 397 "dirstate_tuple", /* tp_name */
410 398 sizeof(dirstateTupleObject),/* tp_basicsize */
411 399 0, /* tp_itemsize */
412 400 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
413 401 0, /* tp_print */
414 402 0, /* tp_getattr */
415 403 0, /* tp_setattr */
416 404 0, /* tp_compare */
417 405 0, /* tp_repr */
418 406 0, /* tp_as_number */
419 407 &dirstate_tuple_sq, /* tp_as_sequence */
420 408 0, /* tp_as_mapping */
421 409 0, /* tp_hash */
422 410 0, /* tp_call */
423 411 0, /* tp_str */
424 412 0, /* tp_getattro */
425 413 0, /* tp_setattro */
426 414 0, /* tp_as_buffer */
427 415 Py_TPFLAGS_DEFAULT, /* tp_flags */
428 416 "dirstate tuple", /* tp_doc */
429 417 0, /* tp_traverse */
430 418 0, /* tp_clear */
431 419 0, /* tp_richcompare */
432 420 0, /* tp_weaklistoffset */
433 421 0, /* tp_iter */
434 422 0, /* tp_iternext */
435 423 0, /* tp_methods */
436 424 0, /* tp_members */
437 425 0, /* tp_getset */
438 426 0, /* tp_base */
439 427 0, /* tp_dict */
440 428 0, /* tp_descr_get */
441 429 0, /* tp_descr_set */
442 430 0, /* tp_dictoffset */
443 431 0, /* tp_init */
444 432 0, /* tp_alloc */
445 433 dirstate_tuple_new, /* tp_new */
446 434 };
447 435
448 436 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
449 437 {
450 438 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
451 439 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
452 440 char state, *cur, *str, *cpos;
453 441 int mode, size, mtime;
454 442 unsigned int flen, len, pos = 40;
455 443 int readlen;
456 444
457 445 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
458 446 &PyDict_Type, &dmap,
459 447 &PyDict_Type, &cmap,
460 448 &str, &readlen))
461 449 goto quit;
462 450
463 451 len = readlen;
464 452
465 453 /* read parents */
466 454 if (len < 40) {
467 455 PyErr_SetString(
468 456 PyExc_ValueError, "too little data for parents");
469 457 goto quit;
470 458 }
471 459
472 460 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
473 461 if (!parents)
474 462 goto quit;
475 463
476 464 /* read filenames */
477 465 while (pos >= 40 && pos < len) {
478 466 if (pos + 17 > len) {
479 467 PyErr_SetString(PyExc_ValueError,
480 468 "overflow in dirstate");
481 469 goto quit;
482 470 }
483 471 cur = str + pos;
484 472 /* unpack header */
485 473 state = *cur;
486 474 mode = getbe32(cur + 1);
487 475 size = getbe32(cur + 5);
488 476 mtime = getbe32(cur + 9);
489 477 flen = getbe32(cur + 13);
490 478 pos += 17;
491 479 cur += 17;
492 480 if (flen > len - pos) {
493 481 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
494 482 goto quit;
495 483 }
496 484
497 485 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
498 486 mtime);
499 487 cpos = memchr(cur, 0, flen);
500 488 if (cpos) {
501 489 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
502 490 cname = PyBytes_FromStringAndSize(cpos + 1,
503 491 flen - (cpos - cur) - 1);
504 492 if (!fname || !cname ||
505 493 PyDict_SetItem(cmap, fname, cname) == -1 ||
506 494 PyDict_SetItem(dmap, fname, entry) == -1)
507 495 goto quit;
508 496 Py_DECREF(cname);
509 497 } else {
510 498 fname = PyBytes_FromStringAndSize(cur, flen);
511 499 if (!fname ||
512 500 PyDict_SetItem(dmap, fname, entry) == -1)
513 501 goto quit;
514 502 }
515 503 Py_DECREF(fname);
516 504 Py_DECREF(entry);
517 505 fname = cname = entry = NULL;
518 506 pos += flen;
519 507 }
520 508
521 509 ret = parents;
522 510 Py_INCREF(ret);
523 511 quit:
524 512 Py_XDECREF(fname);
525 513 Py_XDECREF(cname);
526 514 Py_XDECREF(entry);
527 515 Py_XDECREF(parents);
528 516 return ret;
529 517 }
530 518
531 519 /*
532 520 * Build a set of non-normal and other parent entries from the dirstate dmap
533 521 */
534 522 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) {
535 523 PyObject *dmap, *fname, *v;
536 524 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
537 525 Py_ssize_t pos;
538 526
539 527 if (!PyArg_ParseTuple(args, "O!:nonnormalentries",
540 528 &PyDict_Type, &dmap))
541 529 goto bail;
542 530
543 531 nonnset = PySet_New(NULL);
544 532 if (nonnset == NULL)
545 533 goto bail;
546 534
547 535 otherpset = PySet_New(NULL);
548 536 if (otherpset == NULL)
549 537 goto bail;
550 538
551 539 pos = 0;
552 540 while (PyDict_Next(dmap, &pos, &fname, &v)) {
553 541 dirstateTupleObject *t;
554 542 if (!dirstate_tuple_check(v)) {
555 543 PyErr_SetString(PyExc_TypeError,
556 544 "expected a dirstate tuple");
557 545 goto bail;
558 546 }
559 547 t = (dirstateTupleObject *)v;
560 548
561 549 if (t->state == 'n' && t->size == -2) {
562 550 if (PySet_Add(otherpset, fname) == -1) {
563 551 goto bail;
564 552 }
565 553 }
566 554
567 555 if (t->state == 'n' && t->mtime != -1)
568 556 continue;
569 557 if (PySet_Add(nonnset, fname) == -1)
570 558 goto bail;
571 559 }
572 560
573 561 result = Py_BuildValue("(OO)", nonnset, otherpset);
574 562 if (result == NULL)
575 563 goto bail;
576 564 Py_DECREF(nonnset);
577 565 Py_DECREF(otherpset);
578 566 return result;
579 567 bail:
580 568 Py_XDECREF(nonnset);
581 569 Py_XDECREF(otherpset);
582 570 Py_XDECREF(result);
583 571 return NULL;
584 572 }
585 573
586 574 /*
587 575 * Efficiently pack a dirstate object into its on-disk format.
588 576 */
589 577 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
590 578 {
591 579 PyObject *packobj = NULL;
592 580 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
593 581 Py_ssize_t nbytes, pos, l;
594 582 PyObject *k, *v = NULL, *pn;
595 583 char *p, *s;
596 584 int now;
597 585
598 586 if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate",
599 587 &PyDict_Type, &map, &PyDict_Type, &copymap,
600 588 &pl, &now))
601 589 return NULL;
602 590
603 591 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
604 592 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
605 593 return NULL;
606 594 }
607 595
608 596 /* Figure out how much we need to allocate. */
609 597 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
610 598 PyObject *c;
611 599 if (!PyBytes_Check(k)) {
612 600 PyErr_SetString(PyExc_TypeError, "expected string key");
613 601 goto bail;
614 602 }
615 603 nbytes += PyBytes_GET_SIZE(k) + 17;
616 604 c = PyDict_GetItem(copymap, k);
617 605 if (c) {
618 606 if (!PyBytes_Check(c)) {
619 607 PyErr_SetString(PyExc_TypeError,
620 608 "expected string key");
621 609 goto bail;
622 610 }
623 611 nbytes += PyBytes_GET_SIZE(c) + 1;
624 612 }
625 613 }
626 614
627 615 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
628 616 if (packobj == NULL)
629 617 goto bail;
630 618
631 619 p = PyBytes_AS_STRING(packobj);
632 620
633 621 pn = PySequence_ITEM(pl, 0);
634 622 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
635 623 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
636 624 goto bail;
637 625 }
638 626 memcpy(p, s, l);
639 627 p += 20;
640 628 pn = PySequence_ITEM(pl, 1);
641 629 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
642 630 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
643 631 goto bail;
644 632 }
645 633 memcpy(p, s, l);
646 634 p += 20;
647 635
648 636 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
649 637 dirstateTupleObject *tuple;
650 638 char state;
651 639 int mode, size, mtime;
652 640 Py_ssize_t len, l;
653 641 PyObject *o;
654 642 char *t;
655 643
656 644 if (!dirstate_tuple_check(v)) {
657 645 PyErr_SetString(PyExc_TypeError,
658 646 "expected a dirstate tuple");
659 647 goto bail;
660 648 }
661 649 tuple = (dirstateTupleObject *)v;
662 650
663 651 state = tuple->state;
664 652 mode = tuple->mode;
665 653 size = tuple->size;
666 654 mtime = tuple->mtime;
667 655 if (state == 'n' && mtime == now) {
668 656 /* See pure/parsers.py:pack_dirstate for why we do
669 657 * this. */
670 658 mtime = -1;
671 659 mtime_unset = (PyObject *)make_dirstate_tuple(
672 660 state, mode, size, mtime);
673 661 if (!mtime_unset)
674 662 goto bail;
675 663 if (PyDict_SetItem(map, k, mtime_unset) == -1)
676 664 goto bail;
677 665 Py_DECREF(mtime_unset);
678 666 mtime_unset = NULL;
679 667 }
680 668 *p++ = state;
681 669 putbe32((uint32_t)mode, p);
682 670 putbe32((uint32_t)size, p + 4);
683 671 putbe32((uint32_t)mtime, p + 8);
684 672 t = p + 12;
685 673 p += 16;
686 674 len = PyBytes_GET_SIZE(k);
687 675 memcpy(p, PyBytes_AS_STRING(k), len);
688 676 p += len;
689 677 o = PyDict_GetItem(copymap, k);
690 678 if (o) {
691 679 *p++ = '\0';
692 680 l = PyBytes_GET_SIZE(o);
693 681 memcpy(p, PyBytes_AS_STRING(o), l);
694 682 p += l;
695 683 len += l + 1;
696 684 }
697 685 putbe32((uint32_t)len, t);
698 686 }
699 687
700 688 pos = p - PyBytes_AS_STRING(packobj);
701 689 if (pos != nbytes) {
702 690 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
703 691 (long)pos, (long)nbytes);
704 692 goto bail;
705 693 }
706 694
707 695 return packobj;
708 696 bail:
709 697 Py_XDECREF(mtime_unset);
710 698 Py_XDECREF(packobj);
711 699 Py_XDECREF(v);
712 700 return NULL;
713 701 }
714 702
715 703 #define BUMPED_FIX 1
716 704 #define USING_SHA_256 2
717 705 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
718 706
719 707 static PyObject *readshas(
720 708 const char *source, unsigned char num, Py_ssize_t hashwidth)
721 709 {
722 710 int i;
723 711 PyObject *list = PyTuple_New(num);
724 712 if (list == NULL) {
725 713 return NULL;
726 714 }
727 715 for (i = 0; i < num; i++) {
728 716 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
729 717 if (hash == NULL) {
730 718 Py_DECREF(list);
731 719 return NULL;
732 720 }
733 721 PyTuple_SET_ITEM(list, i, hash);
734 722 source += hashwidth;
735 723 }
736 724 return list;
737 725 }
738 726
739 727 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
740 728 uint32_t *msize)
741 729 {
742 730 const char *data = databegin;
743 731 const char *meta;
744 732
745 733 double mtime;
746 734 int16_t tz;
747 735 uint16_t flags;
748 736 unsigned char nsuccs, nparents, nmetadata;
749 737 Py_ssize_t hashwidth = 20;
750 738
751 739 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
752 740 PyObject *metadata = NULL, *ret = NULL;
753 741 int i;
754 742
755 743 if (data + FM1_HEADER_SIZE > dataend) {
756 744 goto overflow;
757 745 }
758 746
759 747 *msize = getbe32(data);
760 748 data += 4;
761 749 mtime = getbefloat64(data);
762 750 data += 8;
763 751 tz = getbeint16(data);
764 752 data += 2;
765 753 flags = getbeuint16(data);
766 754 data += 2;
767 755
768 756 if (flags & USING_SHA_256) {
769 757 hashwidth = 32;
770 758 }
771 759
772 760 nsuccs = (unsigned char)(*data++);
773 761 nparents = (unsigned char)(*data++);
774 762 nmetadata = (unsigned char)(*data++);
775 763
776 764 if (databegin + *msize > dataend) {
777 765 goto overflow;
778 766 }
779 767 dataend = databegin + *msize; /* narrow down to marker size */
780 768
781 769 if (data + hashwidth > dataend) {
782 770 goto overflow;
783 771 }
784 772 prec = PyBytes_FromStringAndSize(data, hashwidth);
785 773 data += hashwidth;
786 774 if (prec == NULL) {
787 775 goto bail;
788 776 }
789 777
790 778 if (data + nsuccs * hashwidth > dataend) {
791 779 goto overflow;
792 780 }
793 781 succs = readshas(data, nsuccs, hashwidth);
794 782 if (succs == NULL) {
795 783 goto bail;
796 784 }
797 785 data += nsuccs * hashwidth;
798 786
799 787 if (nparents == 1 || nparents == 2) {
800 788 if (data + nparents * hashwidth > dataend) {
801 789 goto overflow;
802 790 }
803 791 parents = readshas(data, nparents, hashwidth);
804 792 if (parents == NULL) {
805 793 goto bail;
806 794 }
807 795 data += nparents * hashwidth;
808 796 } else {
809 797 parents = Py_None;
810 798 Py_INCREF(parents);
811 799 }
812 800
813 801 if (data + 2 * nmetadata > dataend) {
814 802 goto overflow;
815 803 }
816 804 meta = data + (2 * nmetadata);
817 805 metadata = PyTuple_New(nmetadata);
818 806 if (metadata == NULL) {
819 807 goto bail;
820 808 }
821 809 for (i = 0; i < nmetadata; i++) {
822 810 PyObject *tmp, *left = NULL, *right = NULL;
823 811 Py_ssize_t leftsize = (unsigned char)(*data++);
824 812 Py_ssize_t rightsize = (unsigned char)(*data++);
825 813 if (meta + leftsize + rightsize > dataend) {
826 814 goto overflow;
827 815 }
828 816 left = PyBytes_FromStringAndSize(meta, leftsize);
829 817 meta += leftsize;
830 818 right = PyBytes_FromStringAndSize(meta, rightsize);
831 819 meta += rightsize;
832 820 tmp = PyTuple_New(2);
833 821 if (!left || !right || !tmp) {
834 822 Py_XDECREF(left);
835 823 Py_XDECREF(right);
836 824 Py_XDECREF(tmp);
837 825 goto bail;
838 826 }
839 827 PyTuple_SET_ITEM(tmp, 0, left);
840 828 PyTuple_SET_ITEM(tmp, 1, right);
841 829 PyTuple_SET_ITEM(metadata, i, tmp);
842 830 }
843 831 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags,
844 832 metadata, mtime, (int)tz * 60, parents);
845 833 goto bail; /* return successfully */
846 834
847 835 overflow:
848 836 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
849 837 bail:
850 838 Py_XDECREF(prec);
851 839 Py_XDECREF(succs);
852 840 Py_XDECREF(metadata);
853 841 Py_XDECREF(parents);
854 842 return ret;
855 843 }
856 844
857 845
858 846 static PyObject *fm1readmarkers(PyObject *self, PyObject *args) {
859 847 const char *data, *dataend;
860 848 int datalen;
861 849 Py_ssize_t offset, stop;
862 850 PyObject *markers = NULL;
863 851
864 852 if (!PyArg_ParseTuple(args, "s#nn", &data, &datalen, &offset, &stop)) {
865 853 return NULL;
866 854 }
867 855 dataend = data + datalen;
868 856 data += offset;
869 857 markers = PyList_New(0);
870 858 if (!markers) {
871 859 return NULL;
872 860 }
873 861 while (offset < stop) {
874 862 uint32_t msize;
875 863 int error;
876 864 PyObject *record = fm1readmarker(data, dataend, &msize);
877 865 if (!record) {
878 866 goto bail;
879 867 }
880 868 error = PyList_Append(markers, record);
881 869 Py_DECREF(record);
882 870 if (error) {
883 871 goto bail;
884 872 }
885 873 data += msize;
886 874 offset += msize;
887 875 }
888 876 return markers;
889 877 bail:
890 878 Py_DECREF(markers);
891 879 return NULL;
892 880 }
893 881
894 882 static char parsers_doc[] = "Efficient content parsing.";
895 883
896 884 PyObject *encodedir(PyObject *self, PyObject *args);
897 885 PyObject *pathencode(PyObject *self, PyObject *args);
898 886 PyObject *lowerencode(PyObject *self, PyObject *args);
899 887 PyObject *parse_index2(PyObject *self, PyObject *args);
900 888
/* Functions exported by the parsers module. The table is referenced by
   parsers_module (Python 3) and passed to Py_InitModule3 (Python 2); the
   {NULL, NULL} entry terminates it. */
901 889 static PyMethodDef methods[] = {
902 890 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
903 891 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
904 892 "create a set containing non-normal and other parent entries of given "
905 893 "dirstate\n"},
906 894 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
907 895 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
908 896 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
909 897 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
910 898 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
911 899 {"dict_new_presized", dict_new_presized, METH_VARARGS,
912 900 "construct a dict with an expected size\n"},
913 901 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
914 902 "make file foldmap\n"},
915 903 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
916 904 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
917 905 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
918 906 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
919 907 "parse v1 obsolete markers\n"},
920 908 {NULL, NULL}
921 909 };
922 910
923 911 void dirs_module_init(PyObject *mod);
924 912 void manifest_module_init(PyObject *mod);
925 913 void revlog_module_init(PyObject *mod);
926 914
927 915 static const int version = 1;
928 916
929 917 static void module_init(PyObject *mod)
930 918 {
931 919 PyModule_AddIntConstant(mod, "version", version);
932 920
933 921 /* This module constant has two purposes. First, it lets us unit test
934 922 * the ImportError raised without hard-coding any error text. This
935 923 * means we can change the text in the future without breaking tests,
936 924 * even across changesets without a recompile. Second, its presence
937 925 * can be used to determine whether the version-checking logic is
938 926 * present, which also helps in testing across changesets without a
939 927 * recompile. Note that this means the pure-Python version of parsers
940 928 * should not have this module constant. */
941 929 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
942 930
943 931 dirs_module_init(mod);
944 932 manifest_module_init(mod);
945 933 revlog_module_init(mod);
946 934
947 935 if (PyType_Ready(&dirstateTupleType) < 0)
948 936 return;
949 937 Py_INCREF(&dirstateTupleType);
950 938 PyModule_AddObject(mod, "dirstatetuple",
951 939 (PyObject *)&dirstateTupleType);
952 940 }
953 941
954 942 static int check_python_version(void)
955 943 {
956 944 PyObject *sys = PyImport_ImportModule("sys"), *ver;
957 945 long hexversion;
958 946 if (!sys)
959 947 return -1;
960 948 ver = PyObject_GetAttrString(sys, "hexversion");
961 949 Py_DECREF(sys);
962 950 if (!ver)
963 951 return -1;
964 952 hexversion = PyInt_AsLong(ver);
965 953 Py_DECREF(ver);
966 954 /* sys.hexversion is a 32-bit number by default, so the -1 case
967 955 * should only occur in unusual circumstances (e.g. if sys.hexversion
968 956 * is manually set to an invalid value). */
969 957 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
970 958 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
971 959 "modules were compiled with Python " PY_VERSION ", but "
972 960 "Mercurial is currently using Python with sys.hexversion=%ld: "
973 961 "Python %s\n at: %s", versionerrortext, hexversion,
974 962 Py_GetVersion(), Py_GetProgramFullPath());
975 963 return -1;
976 964 }
977 965 return 0;
978 966 }
979 967
980 968 #ifdef IS_PY3K
981 969 static struct PyModuleDef parsers_module = {
982 970 PyModuleDef_HEAD_INIT,
983 971 "parsers",
984 972 parsers_doc,
985 973 -1,
986 974 methods
987 975 };
988 976
989 977 PyMODINIT_FUNC PyInit_parsers(void)
990 978 {
991 979 PyObject *mod;
992 980
993 981 if (check_python_version() == -1)
994 982 return NULL;
995 983 mod = PyModule_Create(&parsers_module);
996 984 module_init(mod);
997 985 return mod;
998 986 }
999 987 #else
1000 988 PyMODINIT_FUNC initparsers(void)
1001 989 {
1002 990 PyObject *mod;
1003 991
1004 992 if (check_python_version() == -1)
1005 993 return;
1006 994 mod = Py_InitModule3("parsers", methods, parsers_doc);
1007 995 module_init(mod);
1008 996 }
1009 997 #endif
@@ -1,76 +1,88
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #include "compat.h"
12 12
13 13 #if PY_MAJOR_VERSION >= 3
14 14 #define IS_PY3K
15 15 #endif
16 16
/* Instance layout of a dirstate tuple: the object header followed by the
   four fields that dirstate_tuple_item exposes as items 0-3. */
17 17 typedef struct {
18 18 PyObject_HEAD
19 19 char state;
20 20 int mode;
21 21 int size;
22 22 int mtime;
23 23 } dirstateTupleObject;
24 24
25 25 extern PyTypeObject dirstateTupleType;
26 26 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateTupleType)
27 27
28 28 /* This should be kept in sync with normcasespecs in encoding.py. */
/* make_file_foldmap uses lowertable for NORMCASE_LOWER, uppertable for
   NORMCASE_UPPER, and only the fallback function for NORMCASE_OTHER. */
29 29 enum normcase_spec {
30 30 NORMCASE_LOWER = -1,
31 31 NORMCASE_UPPER = 1,
32 32 NORMCASE_OTHER = 0
33 33 };
34 34
35 35 #define MIN(a, b) (((a)<(b))?(a):(b))
36 36 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
37 37 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
38 38 #define true 1
39 39 #define false 0
40 40 typedef unsigned char bool;
41 41 #else
42 42 #include <stdbool.h>
43 43 #endif
44 44
45 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
46 {
47 /* _PyDict_NewPresized expects a minused parameter, but it actually
48 creates a dictionary that's the nearest power of two bigger than the
49 parameter. For example, with the initial minused = 1000, the
50 dictionary created has size 1024. Of course in a lot of cases that
51 can be greater than the maximum load factor Python's dict object
52 expects (= 2/3), so as soon as we cross the threshold we'll resize
53 anyway. So create a dictionary that's at least 3/2 the size. */
54 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
55 }
56
/* Value of each byte as a hex digit, indexed by byte value: 0-15 for
   '0'-'9', 'A'-'F' and 'a'-'f', and -1 for every other byte. */
45 57 static const int8_t hextable[256] = {
46 58 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
47 59 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
48 60 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
49 61 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
50 62 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
51 63 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
52 64 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
53 65 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
54 66 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
55 67 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
56 68 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
57 69 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
58 70 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
59 71 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60 72 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61 73 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
62 74 };
63 75
64 76 static inline int hexdigit(const char *p, Py_ssize_t off)
65 77 {
66 78 int8_t val = hextable[(unsigned char)p[off]];
67 79
68 80 if (val >= 0) {
69 81 return val;
70 82 }
71 83
72 84 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
73 85 return 0;
74 86 }
75 87
76 88 #endif /* _HG_UTIL_H_ */
General Comments 0
You need to be logged in to leave comments. Login now