##// END OF EJS Templates
parsers: use Py_CLEAR where appropriate
Bryan O'Sullivan -
r16732:277e2acb default
parent child Browse files
Show More
@@ -1,1293 +1,1287 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <string.h>
13 13
14 14 #include "util.h"
15 15
16 16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 17 {
18 18 char c = p[off];
19 19
20 20 if (c >= '0' && c <= '9')
21 21 return c - '0';
22 22 if (c >= 'a' && c <= 'f')
23 23 return c - 'a' + 10;
24 24 if (c >= 'A' && c <= 'F')
25 25 return c - 'A' + 10;
26 26
27 27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 28 return 0;
29 29 }
30 30
31 31 /*
32 32 * Turn a hex-encoded string into binary.
33 33 */
34 34 static PyObject *unhexlify(const char *str, int len)
35 35 {
36 36 PyObject *ret;
37 37 char *d;
38 38 int i;
39 39
40 40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41 41
42 42 if (!ret)
43 43 return NULL;
44 44
45 45 d = PyBytes_AsString(ret);
46 46
47 47 for (i = 0; i < len;) {
48 48 int hi = hexdigit(str, i++);
49 49 int lo = hexdigit(str, i++);
50 50 *d++ = (hi << 4) | lo;
51 51 }
52 52
53 53 return ret;
54 54 }
55 55
56 56 /*
57 57 * This code assumes that a manifest is stitched together with newline
58 58 * ('\n') characters.
59 59 */
60 60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 61 {
62 62 PyObject *mfdict, *fdict;
63 63 char *str, *cur, *start, *zero;
64 64 int len;
65 65
66 66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 67 &PyDict_Type, &mfdict,
68 68 &PyDict_Type, &fdict,
69 69 &str, &len))
70 70 goto quit;
71 71
72 72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 73 PyObject *file = NULL, *node = NULL;
74 74 PyObject *flags = NULL;
75 75 int nlen;
76 76
77 77 if (!*cur) {
78 78 zero = cur;
79 79 continue;
80 80 }
81 81 else if (*cur != '\n')
82 82 continue;
83 83
84 84 if (!zero) {
85 85 PyErr_SetString(PyExc_ValueError,
86 86 "manifest entry has no separator");
87 87 goto quit;
88 88 }
89 89
90 90 file = PyBytes_FromStringAndSize(start, zero - start);
91 91
92 92 if (!file)
93 93 goto bail;
94 94
95 95 nlen = cur - zero - 1;
96 96
97 97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
98 98 if (!node)
99 99 goto bail;
100 100
101 101 if (nlen > 40) {
102 102 flags = PyBytes_FromStringAndSize(zero + 41,
103 103 nlen - 40);
104 104 if (!flags)
105 105 goto bail;
106 106
107 107 if (PyDict_SetItem(fdict, file, flags) == -1)
108 108 goto bail;
109 109 }
110 110
111 111 if (PyDict_SetItem(mfdict, file, node) == -1)
112 112 goto bail;
113 113
114 114 start = cur + 1;
115 115 zero = NULL;
116 116
117 117 Py_XDECREF(flags);
118 118 Py_XDECREF(node);
119 119 Py_XDECREF(file);
120 120 continue;
121 121 bail:
122 122 Py_XDECREF(flags);
123 123 Py_XDECREF(node);
124 124 Py_XDECREF(file);
125 125 goto quit;
126 126 }
127 127
128 128 if (len > 0 && *(cur - 1) != '\n') {
129 129 PyErr_SetString(PyExc_ValueError,
130 130 "manifest contains trailing garbage");
131 131 goto quit;
132 132 }
133 133
134 134 Py_INCREF(Py_None);
135 135 return Py_None;
136 136 quit:
137 137 return NULL;
138 138 }
139 139
140 140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 141 {
142 142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 144 char *str, *cur, *end, *cpos;
145 145 int state, mode, size, mtime;
146 146 unsigned int flen;
147 147 int len;
148 148
149 149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 150 &PyDict_Type, &dmap,
151 151 &PyDict_Type, &cmap,
152 152 &str, &len))
153 153 goto quit;
154 154
155 155 /* read parents */
156 156 if (len < 40)
157 157 goto quit;
158 158
159 159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 160 if (!parents)
161 161 goto quit;
162 162
163 163 /* read filenames */
164 164 cur = str + 40;
165 165 end = str + len;
166 166
167 167 while (cur < end - 17) {
168 168 /* unpack header */
169 169 state = *cur;
170 170 mode = getbe32(cur + 1);
171 171 size = getbe32(cur + 5);
172 172 mtime = getbe32(cur + 9);
173 173 flen = getbe32(cur + 13);
174 174 cur += 17;
175 175 if (cur + flen > end || cur + flen < cur) {
176 176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 177 goto quit;
178 178 }
179 179
180 180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 181 if (!entry)
182 182 goto quit;
183 183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184 184
185 185 cpos = memchr(cur, 0, flen);
186 186 if (cpos) {
187 187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 188 cname = PyBytes_FromStringAndSize(cpos + 1,
189 189 flen - (cpos - cur) - 1);
190 190 if (!fname || !cname ||
191 191 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 192 PyDict_SetItem(dmap, fname, entry) == -1)
193 193 goto quit;
194 194 Py_DECREF(cname);
195 195 } else {
196 196 fname = PyBytes_FromStringAndSize(cur, flen);
197 197 if (!fname ||
198 198 PyDict_SetItem(dmap, fname, entry) == -1)
199 199 goto quit;
200 200 }
201 201 cur += flen;
202 202 Py_DECREF(fname);
203 203 Py_DECREF(entry);
204 204 fname = cname = entry = NULL;
205 205 }
206 206
207 207 ret = parents;
208 208 Py_INCREF(ret);
209 209 quit:
210 210 Py_XDECREF(fname);
211 211 Py_XDECREF(cname);
212 212 Py_XDECREF(entry);
213 213 Py_XDECREF(parents);
214 214 return ret;
215 215 }
216 216
/*
 * One node of the base-16 trie used for fast node->rev mapping.
 *
 * Each of the 16 slots is indexed by one nybble of the hash:
 *   > 0: index of the next trie node
 *   < 0: a leaf, encoded as -(rev + 1)
 *     0: empty
 */
typedef struct {
	int children[16];
} nodetree;
227 227
228 228 /*
229 229 * This class has two behaviours.
230 230 *
231 231 * When used in a list-like way (with integer keys), we decode an
232 232 * entry in a RevlogNG index file on demand. Our last entry is a
233 233 * sentinel, always a nullid. We have limited support for
234 234 * integer-keyed insert and delete, only at elements right before the
235 235 * sentinel.
236 236 *
237 237 * With string keys, we lazily perform a reverse mapping from node to
238 238 * rev, using a base-16 trie.
239 239 */
240 240 typedef struct {
241 241 PyObject_HEAD
242 242 /* Type-specific fields go here. */
243 243 PyObject *data; /* raw bytes of index */
244 244 PyObject **cache; /* cached tuples */
245 245 const char **offsets; /* populated on demand */
246 246 Py_ssize_t raw_length; /* original number of elements */
247 247 Py_ssize_t length; /* current number of elements */
248 248 PyObject *added; /* populated on demand */
249 249 nodetree *nt; /* base-16 trie */
250 250 int ntlength; /* # nodes in use */
251 251 int ntcapacity; /* # nodes allocated */
252 252 int ntdepth; /* maximum depth of tree */
253 253 int ntsplits; /* # splits performed */
254 254 int ntrev; /* last rev scanned */
255 255 int ntlookups; /* # lookups */
256 256 int ntmisses; /* # lookups that miss the cache */
257 257 int inlined;
258 258 } indexObject;
259 259
260 260 static Py_ssize_t index_length(const indexObject *self)
261 261 {
262 262 if (self->added == NULL)
263 263 return self->length;
264 264 return self->length + PyList_GET_SIZE(self->added);
265 265 }
266 266
267 267 static PyObject *nullentry;
268 268 static const char nullid[20];
269 269
270 270 static long inline_scan(indexObject *self, const char **offsets);
271 271
272 272 #if LONG_MAX == 0x7fffffffL
273 273 static char *tuple_format = "Kiiiiiis#";
274 274 #else
275 275 static char *tuple_format = "kiiiiiis#";
276 276 #endif
277 277
278 278 /*
279 279 * Return a pointer to the beginning of a RevlogNG record.
280 280 */
281 281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 282 {
283 283 if (self->inlined && pos > 0) {
284 284 if (self->offsets == NULL) {
285 285 self->offsets = malloc(self->raw_length *
286 286 sizeof(*self->offsets));
287 287 if (self->offsets == NULL)
288 288 return (const char *)PyErr_NoMemory();
289 289 inline_scan(self, self->offsets);
290 290 }
291 291 return self->offsets[pos];
292 292 }
293 293
294 294 return PyString_AS_STRING(self->data) + pos * 64;
295 295 }
296 296
297 297 /*
298 298 * RevlogNG format (all in big endian, data may be inlined):
299 299 * 6 bytes: offset
300 300 * 2 bytes: flags
301 301 * 4 bytes: compressed length
302 302 * 4 bytes: uncompressed length
303 303 * 4 bytes: base revision
304 304 * 4 bytes: link revision
305 305 * 4 bytes: parent 1 revision
306 306 * 4 bytes: parent 2 revision
307 307 * 32 bytes: nodeid (only 20 bytes used)
308 308 */
309 309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 310 {
311 311 uint64_t offset_flags;
312 312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 313 const char *c_node_id;
314 314 const char *data;
315 315 Py_ssize_t length = index_length(self);
316 316 PyObject *entry;
317 317
318 318 if (pos < 0)
319 319 pos += length;
320 320
321 321 if (pos < 0 || pos >= length) {
322 322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
323 323 return NULL;
324 324 }
325 325
326 326 if (pos == length - 1) {
327 327 Py_INCREF(nullentry);
328 328 return nullentry;
329 329 }
330 330
331 331 if (pos >= self->length - 1) {
332 332 PyObject *obj;
333 333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
334 334 Py_INCREF(obj);
335 335 return obj;
336 336 }
337 337
338 338 if (self->cache) {
339 339 if (self->cache[pos]) {
340 340 Py_INCREF(self->cache[pos]);
341 341 return self->cache[pos];
342 342 }
343 343 } else {
344 344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
345 345 if (self->cache == NULL)
346 346 return PyErr_NoMemory();
347 347 }
348 348
349 349 data = index_deref(self, pos);
350 350 if (data == NULL)
351 351 return NULL;
352 352
353 353 offset_flags = getbe32(data + 4);
354 354 if (pos == 0) /* mask out version number for the first entry */
355 355 offset_flags &= 0xFFFF;
356 356 else {
357 357 uint32_t offset_high = getbe32(data);
358 358 offset_flags |= ((uint64_t)offset_high) << 32;
359 359 }
360 360
361 361 comp_len = getbe32(data + 8);
362 362 uncomp_len = getbe32(data + 12);
363 363 base_rev = getbe32(data + 16);
364 364 link_rev = getbe32(data + 20);
365 365 parent_1 = getbe32(data + 24);
366 366 parent_2 = getbe32(data + 28);
367 367 c_node_id = data + 32;
368 368
369 369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
370 370 uncomp_len, base_rev, link_rev,
371 371 parent_1, parent_2, c_node_id, 20);
372 372
373 373 if (entry)
374 374 PyObject_GC_UnTrack(entry);
375 375
376 376 self->cache[pos] = entry;
377 377 Py_INCREF(entry);
378 378
379 379 return entry;
380 380 }
381 381
382 382 /*
383 383 * Return the 20-byte SHA of the node corresponding to the given rev.
384 384 */
385 385 static const char *index_node(indexObject *self, Py_ssize_t pos)
386 386 {
387 387 Py_ssize_t length = index_length(self);
388 388 const char *data;
389 389
390 390 if (pos == length - 1 || pos == INT_MAX)
391 391 return nullid;
392 392
393 393 if (pos >= length)
394 394 return NULL;
395 395
396 396 if (pos >= self->length - 1) {
397 397 PyObject *tuple, *str;
398 398 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
399 399 str = PyTuple_GetItem(tuple, 7);
400 400 return str ? PyString_AS_STRING(str) : NULL;
401 401 }
402 402
403 403 data = index_deref(self, pos);
404 404 return data ? data + 32 : NULL;
405 405 }
406 406
407 407 static int nt_insert(indexObject *self, const char *node, int rev);
408 408
409 409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
410 410 {
411 411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
412 412 return -1;
413 413 if (*nodelen == 20)
414 414 return 0;
415 415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
416 416 return -1;
417 417 }
418 418
419 419 static PyObject *index_insert(indexObject *self, PyObject *args)
420 420 {
421 421 PyObject *obj;
422 422 char *node;
423 423 long offset;
424 424 Py_ssize_t len, nodelen;
425 425
426 426 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
427 427 return NULL;
428 428
429 429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
430 430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
431 431 return NULL;
432 432 }
433 433
434 434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
435 435 return NULL;
436 436
437 437 len = index_length(self);
438 438
439 439 if (offset < 0)
440 440 offset += len;
441 441
442 442 if (offset != len - 1) {
443 443 PyErr_SetString(PyExc_IndexError,
444 444 "insert only supported at index -1");
445 445 return NULL;
446 446 }
447 447
448 448 if (offset > INT_MAX) {
449 449 PyErr_SetString(PyExc_ValueError,
450 450 "currently only 2**31 revs supported");
451 451 return NULL;
452 452 }
453 453
454 454 if (self->added == NULL) {
455 455 self->added = PyList_New(0);
456 456 if (self->added == NULL)
457 457 return NULL;
458 458 }
459 459
460 460 if (PyList_Append(self->added, obj) == -1)
461 461 return NULL;
462 462
463 463 if (self->nt)
464 464 nt_insert(self, node, (int)offset);
465 465
466 466 Py_RETURN_NONE;
467 467 }
468 468
469 469 static void _index_clearcaches(indexObject *self)
470 470 {
471 471 if (self->cache) {
472 472 Py_ssize_t i;
473 473
474 for (i = 0; i < self->raw_length; i++) {
475 if (self->cache[i]) {
476 Py_DECREF(self->cache[i]);
477 self->cache[i] = NULL;
478 }
479 }
474 for (i = 0; i < self->raw_length; i++)
475 Py_CLEAR(self->cache[i]);
480 476 free(self->cache);
481 477 self->cache = NULL;
482 478 }
483 479 if (self->offsets) {
484 480 free(self->offsets);
485 481 self->offsets = NULL;
486 482 }
487 483 if (self->nt) {
488 484 free(self->nt);
489 485 self->nt = NULL;
490 486 }
491 487 }
492 488
493 489 static PyObject *index_clearcaches(indexObject *self)
494 490 {
495 491 _index_clearcaches(self);
496 492 self->ntlength = self->ntcapacity = 0;
497 493 self->ntdepth = self->ntsplits = 0;
498 494 self->ntrev = -1;
499 495 self->ntlookups = self->ntmisses = 0;
500 496 Py_RETURN_NONE;
501 497 }
502 498
503 499 static PyObject *index_stats(indexObject *self)
504 500 {
505 501 PyObject *obj = PyDict_New();
506 502
507 503 if (obj == NULL)
508 504 return NULL;
509 505
510 506 #define istat(__n, __d) \
511 507 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
512 508 goto bail;
513 509
514 510 if (self->added) {
515 511 Py_ssize_t len = PyList_GET_SIZE(self->added);
516 512 if (PyDict_SetItemString(obj, "index entries added",
517 513 PyInt_FromSsize_t(len)) == -1)
518 514 goto bail;
519 515 }
520 516
521 517 if (self->raw_length != self->length - 1)
522 518 istat(raw_length, "revs on disk");
523 519 istat(length, "revs in memory");
524 520 istat(ntcapacity, "node trie capacity");
525 521 istat(ntdepth, "node trie depth");
526 522 istat(ntlength, "node trie count");
527 523 istat(ntlookups, "node trie lookups");
528 524 istat(ntmisses, "node trie misses");
529 525 istat(ntrev, "node trie last rev scanned");
530 526 istat(ntsplits, "node trie splits");
531 527
532 528 #undef istat
533 529
534 530 return obj;
535 531
536 532 bail:
537 533 Py_XDECREF(obj);
538 534 return NULL;
539 535 }
540 536
541 537 static inline int nt_level(const char *node, Py_ssize_t level)
542 538 {
543 539 int v = node[level>>1];
544 540 if (!(level & 1))
545 541 v >>= 4;
546 542 return v & 0xf;
547 543 }
548 544
549 545 /*
550 546 * Return values:
551 547 *
552 548 * -4: match is ambiguous (multiple candidates)
553 549 * -2: not found
554 550 * rest: valid rev
555 551 */
556 552 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
557 553 int hex)
558 554 {
559 555 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
560 556 int level, maxlevel, off;
561 557
562 558 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
563 559 return -1;
564 560
565 561 if (self->nt == NULL)
566 562 return -2;
567 563
568 564 if (hex)
569 565 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
570 566 else
571 567 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
572 568
573 569 for (level = off = 0; level < maxlevel; level++) {
574 570 int k = getnybble(node, level);
575 571 nodetree *n = &self->nt[off];
576 572 int v = n->children[k];
577 573
578 574 if (v < 0) {
579 575 const char *n;
580 576 Py_ssize_t i;
581 577
582 578 v = -v - 1;
583 579 n = index_node(self, v);
584 580 if (n == NULL)
585 581 return -2;
586 582 for (i = level; i < maxlevel; i++)
587 583 if (getnybble(node, i) != nt_level(n, i))
588 584 return -2;
589 585 return v;
590 586 }
591 587 if (v == 0)
592 588 return -2;
593 589 off = v;
594 590 }
595 591 /* multiple matches against an ambiguous prefix */
596 592 return -4;
597 593 }
598 594
599 595 static int nt_new(indexObject *self)
600 596 {
601 597 if (self->ntlength == self->ntcapacity) {
602 598 self->ntcapacity *= 2;
603 599 self->nt = realloc(self->nt,
604 600 self->ntcapacity * sizeof(nodetree));
605 601 if (self->nt == NULL) {
606 602 PyErr_SetString(PyExc_MemoryError, "out of memory");
607 603 return -1;
608 604 }
609 605 memset(&self->nt[self->ntlength], 0,
610 606 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
611 607 }
612 608 return self->ntlength++;
613 609 }
614 610
615 611 static int nt_insert(indexObject *self, const char *node, int rev)
616 612 {
617 613 int level = 0;
618 614 int off = 0;
619 615
620 616 while (level < 40) {
621 617 int k = nt_level(node, level);
622 618 nodetree *n;
623 619 int v;
624 620
625 621 n = &self->nt[off];
626 622 v = n->children[k];
627 623
628 624 if (v == 0) {
629 625 n->children[k] = -rev - 1;
630 626 return 0;
631 627 }
632 628 if (v < 0) {
633 629 const char *oldnode = index_node(self, -v - 1);
634 630 int noff;
635 631
636 632 if (!oldnode || !memcmp(oldnode, node, 20)) {
637 633 n->children[k] = -rev - 1;
638 634 return 0;
639 635 }
640 636 noff = nt_new(self);
641 637 if (noff == -1)
642 638 return -1;
643 639 /* self->nt may have been changed by realloc */
644 640 self->nt[off].children[k] = noff;
645 641 off = noff;
646 642 n = &self->nt[off];
647 643 n->children[nt_level(oldnode, ++level)] = v;
648 644 if (level > self->ntdepth)
649 645 self->ntdepth = level;
650 646 self->ntsplits += 1;
651 647 } else {
652 648 level += 1;
653 649 off = v;
654 650 }
655 651 }
656 652
657 653 return -1;
658 654 }
659 655
660 656 static int nt_init(indexObject *self)
661 657 {
662 658 if (self->nt == NULL) {
663 659 self->ntcapacity = self->raw_length < 4
664 660 ? 4 : self->raw_length / 2;
665 661 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
666 662 if (self->nt == NULL) {
667 663 PyErr_NoMemory();
668 664 return -1;
669 665 }
670 666 self->ntlength = 1;
671 667 self->ntrev = (int)index_length(self) - 1;
672 668 self->ntlookups = 1;
673 669 self->ntmisses = 0;
674 670 if (nt_insert(self, nullid, INT_MAX) == -1)
675 671 return -1;
676 672 }
677 673 return 0;
678 674 }
679 675
680 676 /*
681 677 * Return values:
682 678 *
683 679 * -3: error (exception set)
684 680 * -2: not found (no exception set)
685 681 * rest: valid rev
686 682 */
687 683 static int index_find_node(indexObject *self,
688 684 const char *node, Py_ssize_t nodelen)
689 685 {
690 686 int rev;
691 687
692 688 self->ntlookups++;
693 689 rev = nt_find(self, node, nodelen, 0);
694 690 if (rev >= -1)
695 691 return rev;
696 692
697 693 if (nt_init(self) == -1)
698 694 return -3;
699 695
700 696 /*
701 697 * For the first handful of lookups, we scan the entire index,
702 698 * and cache only the matching nodes. This optimizes for cases
703 699 * like "hg tip", where only a few nodes are accessed.
704 700 *
705 701 * After that, we cache every node we visit, using a single
706 702 * scan amortized over multiple lookups. This gives the best
707 703 * bulk performance, e.g. for "hg log".
708 704 */
709 705 if (self->ntmisses++ < 4) {
710 706 for (rev = self->ntrev - 1; rev >= 0; rev--) {
711 707 const char *n = index_node(self, rev);
712 708 if (n == NULL)
713 709 return -2;
714 710 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
715 711 if (nt_insert(self, n, rev) == -1)
716 712 return -3;
717 713 break;
718 714 }
719 715 }
720 716 } else {
721 717 for (rev = self->ntrev - 1; rev >= 0; rev--) {
722 718 const char *n = index_node(self, rev);
723 719 if (n == NULL) {
724 720 self->ntrev = rev + 1;
725 721 return -2;
726 722 }
727 723 if (nt_insert(self, n, rev) == -1) {
728 724 self->ntrev = rev + 1;
729 725 return -3;
730 726 }
731 727 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
732 728 break;
733 729 }
734 730 }
735 731 self->ntrev = rev;
736 732 }
737 733
738 734 if (rev >= 0)
739 735 return rev;
740 736 return -2;
741 737 }
742 738
743 739 static PyObject *raise_revlog_error(void)
744 740 {
745 741 static PyObject *errclass;
746 742 PyObject *mod = NULL, *errobj;
747 743
748 744 if (errclass == NULL) {
749 745 PyObject *dict;
750 746
751 747 mod = PyImport_ImportModule("mercurial.error");
752 748 if (mod == NULL)
753 749 goto classfail;
754 750
755 751 dict = PyModule_GetDict(mod);
756 752 if (dict == NULL)
757 753 goto classfail;
758 754
759 755 errclass = PyDict_GetItemString(dict, "RevlogError");
760 756 if (errclass == NULL) {
761 757 PyErr_SetString(PyExc_SystemError,
762 758 "could not find RevlogError");
763 759 goto classfail;
764 760 }
765 761 Py_INCREF(errclass);
766 762 }
767 763
768 764 errobj = PyObject_CallFunction(errclass, NULL);
769 765 if (errobj == NULL)
770 766 return NULL;
771 767 PyErr_SetObject(errclass, errobj);
772 768 return errobj;
773 769
774 770 classfail:
775 771 Py_XDECREF(mod);
776 772 return NULL;
777 773 }
778 774
779 775 static PyObject *index_getitem(indexObject *self, PyObject *value)
780 776 {
781 777 char *node;
782 778 Py_ssize_t nodelen;
783 779 int rev;
784 780
785 781 if (PyInt_Check(value))
786 782 return index_get(self, PyInt_AS_LONG(value));
787 783
788 784 if (node_check(value, &node, &nodelen) == -1)
789 785 return NULL;
790 786 rev = index_find_node(self, node, nodelen);
791 787 if (rev >= -1)
792 788 return PyInt_FromLong(rev);
793 789 if (rev == -2)
794 790 raise_revlog_error();
795 791 return NULL;
796 792 }
797 793
798 794 static int nt_partialmatch(indexObject *self, const char *node,
799 795 Py_ssize_t nodelen)
800 796 {
801 797 int rev;
802 798
803 799 if (nt_init(self) == -1)
804 800 return -3;
805 801
806 802 if (self->ntrev > 0) {
807 803 /* ensure that the radix tree is fully populated */
808 804 for (rev = self->ntrev - 1; rev >= 0; rev--) {
809 805 const char *n = index_node(self, rev);
810 806 if (n == NULL)
811 807 return -2;
812 808 if (nt_insert(self, n, rev) == -1)
813 809 return -3;
814 810 }
815 811 self->ntrev = rev;
816 812 }
817 813
818 814 return nt_find(self, node, nodelen, 1);
819 815 }
820 816
821 817 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
822 818 {
823 819 const char *fullnode;
824 820 int nodelen;
825 821 char *node;
826 822 int rev, i;
827 823
828 824 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
829 825 return NULL;
830 826
831 827 if (nodelen < 4) {
832 828 PyErr_SetString(PyExc_ValueError, "key too short");
833 829 return NULL;
834 830 }
835 831
836 832 if (nodelen > 40)
837 833 nodelen = 40;
838 834
839 835 for (i = 0; i < nodelen; i++)
840 836 hexdigit(node, i);
841 837 if (PyErr_Occurred()) {
842 838 /* input contains non-hex characters */
843 839 PyErr_Clear();
844 840 Py_RETURN_NONE;
845 841 }
846 842
847 843 rev = nt_partialmatch(self, node, nodelen);
848 844
849 845 switch (rev) {
850 846 case -4:
851 847 raise_revlog_error();
852 848 case -3:
853 849 return NULL;
854 850 case -2:
855 851 Py_RETURN_NONE;
856 852 case -1:
857 853 return PyString_FromStringAndSize(nullid, 20);
858 854 }
859 855
860 856 fullnode = index_node(self, rev);
861 857 if (fullnode == NULL) {
862 858 PyErr_Format(PyExc_IndexError,
863 859 "could not access rev %d", rev);
864 860 return NULL;
865 861 }
866 862 return PyString_FromStringAndSize(fullnode, 20);
867 863 }
868 864
869 865 static PyObject *index_m_get(indexObject *self, PyObject *args)
870 866 {
871 867 Py_ssize_t nodelen;
872 868 PyObject *val;
873 869 char *node;
874 870 int rev;
875 871
876 872 if (!PyArg_ParseTuple(args, "O", &val))
877 873 return NULL;
878 874 if (node_check(val, &node, &nodelen) == -1)
879 875 return NULL;
880 876 rev = index_find_node(self, node, nodelen);
881 877 if (rev == -3)
882 878 return NULL;
883 879 if (rev == -2)
884 880 Py_RETURN_NONE;
885 881 return PyInt_FromLong(rev);
886 882 }
887 883
888 884 static int index_contains(indexObject *self, PyObject *value)
889 885 {
890 886 char *node;
891 887 Py_ssize_t nodelen;
892 888
893 889 if (PyInt_Check(value)) {
894 890 long rev = PyInt_AS_LONG(value);
895 891 return rev >= -1 && rev < index_length(self);
896 892 }
897 893
898 894 if (node_check(value, &node, &nodelen) == -1)
899 895 return -1;
900 896
901 897 switch (index_find_node(self, node, nodelen)) {
902 898 case -3:
903 899 return -1;
904 900 case -2:
905 901 return 0;
906 902 default:
907 903 return 1;
908 904 }
909 905 }
910 906
911 907 /*
912 908 * Invalidate any trie entries introduced by added revs.
913 909 */
914 910 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
915 911 {
916 912 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
917 913
918 914 for (i = start; i < len; i++) {
919 915 PyObject *tuple = PyList_GET_ITEM(self->added, i);
920 916 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
921 917
922 918 nt_insert(self, PyString_AS_STRING(node), -1);
923 919 }
924 920
925 if (start == 0) {
926 Py_DECREF(self->added);
927 self->added = NULL;
928 }
921 if (start == 0)
922 Py_CLEAR(self->added);
929 923 }
930 924
931 925 /*
932 926 * Delete a numeric range of revs, which must be at the end of the
933 927 * range, but exclude the sentinel nullid entry.
934 928 */
935 929 static int index_slice_del(indexObject *self, PyObject *item)
936 930 {
937 931 Py_ssize_t start, stop, step, slicelength;
938 932 Py_ssize_t length = index_length(self);
939 933
940 934 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
941 935 &start, &stop, &step, &slicelength) < 0)
942 936 return -1;
943 937
944 938 if (slicelength <= 0)
945 939 return 0;
946 940
947 941 if ((step < 0 && start < stop) || (step > 0 && start > stop))
948 942 stop = start;
949 943
950 944 if (step < 0) {
951 945 stop = start + 1;
952 946 start = stop + step*(slicelength - 1) - 1;
953 947 step = -step;
954 948 }
955 949
956 950 if (step != 1) {
957 951 PyErr_SetString(PyExc_ValueError,
958 952 "revlog index delete requires step size of 1");
959 953 return -1;
960 954 }
961 955
962 956 if (stop != length - 1) {
963 957 PyErr_SetString(PyExc_IndexError,
964 958 "revlog index deletion indices are invalid");
965 959 return -1;
966 960 }
967 961
968 962 if (start < self->length - 1) {
969 963 if (self->nt) {
970 964 Py_ssize_t i;
971 965
972 966 for (i = start + 1; i < self->length - 1; i++) {
973 967 const char *node = index_node(self, i);
974 968
975 969 if (node)
976 970 nt_insert(self, node, -1);
977 971 }
978 972 if (self->added)
979 973 nt_invalidate_added(self, 0);
980 974 if (self->ntrev > start)
981 975 self->ntrev = (int)start;
982 976 }
983 977 self->length = start + 1;
984 978 return 0;
985 979 }
986 980
987 981 if (self->nt) {
988 982 nt_invalidate_added(self, start - self->length + 1);
989 983 if (self->ntrev > start)
990 984 self->ntrev = (int)start;
991 985 }
992 986 return self->added
993 987 ? PyList_SetSlice(self->added, start - self->length + 1,
994 988 PyList_GET_SIZE(self->added), NULL)
995 989 : 0;
996 990 }
997 991
998 992 /*
999 993 * Supported ops:
1000 994 *
1001 995 * slice deletion
1002 996 * string assignment (extend node->rev mapping)
1003 997 * string deletion (shrink node->rev mapping)
1004 998 */
1005 999 static int index_assign_subscript(indexObject *self, PyObject *item,
1006 1000 PyObject *value)
1007 1001 {
1008 1002 char *node;
1009 1003 Py_ssize_t nodelen;
1010 1004 long rev;
1011 1005
1012 1006 if (PySlice_Check(item) && value == NULL)
1013 1007 return index_slice_del(self, item);
1014 1008
1015 1009 if (node_check(item, &node, &nodelen) == -1)
1016 1010 return -1;
1017 1011
1018 1012 if (value == NULL)
1019 1013 return self->nt ? nt_insert(self, node, -1) : 0;
1020 1014 rev = PyInt_AsLong(value);
1021 1015 if (rev > INT_MAX || rev < 0) {
1022 1016 if (!PyErr_Occurred())
1023 1017 PyErr_SetString(PyExc_ValueError, "rev out of range");
1024 1018 return -1;
1025 1019 }
1026 1020 return nt_insert(self, node, (int)rev);
1027 1021 }
1028 1022
1029 1023 /*
1030 1024 * Find all RevlogNG entries in an index that has inline data. Update
1031 1025 * the optional "offsets" table with those entries.
1032 1026 */
1033 1027 static long inline_scan(indexObject *self, const char **offsets)
1034 1028 {
1035 1029 const char *data = PyString_AS_STRING(self->data);
1036 1030 const char *end = data + PyString_GET_SIZE(self->data);
1037 1031 const long hdrsize = 64;
1038 1032 long incr = hdrsize;
1039 1033 Py_ssize_t len = 0;
1040 1034
1041 1035 while (data + hdrsize <= end) {
1042 1036 uint32_t comp_len;
1043 1037 const char *old_data;
1044 1038 /* 3rd element of header is length of compressed inline data */
1045 1039 comp_len = getbe32(data + 8);
1046 1040 incr = hdrsize + comp_len;
1047 1041 if (incr < hdrsize)
1048 1042 break;
1049 1043 if (offsets)
1050 1044 offsets[len] = data;
1051 1045 len++;
1052 1046 old_data = data;
1053 1047 data += incr;
1054 1048 if (data <= old_data)
1055 1049 break;
1056 1050 }
1057 1051
1058 1052 if (data != end && data + hdrsize != end) {
1059 1053 if (!PyErr_Occurred())
1060 1054 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1061 1055 return -1;
1062 1056 }
1063 1057
1064 1058 return len;
1065 1059 }
1066 1060
1067 1061 static int index_init(indexObject *self, PyObject *args)
1068 1062 {
1069 1063 PyObject *data_obj, *inlined_obj;
1070 1064 Py_ssize_t size;
1071 1065
1072 1066 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1073 1067 return -1;
1074 1068 if (!PyString_Check(data_obj)) {
1075 1069 PyErr_SetString(PyExc_TypeError, "data is not a string");
1076 1070 return -1;
1077 1071 }
1078 1072 size = PyString_GET_SIZE(data_obj);
1079 1073
1080 1074 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1081 1075 self->data = data_obj;
1082 1076 self->cache = NULL;
1083 1077
1084 1078 self->added = NULL;
1085 1079 self->offsets = NULL;
1086 1080 self->nt = NULL;
1087 1081 self->ntlength = self->ntcapacity = 0;
1088 1082 self->ntdepth = self->ntsplits = 0;
1089 1083 self->ntlookups = self->ntmisses = 0;
1090 1084 self->ntrev = -1;
1091 1085 Py_INCREF(self->data);
1092 1086
1093 1087 if (self->inlined) {
1094 1088 long len = inline_scan(self, NULL);
1095 1089 if (len == -1)
1096 1090 goto bail;
1097 1091 self->raw_length = len;
1098 1092 self->length = len + 1;
1099 1093 } else {
1100 1094 if (size % 64) {
1101 1095 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1102 1096 goto bail;
1103 1097 }
1104 1098 self->raw_length = size / 64;
1105 1099 self->length = self->raw_length + 1;
1106 1100 }
1107 1101
1108 1102 return 0;
1109 1103 bail:
1110 1104 return -1;
1111 1105 }
1112 1106
1113 1107 static PyObject *index_nodemap(indexObject *self)
1114 1108 {
1115 1109 Py_INCREF(self);
1116 1110 return (PyObject *)self;
1117 1111 }
1118 1112
1119 1113 static void index_dealloc(indexObject *self)
1120 1114 {
1121 1115 _index_clearcaches(self);
1122 1116 Py_DECREF(self->data);
1123 1117 Py_XDECREF(self->added);
1124 1118 PyObject_Del(self);
1125 1119 }
1126 1120
1127 1121 static PySequenceMethods index_sequence_methods = {
1128 1122 (lenfunc)index_length, /* sq_length */
1129 1123 0, /* sq_concat */
1130 1124 0, /* sq_repeat */
1131 1125 (ssizeargfunc)index_get, /* sq_item */
1132 1126 0, /* sq_slice */
1133 1127 0, /* sq_ass_item */
1134 1128 0, /* sq_ass_slice */
1135 1129 (objobjproc)index_contains, /* sq_contains */
1136 1130 };
1137 1131
1138 1132 static PyMappingMethods index_mapping_methods = {
1139 1133 (lenfunc)index_length, /* mp_length */
1140 1134 (binaryfunc)index_getitem, /* mp_subscript */
1141 1135 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1142 1136 };
1143 1137
1144 1138 static PyMethodDef index_methods[] = {
1145 1139 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1146 1140 "clear the index caches"},
1147 1141 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1148 1142 "get an index entry"},
1149 1143 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1150 1144 "insert an index entry"},
1151 1145 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1152 1146 "match a potentially ambiguous node ID"},
1153 1147 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1154 1148 "stats for the index"},
1155 1149 {NULL} /* Sentinel */
1156 1150 };
1157 1151
1158 1152 static PyGetSetDef index_getset[] = {
1159 1153 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1160 1154 {NULL} /* Sentinel */
1161 1155 };
1162 1156
1163 1157 static PyTypeObject indexType = {
1164 1158 PyObject_HEAD_INIT(NULL)
1165 1159 0, /* ob_size */
1166 1160 "parsers.index", /* tp_name */
1167 1161 sizeof(indexObject), /* tp_basicsize */
1168 1162 0, /* tp_itemsize */
1169 1163 (destructor)index_dealloc, /* tp_dealloc */
1170 1164 0, /* tp_print */
1171 1165 0, /* tp_getattr */
1172 1166 0, /* tp_setattr */
1173 1167 0, /* tp_compare */
1174 1168 0, /* tp_repr */
1175 1169 0, /* tp_as_number */
1176 1170 &index_sequence_methods, /* tp_as_sequence */
1177 1171 &index_mapping_methods, /* tp_as_mapping */
1178 1172 0, /* tp_hash */
1179 1173 0, /* tp_call */
1180 1174 0, /* tp_str */
1181 1175 0, /* tp_getattro */
1182 1176 0, /* tp_setattro */
1183 1177 0, /* tp_as_buffer */
1184 1178 Py_TPFLAGS_DEFAULT, /* tp_flags */
1185 1179 "revlog index", /* tp_doc */
1186 1180 0, /* tp_traverse */
1187 1181 0, /* tp_clear */
1188 1182 0, /* tp_richcompare */
1189 1183 0, /* tp_weaklistoffset */
1190 1184 0, /* tp_iter */
1191 1185 0, /* tp_iternext */
1192 1186 index_methods, /* tp_methods */
1193 1187 0, /* tp_members */
1194 1188 index_getset, /* tp_getset */
1195 1189 0, /* tp_base */
1196 1190 0, /* tp_dict */
1197 1191 0, /* tp_descr_get */
1198 1192 0, /* tp_descr_set */
1199 1193 0, /* tp_dictoffset */
1200 1194 (initproc)index_init, /* tp_init */
1201 1195 0, /* tp_alloc */
1202 1196 };
1203 1197
1204 1198 /*
1205 1199 * returns a tuple of the form (index, index, cache) with elements as
1206 1200 * follows:
1207 1201 *
1208 1202 * index: an index object that lazily parses RevlogNG records
1209 1203 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1210 1204 *
1211 1205 * added complications are for backwards compatibility
1212 1206 */
1213 1207 static PyObject *parse_index2(PyObject *self, PyObject *args)
1214 1208 {
1215 1209 PyObject *tuple = NULL, *cache = NULL;
1216 1210 indexObject *idx;
1217 1211 int ret;
1218 1212
1219 1213 idx = PyObject_New(indexObject, &indexType);
1220 1214 if (idx == NULL)
1221 1215 goto bail;
1222 1216
1223 1217 ret = index_init(idx, args);
1224 1218 if (ret == -1)
1225 1219 goto bail;
1226 1220
1227 1221 if (idx->inlined) {
1228 1222 cache = Py_BuildValue("iO", 0, idx->data);
1229 1223 if (cache == NULL)
1230 1224 goto bail;
1231 1225 } else {
1232 1226 cache = Py_None;
1233 1227 Py_INCREF(cache);
1234 1228 }
1235 1229
1236 1230 tuple = Py_BuildValue("NN", idx, cache);
1237 1231 if (!tuple)
1238 1232 goto bail;
1239 1233 return tuple;
1240 1234
1241 1235 bail:
1242 1236 Py_XDECREF(idx);
1243 1237 Py_XDECREF(cache);
1244 1238 Py_XDECREF(tuple);
1245 1239 return NULL;
1246 1240 }
1247 1241
1248 1242 static char parsers_doc[] = "Efficient content parsing.";
1249 1243
1250 1244 static PyMethodDef methods[] = {
1251 1245 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1252 1246 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1253 1247 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1254 1248 {NULL, NULL}
1255 1249 };
1256 1250
1257 1251 static void module_init(PyObject *mod)
1258 1252 {
1259 1253 indexType.tp_new = PyType_GenericNew;
1260 1254 if (PyType_Ready(&indexType) < 0)
1261 1255 return;
1262 1256 Py_INCREF(&indexType);
1263 1257
1264 1258 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1265 1259
1266 1260 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1267 1261 -1, -1, -1, -1, nullid, 20);
1268 1262 if (nullentry)
1269 1263 PyObject_GC_UnTrack(nullentry);
1270 1264 }
1271 1265
1272 1266 #ifdef IS_PY3K
1273 1267 static struct PyModuleDef parsers_module = {
1274 1268 PyModuleDef_HEAD_INIT,
1275 1269 "parsers",
1276 1270 parsers_doc,
1277 1271 -1,
1278 1272 methods
1279 1273 };
1280 1274
1281 1275 PyMODINIT_FUNC PyInit_parsers(void)
1282 1276 {
1283 1277 PyObject *mod = PyModule_Create(&parsers_module);
1284 1278 module_init(mod);
1285 1279 return mod;
1286 1280 }
1287 1281 #else
1288 1282 PyMODINIT_FUNC initparsers(void)
1289 1283 {
1290 1284 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1291 1285 module_init(mod);
1292 1286 }
1293 1287 #endif
General Comments 0
You need to be logged in to leave comments. Login now