##// END OF EJS Templates
parsers: factor out radix tree initialization
Bryan O'Sullivan -
r16615:96fa9dd1 default
parent child Browse files
Show More
@@ -1,1190 +1,1197 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <string.h>
13 13
14 14 #include "util.h"
15 15
16 16 static int hexdigit(char c)
17 17 {
18 18 if (c >= '0' && c <= '9')
19 19 return c - '0';
20 20 if (c >= 'a' && c <= 'f')
21 21 return c - 'a' + 10;
22 22 if (c >= 'A' && c <= 'F')
23 23 return c - 'A' + 10;
24 24
25 25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
26 26 return 0;
27 27 }
28 28
29 29 /*
30 30 * Turn a hex-encoded string into binary.
31 31 */
32 32 static PyObject *unhexlify(const char *str, int len)
33 33 {
34 34 PyObject *ret;
35 35 const char *c;
36 36 char *d;
37 37
38 38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
39 39
40 40 if (!ret)
41 41 return NULL;
42 42
43 43 d = PyBytes_AsString(ret);
44 44
45 45 for (c = str; c < str + len;) {
46 46 int hi = hexdigit(*c++);
47 47 int lo = hexdigit(*c++);
48 48 *d++ = (hi << 4) | lo;
49 49 }
50 50
51 51 return ret;
52 52 }
53 53
54 54 /*
55 55 * This code assumes that a manifest is stitched together with newline
56 56 * ('\n') characters.
57 57 */
58 58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
59 59 {
60 60 PyObject *mfdict, *fdict;
61 61 char *str, *cur, *start, *zero;
62 62 int len;
63 63
64 64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
65 65 &PyDict_Type, &mfdict,
66 66 &PyDict_Type, &fdict,
67 67 &str, &len))
68 68 goto quit;
69 69
70 70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
71 71 PyObject *file = NULL, *node = NULL;
72 72 PyObject *flags = NULL;
73 73 int nlen;
74 74
75 75 if (!*cur) {
76 76 zero = cur;
77 77 continue;
78 78 }
79 79 else if (*cur != '\n')
80 80 continue;
81 81
82 82 if (!zero) {
83 83 PyErr_SetString(PyExc_ValueError,
84 84 "manifest entry has no separator");
85 85 goto quit;
86 86 }
87 87
88 88 file = PyBytes_FromStringAndSize(start, zero - start);
89 89
90 90 if (!file)
91 91 goto bail;
92 92
93 93 nlen = cur - zero - 1;
94 94
95 95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
96 96 if (!node)
97 97 goto bail;
98 98
99 99 if (nlen > 40) {
100 100 flags = PyBytes_FromStringAndSize(zero + 41,
101 101 nlen - 40);
102 102 if (!flags)
103 103 goto bail;
104 104
105 105 if (PyDict_SetItem(fdict, file, flags) == -1)
106 106 goto bail;
107 107 }
108 108
109 109 if (PyDict_SetItem(mfdict, file, node) == -1)
110 110 goto bail;
111 111
112 112 start = cur + 1;
113 113 zero = NULL;
114 114
115 115 Py_XDECREF(flags);
116 116 Py_XDECREF(node);
117 117 Py_XDECREF(file);
118 118 continue;
119 119 bail:
120 120 Py_XDECREF(flags);
121 121 Py_XDECREF(node);
122 122 Py_XDECREF(file);
123 123 goto quit;
124 124 }
125 125
126 126 if (len > 0 && *(cur - 1) != '\n') {
127 127 PyErr_SetString(PyExc_ValueError,
128 128 "manifest contains trailing garbage");
129 129 goto quit;
130 130 }
131 131
132 132 Py_INCREF(Py_None);
133 133 return Py_None;
134 134 quit:
135 135 return NULL;
136 136 }
137 137
138 138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
139 139 {
140 140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
141 141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
142 142 char *str, *cur, *end, *cpos;
143 143 int state, mode, size, mtime;
144 144 unsigned int flen;
145 145 int len;
146 146
147 147 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
148 148 &PyDict_Type, &dmap,
149 149 &PyDict_Type, &cmap,
150 150 &str, &len))
151 151 goto quit;
152 152
153 153 /* read parents */
154 154 if (len < 40)
155 155 goto quit;
156 156
157 157 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
158 158 if (!parents)
159 159 goto quit;
160 160
161 161 /* read filenames */
162 162 cur = str + 40;
163 163 end = str + len;
164 164
165 165 while (cur < end - 17) {
166 166 /* unpack header */
167 167 state = *cur;
168 168 mode = getbe32(cur + 1);
169 169 size = getbe32(cur + 5);
170 170 mtime = getbe32(cur + 9);
171 171 flen = getbe32(cur + 13);
172 172 cur += 17;
173 173 if (cur + flen > end || cur + flen < cur) {
174 174 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
175 175 goto quit;
176 176 }
177 177
178 178 entry = Py_BuildValue("ciii", state, mode, size, mtime);
179 179 if (!entry)
180 180 goto quit;
181 181 PyObject_GC_UnTrack(entry); /* don't waste time with this */
182 182
183 183 cpos = memchr(cur, 0, flen);
184 184 if (cpos) {
185 185 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
186 186 cname = PyBytes_FromStringAndSize(cpos + 1,
187 187 flen - (cpos - cur) - 1);
188 188 if (!fname || !cname ||
189 189 PyDict_SetItem(cmap, fname, cname) == -1 ||
190 190 PyDict_SetItem(dmap, fname, entry) == -1)
191 191 goto quit;
192 192 Py_DECREF(cname);
193 193 } else {
194 194 fname = PyBytes_FromStringAndSize(cur, flen);
195 195 if (!fname ||
196 196 PyDict_SetItem(dmap, fname, entry) == -1)
197 197 goto quit;
198 198 }
199 199 cur += flen;
200 200 Py_DECREF(fname);
201 201 Py_DECREF(entry);
202 202 fname = cname = entry = NULL;
203 203 }
204 204
205 205 ret = parents;
206 206 Py_INCREF(ret);
207 207 quit:
208 208 Py_XDECREF(fname);
209 209 Py_XDECREF(cname);
210 210 Py_XDECREF(entry);
211 211 Py_XDECREF(parents);
212 212 return ret;
213 213 }
214 214
215 215 /*
216 216 * A base-16 trie for fast node->rev mapping.
217 217 *
218 218 * Positive value is index of the next node in the trie
219 219 * Negative value is a leaf: -(rev + 1)
220 220 * Zero is empty
221 221 */
222 222 typedef struct {
223 223 int children[16];
224 224 } nodetree;
225 225
226 226 /*
227 227 * This class has two behaviours.
228 228 *
229 229 * When used in a list-like way (with integer keys), we decode an
230 230 * entry in a RevlogNG index file on demand. Our last entry is a
231 231 * sentinel, always a nullid. We have limited support for
232 232 * integer-keyed insert and delete, only at elements right before the
233 233 * sentinel.
234 234 *
235 235 * With string keys, we lazily perform a reverse mapping from node to
236 236 * rev, using a base-16 trie.
237 237 */
238 238 typedef struct {
239 239 PyObject_HEAD
240 240 /* Type-specific fields go here. */
241 241 PyObject *data; /* raw bytes of index */
242 242 PyObject **cache; /* cached tuples */
243 243 const char **offsets; /* populated on demand */
244 244 Py_ssize_t raw_length; /* original number of elements */
245 245 Py_ssize_t length; /* current number of elements */
246 246 PyObject *added; /* populated on demand */
247 247 nodetree *nt; /* base-16 trie */
248 248 int ntlength; /* # nodes in use */
249 249 int ntcapacity; /* # nodes allocated */
250 250 int ntdepth; /* maximum depth of tree */
251 251 int ntsplits; /* # splits performed */
252 252 int ntrev; /* last rev scanned */
253 253 int ntlookups; /* # lookups */
254 254 int ntmisses; /* # lookups that miss the cache */
255 255 int inlined;
256 256 } indexObject;
257 257
258 258 static Py_ssize_t index_length(const indexObject *self)
259 259 {
260 260 if (self->added == NULL)
261 261 return self->length;
262 262 return self->length + PyList_GET_SIZE(self->added);
263 263 }
264 264
265 265 static PyObject *nullentry;
266 266 static const char nullid[20];
267 267
268 268 static long inline_scan(indexObject *self, const char **offsets);
269 269
270 270 #if LONG_MAX == 0x7fffffffL
271 271 static char *tuple_format = "Kiiiiiis#";
272 272 #else
273 273 static char *tuple_format = "kiiiiiis#";
274 274 #endif
275 275
276 276 /*
277 277 * Return a pointer to the beginning of a RevlogNG record.
278 278 */
279 279 static const char *index_deref(indexObject *self, Py_ssize_t pos)
280 280 {
281 281 if (self->inlined && pos > 0) {
282 282 if (self->offsets == NULL) {
283 283 self->offsets = malloc(self->raw_length *
284 284 sizeof(*self->offsets));
285 285 if (self->offsets == NULL)
286 286 return (const char *)PyErr_NoMemory();
287 287 inline_scan(self, self->offsets);
288 288 }
289 289 return self->offsets[pos];
290 290 }
291 291
292 292 return PyString_AS_STRING(self->data) + pos * 64;
293 293 }
294 294
295 295 /*
296 296 * RevlogNG format (all in big endian, data may be inlined):
297 297 * 6 bytes: offset
298 298 * 2 bytes: flags
299 299 * 4 bytes: compressed length
300 300 * 4 bytes: uncompressed length
301 301 * 4 bytes: base revision
302 302 * 4 bytes: link revision
303 303 * 4 bytes: parent 1 revision
304 304 * 4 bytes: parent 2 revision
305 305 * 32 bytes: nodeid (only 20 bytes used)
306 306 */
307 307 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
308 308 {
309 309 uint64_t offset_flags;
310 310 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
311 311 const char *c_node_id;
312 312 const char *data;
313 313 Py_ssize_t length = index_length(self);
314 314 PyObject *entry;
315 315
316 316 if (pos < 0)
317 317 pos += length;
318 318
319 319 if (pos < 0 || pos >= length) {
320 320 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
321 321 return NULL;
322 322 }
323 323
324 324 if (pos == length - 1) {
325 325 Py_INCREF(nullentry);
326 326 return nullentry;
327 327 }
328 328
329 329 if (pos >= self->length - 1) {
330 330 PyObject *obj;
331 331 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
332 332 Py_INCREF(obj);
333 333 return obj;
334 334 }
335 335
336 336 if (self->cache) {
337 337 if (self->cache[pos]) {
338 338 Py_INCREF(self->cache[pos]);
339 339 return self->cache[pos];
340 340 }
341 341 } else {
342 342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
343 343 if (self->cache == NULL)
344 344 return PyErr_NoMemory();
345 345 }
346 346
347 347 data = index_deref(self, pos);
348 348 if (data == NULL)
349 349 return NULL;
350 350
351 351 offset_flags = getbe32(data + 4);
352 352 if (pos == 0) /* mask out version number for the first entry */
353 353 offset_flags &= 0xFFFF;
354 354 else {
355 355 uint32_t offset_high = getbe32(data);
356 356 offset_flags |= ((uint64_t)offset_high) << 32;
357 357 }
358 358
359 359 comp_len = getbe32(data + 8);
360 360 uncomp_len = getbe32(data + 12);
361 361 base_rev = getbe32(data + 16);
362 362 link_rev = getbe32(data + 20);
363 363 parent_1 = getbe32(data + 24);
364 364 parent_2 = getbe32(data + 28);
365 365 c_node_id = data + 32;
366 366
367 367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
368 368 uncomp_len, base_rev, link_rev,
369 369 parent_1, parent_2, c_node_id, 20);
370 370
371 371 if (entry)
372 372 PyObject_GC_UnTrack(entry);
373 373
374 374 self->cache[pos] = entry;
375 375 Py_INCREF(entry);
376 376
377 377 return entry;
378 378 }
379 379
380 380 /*
381 381 * Return the 20-byte SHA of the node corresponding to the given rev.
382 382 */
383 383 static const char *index_node(indexObject *self, Py_ssize_t pos)
384 384 {
385 385 Py_ssize_t length = index_length(self);
386 386 const char *data;
387 387
388 388 if (pos == length - 1)
389 389 return nullid;
390 390
391 391 if (pos >= length)
392 392 return NULL;
393 393
394 394 if (pos >= self->length - 1) {
395 395 PyObject *tuple, *str;
396 396 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
397 397 str = PyTuple_GetItem(tuple, 7);
398 398 return str ? PyString_AS_STRING(str) : NULL;
399 399 }
400 400
401 401 data = index_deref(self, pos);
402 402 return data ? data + 32 : NULL;
403 403 }
404 404
405 405 static int nt_insert(indexObject *self, const char *node, int rev);
406 406
407 407 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
408 408 {
409 409 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
410 410 return -1;
411 411 if (*nodelen == 20)
412 412 return 0;
413 413 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
414 414 return -1;
415 415 }
416 416
417 417 static PyObject *index_insert(indexObject *self, PyObject *args)
418 418 {
419 419 PyObject *obj;
420 420 char *node;
421 421 long offset;
422 422 Py_ssize_t len, nodelen;
423 423
424 424 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
425 425 return NULL;
426 426
427 427 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
428 428 PyErr_SetString(PyExc_TypeError, "8-tuple required");
429 429 return NULL;
430 430 }
431 431
432 432 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
433 433 return NULL;
434 434
435 435 len = index_length(self);
436 436
437 437 if (offset < 0)
438 438 offset += len;
439 439
440 440 if (offset != len - 1) {
441 441 PyErr_SetString(PyExc_IndexError,
442 442 "insert only supported at index -1");
443 443 return NULL;
444 444 }
445 445
446 446 if (offset > INT_MAX) {
447 447 PyErr_SetString(PyExc_ValueError,
448 448 "currently only 2**31 revs supported");
449 449 return NULL;
450 450 }
451 451
452 452 if (self->added == NULL) {
453 453 self->added = PyList_New(0);
454 454 if (self->added == NULL)
455 455 return NULL;
456 456 }
457 457
458 458 if (PyList_Append(self->added, obj) == -1)
459 459 return NULL;
460 460
461 461 if (self->nt)
462 462 nt_insert(self, node, (int)offset);
463 463
464 464 Py_RETURN_NONE;
465 465 }
466 466
467 467 static void _index_clearcaches(indexObject *self)
468 468 {
469 469 if (self->cache) {
470 470 Py_ssize_t i;
471 471
472 472 for (i = 0; i < self->raw_length; i++) {
473 473 if (self->cache[i]) {
474 474 Py_DECREF(self->cache[i]);
475 475 self->cache[i] = NULL;
476 476 }
477 477 }
478 478 free(self->cache);
479 479 self->cache = NULL;
480 480 }
481 481 if (self->offsets) {
482 482 free(self->offsets);
483 483 self->offsets = NULL;
484 484 }
485 485 if (self->nt) {
486 486 free(self->nt);
487 487 self->nt = NULL;
488 488 }
489 489 }
490 490
491 491 static PyObject *index_clearcaches(indexObject *self)
492 492 {
493 493 _index_clearcaches(self);
494 494 self->ntlength = self->ntcapacity = 0;
495 495 self->ntdepth = self->ntsplits = 0;
496 496 self->ntrev = -1;
497 497 self->ntlookups = self->ntmisses = 0;
498 498 Py_RETURN_NONE;
499 499 }
500 500
501 501 static PyObject *index_stats(indexObject *self)
502 502 {
503 503 PyObject *obj = PyDict_New();
504 504
505 505 if (obj == NULL)
506 506 return NULL;
507 507
508 508 #define istat(__n, __d) \
509 509 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
510 510 goto bail;
511 511
512 512 if (self->added) {
513 513 Py_ssize_t len = PyList_GET_SIZE(self->added);
514 514 if (PyDict_SetItemString(obj, "index entries added",
515 515 PyInt_FromLong(len)) == -1)
516 516 goto bail;
517 517 }
518 518
519 519 if (self->raw_length != self->length - 1)
520 520 istat(raw_length, "revs on disk");
521 521 istat(length, "revs in memory");
522 522 istat(ntcapacity, "node trie capacity");
523 523 istat(ntdepth, "node trie depth");
524 524 istat(ntlength, "node trie count");
525 525 istat(ntlookups, "node trie lookups");
526 526 istat(ntmisses, "node trie misses");
527 527 istat(ntrev, "node trie last rev scanned");
528 528 istat(ntsplits, "node trie splits");
529 529
530 530 #undef istat
531 531
532 532 return obj;
533 533
534 534 bail:
535 535 Py_XDECREF(obj);
536 536 return NULL;
537 537 }
538 538
539 539 static inline int nt_level(const char *node, int level)
540 540 {
541 541 int v = node[level>>1];
542 542 if (!(level & 1))
543 543 v >>= 4;
544 544 return v & 0xf;
545 545 }
546 546
547 547 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
548 548 {
549 549 int level, off;
550 550
551 551 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
552 552 return -1;
553 553
554 554 if (self->nt == NULL)
555 555 return -2;
556 556
557 557 for (level = off = 0; level < nodelen; level++) {
558 558 int k = nt_level(node, level);
559 559 nodetree *n = &self->nt[off];
560 560 int v = n->children[k];
561 561
562 562 if (v < 0) {
563 563 const char *n;
564 564 v = -v - 1;
565 565 n = index_node(self, v);
566 566 if (n == NULL)
567 567 return -2;
568 568 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
569 569 ? -2 : v;
570 570 }
571 571 if (v == 0)
572 572 return -2;
573 573 off = v;
574 574 }
575 575 return -2;
576 576 }
577 577
578 578 static int nt_new(indexObject *self)
579 579 {
580 580 if (self->ntlength == self->ntcapacity) {
581 581 self->ntcapacity *= 2;
582 582 self->nt = realloc(self->nt,
583 583 self->ntcapacity * sizeof(nodetree));
584 584 if (self->nt == NULL) {
585 585 PyErr_SetString(PyExc_MemoryError, "out of memory");
586 586 return -1;
587 587 }
588 588 memset(&self->nt[self->ntlength], 0,
589 589 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
590 590 }
591 591 return self->ntlength++;
592 592 }
593 593
594 594 static int nt_insert(indexObject *self, const char *node, int rev)
595 595 {
596 596 int level = 0;
597 597 int off = 0;
598 598
599 599 while (level < 20) {
600 600 int k = nt_level(node, level);
601 601 nodetree *n;
602 602 int v;
603 603
604 604 n = &self->nt[off];
605 605 v = n->children[k];
606 606
607 607 if (v == 0) {
608 608 n->children[k] = -rev - 1;
609 609 return 0;
610 610 }
611 611 if (v < 0) {
612 612 const char *oldnode = index_node(self, -v - 1);
613 613 int noff;
614 614
615 615 if (!oldnode || !memcmp(oldnode, node, 20)) {
616 616 n->children[k] = -rev - 1;
617 617 return 0;
618 618 }
619 619 noff = nt_new(self);
620 620 if (noff == -1)
621 621 return -1;
622 622 /* self->nt may have been changed by realloc */
623 623 self->nt[off].children[k] = noff;
624 624 off = noff;
625 625 n = &self->nt[off];
626 626 n->children[nt_level(oldnode, ++level)] = v;
627 627 if (level > self->ntdepth)
628 628 self->ntdepth = level;
629 629 self->ntsplits += 1;
630 630 } else {
631 631 level += 1;
632 632 off = v;
633 633 }
634 634 }
635 635
636 636 return -1;
637 637 }
638 638
639 static int nt_init(indexObject *self)
640 {
641 if (self->nt == NULL) {
642 self->ntcapacity = self->raw_length < 4
643 ? 4 : self->raw_length / 2;
644 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
645 if (self->nt == NULL) {
646 PyErr_NoMemory();
647 return -1;
648 }
649 self->ntlength = 1;
650 self->ntrev = (int)index_length(self) - 1;
651 self->ntlookups = 1;
652 self->ntmisses = 0;
653 }
654 return 0;
655 }
656
639 657 /*
640 658 * Return values:
641 659 *
642 660 * -3: error (exception set)
643 661 * -2: not found (no exception set)
644 662 * rest: valid rev
645 663 */
646 664 static int index_find_node(indexObject *self,
647 665 const char *node, Py_ssize_t nodelen)
648 666 {
649 667 int rev;
650 668
651 669 self->ntlookups++;
652 670 rev = nt_find(self, node, nodelen);
653 671 if (rev >= -1)
654 672 return rev;
655 673
656 if (self->nt == NULL) {
657 self->ntcapacity = self->raw_length < 4
658 ? 4 : self->raw_length / 2;
659 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
660 if (self->nt == NULL) {
661 PyErr_SetString(PyExc_MemoryError, "out of memory");
662 return -3;
663 }
664 self->ntlength = 1;
665 self->ntrev = (int)index_length(self) - 1;
666 self->ntlookups = 1;
667 self->ntmisses = 0;
668 }
674 if (nt_init(self) == -1)
675 return -3;
669 676
670 677 /*
671 678 * For the first handful of lookups, we scan the entire index,
672 679 * and cache only the matching nodes. This optimizes for cases
673 680 * like "hg tip", where only a few nodes are accessed.
674 681 *
675 682 * After that, we cache every node we visit, using a single
676 683 * scan amortized over multiple lookups. This gives the best
677 684 * bulk performance, e.g. for "hg log".
678 685 */
679 686 if (self->ntmisses++ < 4) {
680 687 for (rev = self->ntrev - 1; rev >= 0; rev--) {
681 688 const char *n = index_node(self, rev);
682 689 if (n == NULL)
683 690 return -2;
684 691 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
685 692 if (nt_insert(self, n, rev) == -1)
686 693 return -3;
687 694 break;
688 695 }
689 696 }
690 697 } else {
691 698 for (rev = self->ntrev - 1; rev >= 0; rev--) {
692 699 const char *n = index_node(self, rev);
693 700 if (n == NULL) {
694 701 self->ntrev = rev + 1;
695 702 return -2;
696 703 }
697 704 if (nt_insert(self, n, rev) == -1) {
698 705 self->ntrev = rev + 1;
699 706 return -3;
700 707 }
701 708 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
702 709 break;
703 710 }
704 711 }
705 712 self->ntrev = rev;
706 713 }
707 714
708 715 if (rev >= 0)
709 716 return rev;
710 717 return -2;
711 718 }
712 719
713 720 static PyObject *raise_revlog_error(void)
714 721 {
715 722 static PyObject *errclass;
716 723 PyObject *mod = NULL, *errobj;
717 724
718 725 if (errclass == NULL) {
719 726 PyObject *dict;
720 727
721 728 mod = PyImport_ImportModule("mercurial.error");
722 729 if (mod == NULL)
723 730 goto classfail;
724 731
725 732 dict = PyModule_GetDict(mod);
726 733 if (dict == NULL)
727 734 goto classfail;
728 735
729 736 errclass = PyDict_GetItemString(dict, "RevlogError");
730 737 if (errclass == NULL) {
731 738 PyErr_SetString(PyExc_SystemError,
732 739 "could not find RevlogError");
733 740 goto classfail;
734 741 }
735 742 Py_INCREF(errclass);
736 743 }
737 744
738 745 errobj = PyObject_CallFunction(errclass, NULL);
739 746 if (errobj == NULL)
740 747 return NULL;
741 748 PyErr_SetObject(errclass, errobj);
742 749 return errobj;
743 750
744 751 classfail:
745 752 Py_XDECREF(mod);
746 753 return NULL;
747 754 }
748 755
749 756 static PyObject *index_getitem(indexObject *self, PyObject *value)
750 757 {
751 758 char *node;
752 759 Py_ssize_t nodelen;
753 760 int rev;
754 761
755 762 if (PyInt_Check(value))
756 763 return index_get(self, PyInt_AS_LONG(value));
757 764
758 765 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
759 766 return NULL;
760 767 rev = index_find_node(self, node, nodelen);
761 768 if (rev >= -1)
762 769 return PyInt_FromLong(rev);
763 770 if (rev == -2)
764 771 raise_revlog_error();
765 772 return NULL;
766 773 }
767 774
768 775 static PyObject *index_m_get(indexObject *self, PyObject *args)
769 776 {
770 777 char *node;
771 778 int nodelen, rev;
772 779
773 780 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
774 781 return NULL;
775 782
776 783 rev = index_find_node(self, node, nodelen);
777 784 if (rev == -3)
778 785 return NULL;
779 786 if (rev == -2)
780 787 Py_RETURN_NONE;
781 788 return PyInt_FromLong(rev);
782 789 }
783 790
784 791 static int index_contains(indexObject *self, PyObject *value)
785 792 {
786 793 char *node;
787 794 Py_ssize_t nodelen;
788 795
789 796 if (PyInt_Check(value)) {
790 797 long rev = PyInt_AS_LONG(value);
791 798 return rev >= -1 && rev < index_length(self);
792 799 }
793 800
794 801 if (!PyString_Check(value))
795 802 return 0;
796 803
797 804 node = PyString_AS_STRING(value);
798 805 nodelen = PyString_GET_SIZE(value);
799 806
800 807 switch (index_find_node(self, node, nodelen)) {
801 808 case -3:
802 809 return -1;
803 810 case -2:
804 811 return 0;
805 812 default:
806 813 return 1;
807 814 }
808 815 }
809 816
810 817 /*
811 818 * Invalidate any trie entries introduced by added revs.
812 819 */
813 820 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
814 821 {
815 822 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
816 823
817 824 for (i = start; i < len; i++) {
818 825 PyObject *tuple = PyList_GET_ITEM(self->added, i);
819 826 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
820 827
821 828 nt_insert(self, PyString_AS_STRING(node), -1);
822 829 }
823 830
824 831 if (start == 0) {
825 832 Py_DECREF(self->added);
826 833 self->added = NULL;
827 834 }
828 835 }
829 836
830 837 /*
831 838 * Delete a numeric range of revs, which must be at the end of the
832 839 * range, but exclude the sentinel nullid entry.
833 840 */
834 841 static int index_slice_del(indexObject *self, PyObject *item)
835 842 {
836 843 Py_ssize_t start, stop, step, slicelength;
837 844 Py_ssize_t length = index_length(self);
838 845
839 846 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
840 847 &start, &stop, &step, &slicelength) < 0)
841 848 return -1;
842 849
843 850 if (slicelength <= 0)
844 851 return 0;
845 852
846 853 if ((step < 0 && start < stop) || (step > 0 && start > stop))
847 854 stop = start;
848 855
849 856 if (step < 0) {
850 857 stop = start + 1;
851 858 start = stop + step*(slicelength - 1) - 1;
852 859 step = -step;
853 860 }
854 861
855 862 if (step != 1) {
856 863 PyErr_SetString(PyExc_ValueError,
857 864 "revlog index delete requires step size of 1");
858 865 return -1;
859 866 }
860 867
861 868 if (stop != length - 1) {
862 869 PyErr_SetString(PyExc_IndexError,
863 870 "revlog index deletion indices are invalid");
864 871 return -1;
865 872 }
866 873
867 874 if (start < self->length - 1) {
868 875 if (self->nt) {
869 876 Py_ssize_t i;
870 877
871 878 for (i = start + 1; i < self->length - 1; i++) {
872 879 const char *node = index_node(self, i);
873 880
874 881 if (node)
875 882 nt_insert(self, node, -1);
876 883 }
877 884 if (self->added)
878 885 nt_invalidate_added(self, 0);
879 886 if (self->ntrev > start)
880 887 self->ntrev = (int)start;
881 888 }
882 889 self->length = start + 1;
883 890 return 0;
884 891 }
885 892
886 893 if (self->nt) {
887 894 nt_invalidate_added(self, start - self->length + 1);
888 895 if (self->ntrev > start)
889 896 self->ntrev = (int)start;
890 897 }
891 898 return self->added
892 899 ? PyList_SetSlice(self->added, start - self->length + 1,
893 900 PyList_GET_SIZE(self->added), NULL)
894 901 : 0;
895 902 }
896 903
897 904 /*
898 905 * Supported ops:
899 906 *
900 907 * slice deletion
901 908 * string assignment (extend node->rev mapping)
902 909 * string deletion (shrink node->rev mapping)
903 910 */
904 911 static int index_assign_subscript(indexObject *self, PyObject *item,
905 912 PyObject *value)
906 913 {
907 914 char *node;
908 915 Py_ssize_t nodelen;
909 916 long rev;
910 917
911 918 if (PySlice_Check(item) && value == NULL)
912 919 return index_slice_del(self, item);
913 920
914 921 if (node_check(item, &node, &nodelen) == -1)
915 922 return -1;
916 923
917 924 if (value == NULL)
918 925 return self->nt ? nt_insert(self, node, -1) : 0;
919 926 rev = PyInt_AsLong(value);
920 927 if (rev > INT_MAX || rev < 0) {
921 928 if (!PyErr_Occurred())
922 929 PyErr_SetString(PyExc_ValueError, "rev out of range");
923 930 return -1;
924 931 }
925 932 return nt_insert(self, node, (int)rev);
926 933 }
927 934
928 935 /*
929 936 * Find all RevlogNG entries in an index that has inline data. Update
930 937 * the optional "offsets" table with those entries.
931 938 */
932 939 static long inline_scan(indexObject *self, const char **offsets)
933 940 {
934 941 const char *data = PyString_AS_STRING(self->data);
935 942 const char *end = data + PyString_GET_SIZE(self->data);
936 943 const long hdrsize = 64;
937 944 long incr = hdrsize;
938 945 Py_ssize_t len = 0;
939 946
940 947 while (data + hdrsize <= end) {
941 948 uint32_t comp_len;
942 949 const char *old_data;
943 950 /* 3rd element of header is length of compressed inline data */
944 951 comp_len = getbe32(data + 8);
945 952 incr = hdrsize + comp_len;
946 953 if (incr < hdrsize)
947 954 break;
948 955 if (offsets)
949 956 offsets[len] = data;
950 957 len++;
951 958 old_data = data;
952 959 data += incr;
953 960 if (data <= old_data)
954 961 break;
955 962 }
956 963
957 964 if (data != end && data + hdrsize != end) {
958 965 if (!PyErr_Occurred())
959 966 PyErr_SetString(PyExc_ValueError, "corrupt index file");
960 967 return -1;
961 968 }
962 969
963 970 return len;
964 971 }
965 972
966 973 static int index_init(indexObject *self, PyObject *args)
967 974 {
968 975 PyObject *data_obj, *inlined_obj;
969 976 Py_ssize_t size;
970 977
971 978 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
972 979 return -1;
973 980 if (!PyString_Check(data_obj)) {
974 981 PyErr_SetString(PyExc_TypeError, "data is not a string");
975 982 return -1;
976 983 }
977 984 size = PyString_GET_SIZE(data_obj);
978 985
979 986 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
980 987 self->data = data_obj;
981 988 self->cache = NULL;
982 989
983 990 self->added = NULL;
984 991 self->offsets = NULL;
985 992 self->nt = NULL;
986 993 self->ntlength = self->ntcapacity = 0;
987 994 self->ntdepth = self->ntsplits = 0;
988 995 self->ntlookups = self->ntmisses = 0;
989 996 self->ntrev = -1;
990 997 Py_INCREF(self->data);
991 998
992 999 if (self->inlined) {
993 1000 long len = inline_scan(self, NULL);
994 1001 if (len == -1)
995 1002 goto bail;
996 1003 self->raw_length = len;
997 1004 self->length = len + 1;
998 1005 } else {
999 1006 if (size % 64) {
1000 1007 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1001 1008 goto bail;
1002 1009 }
1003 1010 self->raw_length = size / 64;
1004 1011 self->length = self->raw_length + 1;
1005 1012 }
1006 1013
1007 1014 return 0;
1008 1015 bail:
1009 1016 return -1;
1010 1017 }
1011 1018
1012 1019 static PyObject *index_nodemap(indexObject *self)
1013 1020 {
1014 1021 Py_INCREF(self);
1015 1022 return (PyObject *)self;
1016 1023 }
1017 1024
1018 1025 static void index_dealloc(indexObject *self)
1019 1026 {
1020 1027 _index_clearcaches(self);
1021 1028 Py_DECREF(self->data);
1022 1029 Py_XDECREF(self->added);
1023 1030 PyObject_Del(self);
1024 1031 }
1025 1032
1026 1033 static PySequenceMethods index_sequence_methods = {
1027 1034 (lenfunc)index_length, /* sq_length */
1028 1035 0, /* sq_concat */
1029 1036 0, /* sq_repeat */
1030 1037 (ssizeargfunc)index_get, /* sq_item */
1031 1038 0, /* sq_slice */
1032 1039 0, /* sq_ass_item */
1033 1040 0, /* sq_ass_slice */
1034 1041 (objobjproc)index_contains, /* sq_contains */
1035 1042 };
1036 1043
1037 1044 static PyMappingMethods index_mapping_methods = {
1038 1045 (lenfunc)index_length, /* mp_length */
1039 1046 (binaryfunc)index_getitem, /* mp_subscript */
1040 1047 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1041 1048 };
1042 1049
1043 1050 static PyMethodDef index_methods[] = {
1044 1051 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1045 1052 "clear the index caches"},
1046 1053 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1047 1054 "get an index entry"},
1048 1055 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1049 1056 "insert an index entry"},
1050 1057 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1051 1058 "stats for the index"},
1052 1059 {NULL} /* Sentinel */
1053 1060 };
1054 1061
1055 1062 static PyGetSetDef index_getset[] = {
1056 1063 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1057 1064 {NULL} /* Sentinel */
1058 1065 };
1059 1066
1060 1067 static PyTypeObject indexType = {
1061 1068 PyObject_HEAD_INIT(NULL)
1062 1069 0, /* ob_size */
1063 1070 "parsers.index", /* tp_name */
1064 1071 sizeof(indexObject), /* tp_basicsize */
1065 1072 0, /* tp_itemsize */
1066 1073 (destructor)index_dealloc, /* tp_dealloc */
1067 1074 0, /* tp_print */
1068 1075 0, /* tp_getattr */
1069 1076 0, /* tp_setattr */
1070 1077 0, /* tp_compare */
1071 1078 0, /* tp_repr */
1072 1079 0, /* tp_as_number */
1073 1080 &index_sequence_methods, /* tp_as_sequence */
1074 1081 &index_mapping_methods, /* tp_as_mapping */
1075 1082 0, /* tp_hash */
1076 1083 0, /* tp_call */
1077 1084 0, /* tp_str */
1078 1085 0, /* tp_getattro */
1079 1086 0, /* tp_setattro */
1080 1087 0, /* tp_as_buffer */
1081 1088 Py_TPFLAGS_DEFAULT, /* tp_flags */
1082 1089 "revlog index", /* tp_doc */
1083 1090 0, /* tp_traverse */
1084 1091 0, /* tp_clear */
1085 1092 0, /* tp_richcompare */
1086 1093 0, /* tp_weaklistoffset */
1087 1094 0, /* tp_iter */
1088 1095 0, /* tp_iternext */
1089 1096 index_methods, /* tp_methods */
1090 1097 0, /* tp_members */
1091 1098 index_getset, /* tp_getset */
1092 1099 0, /* tp_base */
1093 1100 0, /* tp_dict */
1094 1101 0, /* tp_descr_get */
1095 1102 0, /* tp_descr_set */
1096 1103 0, /* tp_dictoffset */
1097 1104 (initproc)index_init, /* tp_init */
1098 1105 0, /* tp_alloc */
1099 1106 PyType_GenericNew, /* tp_new */
1100 1107 };
1101 1108
1102 1109 /*
1103 1110 * returns a tuple of the form (index, index, cache) with elements as
1104 1111 * follows:
1105 1112 *
1106 1113 * index: an index object that lazily parses RevlogNG records
1107 1114 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1108 1115 *
1109 1116 * added complications are for backwards compatibility
1110 1117 */
1111 1118 static PyObject *parse_index2(PyObject *self, PyObject *args)
1112 1119 {
1113 1120 PyObject *tuple = NULL, *cache = NULL;
1114 1121 indexObject *idx;
1115 1122 int ret;
1116 1123
1117 1124 idx = PyObject_New(indexObject, &indexType);
1118 1125 if (idx == NULL)
1119 1126 goto bail;
1120 1127
1121 1128 ret = index_init(idx, args);
1122 1129 if (ret == -1)
1123 1130 goto bail;
1124 1131
1125 1132 if (idx->inlined) {
1126 1133 cache = Py_BuildValue("iO", 0, idx->data);
1127 1134 if (cache == NULL)
1128 1135 goto bail;
1129 1136 } else {
1130 1137 cache = Py_None;
1131 1138 Py_INCREF(cache);
1132 1139 }
1133 1140
1134 1141 tuple = Py_BuildValue("NN", idx, cache);
1135 1142 if (!tuple)
1136 1143 goto bail;
1137 1144 return tuple;
1138 1145
1139 1146 bail:
1140 1147 Py_XDECREF(idx);
1141 1148 Py_XDECREF(cache);
1142 1149 Py_XDECREF(tuple);
1143 1150 return NULL;
1144 1151 }
1145 1152
1146 1153 static char parsers_doc[] = "Efficient content parsing.";
1147 1154
1148 1155 static PyMethodDef methods[] = {
1149 1156 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1150 1157 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1151 1158 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1152 1159 {NULL, NULL}
1153 1160 };
1154 1161
1155 1162 static void module_init(PyObject *mod)
1156 1163 {
1157 1164 if (PyType_Ready(&indexType) < 0)
1158 1165 return;
1159 1166 Py_INCREF(&indexType);
1160 1167
1161 1168 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1162 1169
1163 1170 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1164 1171 -1, -1, -1, -1, nullid, 20);
1165 1172 if (nullentry)
1166 1173 PyObject_GC_UnTrack(nullentry);
1167 1174 }
1168 1175
1169 1176 #ifdef IS_PY3K
1170 1177 static struct PyModuleDef parsers_module = {
1171 1178 PyModuleDef_HEAD_INIT,
1172 1179 "parsers",
1173 1180 parsers_doc,
1174 1181 -1,
1175 1182 methods
1176 1183 };
1177 1184
1178 1185 PyMODINIT_FUNC PyInit_parsers(void)
1179 1186 {
1180 1187 PyObject *mod = PyModule_Create(&parsers_module);
1181 1188 module_init(mod);
1182 1189 return mod;
1183 1190 }
1184 1191 #else
1185 1192 PyMODINIT_FUNC initparsers(void)
1186 1193 {
1187 1194 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1188 1195 module_init(mod);
1189 1196 }
1190 1197 #endif
General Comments 0
You need to be logged in to leave comments. Login now