##// END OF EJS Templates
parsers: fix memleak of revlog cache entries on strip...
Yuya Nishihara -
r18504:d1d5fdcc stable
parent child Browse files
Show More
@@ -1,1562 +1,1568 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static inline int hexdigit(const char *p, Py_ssize_t off)
18 18 {
19 19 char c = p[off];
20 20
21 21 if (c >= '0' && c <= '9')
22 22 return c - '0';
23 23 if (c >= 'a' && c <= 'f')
24 24 return c - 'a' + 10;
25 25 if (c >= 'A' && c <= 'F')
26 26 return c - 'A' + 10;
27 27
28 28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
29 29 return 0;
30 30 }
31 31
32 32 /*
33 33 * Turn a hex-encoded string into binary.
34 34 */
35 35 static PyObject *unhexlify(const char *str, int len)
36 36 {
37 37 PyObject *ret;
38 38 char *d;
39 39 int i;
40 40
41 41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
42 42
43 43 if (!ret)
44 44 return NULL;
45 45
46 46 d = PyBytes_AsString(ret);
47 47
48 48 for (i = 0; i < len;) {
49 49 int hi = hexdigit(str, i++);
50 50 int lo = hexdigit(str, i++);
51 51 *d++ = (hi << 4) | lo;
52 52 }
53 53
54 54 return ret;
55 55 }
56 56
57 57 /*
58 58 * This code assumes that a manifest is stitched together with newline
59 59 * ('\n') characters.
60 60 */
61 61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
62 62 {
63 63 PyObject *mfdict, *fdict;
64 64 char *str, *cur, *start, *zero;
65 65 int len;
66 66
67 67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
68 68 &PyDict_Type, &mfdict,
69 69 &PyDict_Type, &fdict,
70 70 &str, &len))
71 71 goto quit;
72 72
73 73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
74 74 PyObject *file = NULL, *node = NULL;
75 75 PyObject *flags = NULL;
76 76 ptrdiff_t nlen;
77 77
78 78 if (!*cur) {
79 79 zero = cur;
80 80 continue;
81 81 }
82 82 else if (*cur != '\n')
83 83 continue;
84 84
85 85 if (!zero) {
86 86 PyErr_SetString(PyExc_ValueError,
87 87 "manifest entry has no separator");
88 88 goto quit;
89 89 }
90 90
91 91 file = PyBytes_FromStringAndSize(start, zero - start);
92 92
93 93 if (!file)
94 94 goto bail;
95 95
96 96 nlen = cur - zero - 1;
97 97
98 98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
99 99 if (!node)
100 100 goto bail;
101 101
102 102 if (nlen > 40) {
103 103 flags = PyBytes_FromStringAndSize(zero + 41,
104 104 nlen - 40);
105 105 if (!flags)
106 106 goto bail;
107 107
108 108 if (PyDict_SetItem(fdict, file, flags) == -1)
109 109 goto bail;
110 110 }
111 111
112 112 if (PyDict_SetItem(mfdict, file, node) == -1)
113 113 goto bail;
114 114
115 115 start = cur + 1;
116 116 zero = NULL;
117 117
118 118 Py_XDECREF(flags);
119 119 Py_XDECREF(node);
120 120 Py_XDECREF(file);
121 121 continue;
122 122 bail:
123 123 Py_XDECREF(flags);
124 124 Py_XDECREF(node);
125 125 Py_XDECREF(file);
126 126 goto quit;
127 127 }
128 128
129 129 if (len > 0 && *(cur - 1) != '\n') {
130 130 PyErr_SetString(PyExc_ValueError,
131 131 "manifest contains trailing garbage");
132 132 goto quit;
133 133 }
134 134
135 135 Py_INCREF(Py_None);
136 136 return Py_None;
137 137 quit:
138 138 return NULL;
139 139 }
140 140
141 141 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
142 142 {
143 143 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
144 144 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
145 145 char *str, *cur, *end, *cpos;
146 146 int state, mode, size, mtime;
147 147 unsigned int flen;
148 148 int len;
149 149
150 150 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
151 151 &PyDict_Type, &dmap,
152 152 &PyDict_Type, &cmap,
153 153 &str, &len))
154 154 goto quit;
155 155
156 156 /* read parents */
157 157 if (len < 40)
158 158 goto quit;
159 159
160 160 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
161 161 if (!parents)
162 162 goto quit;
163 163
164 164 /* read filenames */
165 165 cur = str + 40;
166 166 end = str + len;
167 167
168 168 while (cur < end - 17) {
169 169 /* unpack header */
170 170 state = *cur;
171 171 mode = getbe32(cur + 1);
172 172 size = getbe32(cur + 5);
173 173 mtime = getbe32(cur + 9);
174 174 flen = getbe32(cur + 13);
175 175 cur += 17;
176 176 if (cur + flen > end || cur + flen < cur) {
177 177 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
178 178 goto quit;
179 179 }
180 180
181 181 entry = Py_BuildValue("ciii", state, mode, size, mtime);
182 182 if (!entry)
183 183 goto quit;
184 184 PyObject_GC_UnTrack(entry); /* don't waste time with this */
185 185
186 186 cpos = memchr(cur, 0, flen);
187 187 if (cpos) {
188 188 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
189 189 cname = PyBytes_FromStringAndSize(cpos + 1,
190 190 flen - (cpos - cur) - 1);
191 191 if (!fname || !cname ||
192 192 PyDict_SetItem(cmap, fname, cname) == -1 ||
193 193 PyDict_SetItem(dmap, fname, entry) == -1)
194 194 goto quit;
195 195 Py_DECREF(cname);
196 196 } else {
197 197 fname = PyBytes_FromStringAndSize(cur, flen);
198 198 if (!fname ||
199 199 PyDict_SetItem(dmap, fname, entry) == -1)
200 200 goto quit;
201 201 }
202 202 cur += flen;
203 203 Py_DECREF(fname);
204 204 Py_DECREF(entry);
205 205 fname = cname = entry = NULL;
206 206 }
207 207
208 208 ret = parents;
209 209 Py_INCREF(ret);
210 210 quit:
211 211 Py_XDECREF(fname);
212 212 Py_XDECREF(cname);
213 213 Py_XDECREF(entry);
214 214 Py_XDECREF(parents);
215 215 return ret;
216 216 }
217 217
218 218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
219 219 {
220 220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
221 221 long val;
222 222
223 223 if (PyInt_Check(o))
224 224 val = PyInt_AS_LONG(o);
225 225 else if (PyLong_Check(o)) {
226 226 val = PyLong_AsLong(o);
227 227 if (val == -1 && PyErr_Occurred())
228 228 return -1;
229 229 } else {
230 230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
231 231 return -1;
232 232 }
233 233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
234 234 PyErr_SetString(PyExc_OverflowError,
235 235 "Python value to large to convert to uint32_t");
236 236 return -1;
237 237 }
238 238 *v = (uint32_t)val;
239 239 return 0;
240 240 }
241 241
242 242 static PyObject *dirstate_unset;
243 243
244 244 /*
245 245 * Efficiently pack a dirstate object into its on-disk format.
246 246 */
247 247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
248 248 {
249 249 PyObject *packobj = NULL;
250 250 PyObject *map, *copymap, *pl;
251 251 Py_ssize_t nbytes, pos, l;
252 252 PyObject *k, *v, *pn;
253 253 char *p, *s;
254 254 double now;
255 255
256 256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
257 257 &PyDict_Type, &map, &PyDict_Type, &copymap,
258 258 &pl, &now))
259 259 return NULL;
260 260
261 261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
262 262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
263 263 return NULL;
264 264 }
265 265
266 266 /* Figure out how much we need to allocate. */
267 267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
268 268 PyObject *c;
269 269 if (!PyString_Check(k)) {
270 270 PyErr_SetString(PyExc_TypeError, "expected string key");
271 271 goto bail;
272 272 }
273 273 nbytes += PyString_GET_SIZE(k) + 17;
274 274 c = PyDict_GetItem(copymap, k);
275 275 if (c) {
276 276 if (!PyString_Check(c)) {
277 277 PyErr_SetString(PyExc_TypeError,
278 278 "expected string key");
279 279 goto bail;
280 280 }
281 281 nbytes += PyString_GET_SIZE(c) + 1;
282 282 }
283 283 }
284 284
285 285 packobj = PyString_FromStringAndSize(NULL, nbytes);
286 286 if (packobj == NULL)
287 287 goto bail;
288 288
289 289 p = PyString_AS_STRING(packobj);
290 290
291 291 pn = PySequence_ITEM(pl, 0);
292 292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
293 293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
294 294 goto bail;
295 295 }
296 296 memcpy(p, s, l);
297 297 p += 20;
298 298 pn = PySequence_ITEM(pl, 1);
299 299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
300 300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
301 301 goto bail;
302 302 }
303 303 memcpy(p, s, l);
304 304 p += 20;
305 305
306 306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
307 307 uint32_t mode, size, mtime;
308 308 Py_ssize_t len, l;
309 309 PyObject *o;
310 310 char *s, *t;
311 311
312 312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
313 313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
314 314 goto bail;
315 315 }
316 316 o = PyTuple_GET_ITEM(v, 0);
317 317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
318 318 PyErr_SetString(PyExc_TypeError, "expected one byte");
319 319 goto bail;
320 320 }
321 321 *p++ = *s;
322 322 if (getintat(v, 1, &mode) == -1)
323 323 goto bail;
324 324 if (getintat(v, 2, &size) == -1)
325 325 goto bail;
326 326 if (getintat(v, 3, &mtime) == -1)
327 327 goto bail;
328 328 if (*s == 'n' && mtime == (uint32_t)now) {
329 329 /* See dirstate.py:write for why we do this. */
330 330 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
331 331 goto bail;
332 332 mode = 0, size = -1, mtime = -1;
333 333 }
334 334 putbe32(mode, p);
335 335 putbe32(size, p + 4);
336 336 putbe32(mtime, p + 8);
337 337 t = p + 12;
338 338 p += 16;
339 339 len = PyString_GET_SIZE(k);
340 340 memcpy(p, PyString_AS_STRING(k), len);
341 341 p += len;
342 342 o = PyDict_GetItem(copymap, k);
343 343 if (o) {
344 344 *p++ = '\0';
345 345 l = PyString_GET_SIZE(o);
346 346 memcpy(p, PyString_AS_STRING(o), l);
347 347 p += l;
348 348 len += l + 1;
349 349 }
350 350 putbe32((uint32_t)len, t);
351 351 }
352 352
353 353 pos = p - PyString_AS_STRING(packobj);
354 354 if (pos != nbytes) {
355 355 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
356 356 (long)pos, (long)nbytes);
357 357 goto bail;
358 358 }
359 359
360 360 return packobj;
361 361 bail:
362 362 Py_XDECREF(packobj);
363 363 return NULL;
364 364 }
365 365
/*
 * One node of the base-16 trie used for fast node->rev mapping.
 *
 * Each of the 16 slots is interpreted as follows:
 *   > 0   index of the next trie node
 *   < 0   a leaf, encoded as -(rev + 1)
 *   == 0  empty
 */
typedef struct {
	int children[16];
} nodetree;
376 376
377 377 /*
378 378 * This class has two behaviours.
379 379 *
380 380 * When used in a list-like way (with integer keys), we decode an
381 381 * entry in a RevlogNG index file on demand. Our last entry is a
382 382 * sentinel, always a nullid. We have limited support for
383 383 * integer-keyed insert and delete, only at elements right before the
384 384 * sentinel.
385 385 *
386 386 * With string keys, we lazily perform a reverse mapping from node to
387 387 * rev, using a base-16 trie.
388 388 */
389 389 typedef struct {
390 390 PyObject_HEAD
391 391 /* Type-specific fields go here. */
392 392 PyObject *data; /* raw bytes of index */
393 393 PyObject **cache; /* cached tuples */
394 394 const char **offsets; /* populated on demand */
395 395 Py_ssize_t raw_length; /* original number of elements */
396 396 Py_ssize_t length; /* current number of elements */
397 397 PyObject *added; /* populated on demand */
398 398 PyObject *headrevs; /* cache, invalidated on changes */
399 399 nodetree *nt; /* base-16 trie */
400 400 int ntlength; /* # nodes in use */
401 401 int ntcapacity; /* # nodes allocated */
402 402 int ntdepth; /* maximum depth of tree */
403 403 int ntsplits; /* # splits performed */
404 404 int ntrev; /* last rev scanned */
405 405 int ntlookups; /* # lookups */
406 406 int ntmisses; /* # lookups that miss the cache */
407 407 int inlined;
408 408 } indexObject;
409 409
410 410 static Py_ssize_t index_length(const indexObject *self)
411 411 {
412 412 if (self->added == NULL)
413 413 return self->length;
414 414 return self->length + PyList_GET_SIZE(self->added);
415 415 }
416 416
417 417 static PyObject *nullentry;
418 418 static const char nullid[20];
419 419
420 420 static long inline_scan(indexObject *self, const char **offsets);
421 421
422 422 #if LONG_MAX == 0x7fffffffL
423 423 static char *tuple_format = "Kiiiiiis#";
424 424 #else
425 425 static char *tuple_format = "kiiiiiis#";
426 426 #endif
427 427
428 428 /* A RevlogNG v1 index entry is 64 bytes long. */
429 429 static const long v1_hdrsize = 64;
430 430
431 431 /*
432 432 * Return a pointer to the beginning of a RevlogNG record.
433 433 */
434 434 static const char *index_deref(indexObject *self, Py_ssize_t pos)
435 435 {
436 436 if (self->inlined && pos > 0) {
437 437 if (self->offsets == NULL) {
438 438 self->offsets = malloc(self->raw_length *
439 439 sizeof(*self->offsets));
440 440 if (self->offsets == NULL)
441 441 return (const char *)PyErr_NoMemory();
442 442 inline_scan(self, self->offsets);
443 443 }
444 444 return self->offsets[pos];
445 445 }
446 446
447 447 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
448 448 }
449 449
450 450 /*
451 451 * RevlogNG format (all in big endian, data may be inlined):
452 452 * 6 bytes: offset
453 453 * 2 bytes: flags
454 454 * 4 bytes: compressed length
455 455 * 4 bytes: uncompressed length
456 456 * 4 bytes: base revision
457 457 * 4 bytes: link revision
458 458 * 4 bytes: parent 1 revision
459 459 * 4 bytes: parent 2 revision
460 460 * 32 bytes: nodeid (only 20 bytes used)
461 461 */
462 462 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
463 463 {
464 464 uint64_t offset_flags;
465 465 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
466 466 const char *c_node_id;
467 467 const char *data;
468 468 Py_ssize_t length = index_length(self);
469 469 PyObject *entry;
470 470
471 471 if (pos < 0)
472 472 pos += length;
473 473
474 474 if (pos < 0 || pos >= length) {
475 475 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
476 476 return NULL;
477 477 }
478 478
479 479 if (pos == length - 1) {
480 480 Py_INCREF(nullentry);
481 481 return nullentry;
482 482 }
483 483
484 484 if (pos >= self->length - 1) {
485 485 PyObject *obj;
486 486 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
487 487 Py_INCREF(obj);
488 488 return obj;
489 489 }
490 490
491 491 if (self->cache) {
492 492 if (self->cache[pos]) {
493 493 Py_INCREF(self->cache[pos]);
494 494 return self->cache[pos];
495 495 }
496 496 } else {
497 497 self->cache = calloc(self->raw_length, sizeof(PyObject *));
498 498 if (self->cache == NULL)
499 499 return PyErr_NoMemory();
500 500 }
501 501
502 502 data = index_deref(self, pos);
503 503 if (data == NULL)
504 504 return NULL;
505 505
506 506 offset_flags = getbe32(data + 4);
507 507 if (pos == 0) /* mask out version number for the first entry */
508 508 offset_flags &= 0xFFFF;
509 509 else {
510 510 uint32_t offset_high = getbe32(data);
511 511 offset_flags |= ((uint64_t)offset_high) << 32;
512 512 }
513 513
514 514 comp_len = getbe32(data + 8);
515 515 uncomp_len = getbe32(data + 12);
516 516 base_rev = getbe32(data + 16);
517 517 link_rev = getbe32(data + 20);
518 518 parent_1 = getbe32(data + 24);
519 519 parent_2 = getbe32(data + 28);
520 520 c_node_id = data + 32;
521 521
522 522 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
523 523 uncomp_len, base_rev, link_rev,
524 524 parent_1, parent_2, c_node_id, 20);
525 525
526 526 if (entry)
527 527 PyObject_GC_UnTrack(entry);
528 528
529 529 self->cache[pos] = entry;
530 530 Py_INCREF(entry);
531 531
532 532 return entry;
533 533 }
534 534
535 535 /*
536 536 * Return the 20-byte SHA of the node corresponding to the given rev.
537 537 */
538 538 static const char *index_node(indexObject *self, Py_ssize_t pos)
539 539 {
540 540 Py_ssize_t length = index_length(self);
541 541 const char *data;
542 542
543 543 if (pos == length - 1 || pos == INT_MAX)
544 544 return nullid;
545 545
546 546 if (pos >= length)
547 547 return NULL;
548 548
549 549 if (pos >= self->length - 1) {
550 550 PyObject *tuple, *str;
551 551 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
552 552 str = PyTuple_GetItem(tuple, 7);
553 553 return str ? PyString_AS_STRING(str) : NULL;
554 554 }
555 555
556 556 data = index_deref(self, pos);
557 557 return data ? data + 32 : NULL;
558 558 }
559 559
560 560 static int nt_insert(indexObject *self, const char *node, int rev);
561 561
562 562 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
563 563 {
564 564 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
565 565 return -1;
566 566 if (*nodelen == 20)
567 567 return 0;
568 568 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
569 569 return -1;
570 570 }
571 571
572 572 static PyObject *index_insert(indexObject *self, PyObject *args)
573 573 {
574 574 PyObject *obj;
575 575 char *node;
576 576 long offset;
577 577 Py_ssize_t len, nodelen;
578 578
579 579 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
580 580 return NULL;
581 581
582 582 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
583 583 PyErr_SetString(PyExc_TypeError, "8-tuple required");
584 584 return NULL;
585 585 }
586 586
587 587 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
588 588 return NULL;
589 589
590 590 len = index_length(self);
591 591
592 592 if (offset < 0)
593 593 offset += len;
594 594
595 595 if (offset != len - 1) {
596 596 PyErr_SetString(PyExc_IndexError,
597 597 "insert only supported at index -1");
598 598 return NULL;
599 599 }
600 600
601 601 if (offset > INT_MAX) {
602 602 PyErr_SetString(PyExc_ValueError,
603 603 "currently only 2**31 revs supported");
604 604 return NULL;
605 605 }
606 606
607 607 if (self->added == NULL) {
608 608 self->added = PyList_New(0);
609 609 if (self->added == NULL)
610 610 return NULL;
611 611 }
612 612
613 613 if (PyList_Append(self->added, obj) == -1)
614 614 return NULL;
615 615
616 616 if (self->nt)
617 617 nt_insert(self, node, (int)offset);
618 618
619 619 Py_CLEAR(self->headrevs);
620 620 Py_RETURN_NONE;
621 621 }
622 622
623 623 static void _index_clearcaches(indexObject *self)
624 624 {
625 625 if (self->cache) {
626 626 Py_ssize_t i;
627 627
628 628 for (i = 0; i < self->raw_length; i++)
629 629 Py_CLEAR(self->cache[i]);
630 630 free(self->cache);
631 631 self->cache = NULL;
632 632 }
633 633 if (self->offsets) {
634 634 free(self->offsets);
635 635 self->offsets = NULL;
636 636 }
637 637 if (self->nt) {
638 638 free(self->nt);
639 639 self->nt = NULL;
640 640 }
641 641 Py_CLEAR(self->headrevs);
642 642 }
643 643
644 644 static PyObject *index_clearcaches(indexObject *self)
645 645 {
646 646 _index_clearcaches(self);
647 647 self->ntlength = self->ntcapacity = 0;
648 648 self->ntdepth = self->ntsplits = 0;
649 649 self->ntrev = -1;
650 650 self->ntlookups = self->ntmisses = 0;
651 651 Py_RETURN_NONE;
652 652 }
653 653
654 654 static PyObject *index_stats(indexObject *self)
655 655 {
656 656 PyObject *obj = PyDict_New();
657 657
658 658 if (obj == NULL)
659 659 return NULL;
660 660
661 661 #define istat(__n, __d) \
662 662 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
663 663 goto bail;
664 664
665 665 if (self->added) {
666 666 Py_ssize_t len = PyList_GET_SIZE(self->added);
667 667 if (PyDict_SetItemString(obj, "index entries added",
668 668 PyInt_FromSsize_t(len)) == -1)
669 669 goto bail;
670 670 }
671 671
672 672 if (self->raw_length != self->length - 1)
673 673 istat(raw_length, "revs on disk");
674 674 istat(length, "revs in memory");
675 675 istat(ntcapacity, "node trie capacity");
676 676 istat(ntdepth, "node trie depth");
677 677 istat(ntlength, "node trie count");
678 678 istat(ntlookups, "node trie lookups");
679 679 istat(ntmisses, "node trie misses");
680 680 istat(ntrev, "node trie last rev scanned");
681 681 istat(ntsplits, "node trie splits");
682 682
683 683 #undef istat
684 684
685 685 return obj;
686 686
687 687 bail:
688 688 Py_XDECREF(obj);
689 689 return NULL;
690 690 }
691 691
692 692 /*
693 693 * When we cache a list, we want to be sure the caller can't mutate
694 694 * the cached copy.
695 695 */
696 696 static PyObject *list_copy(PyObject *list)
697 697 {
698 698 Py_ssize_t len = PyList_GET_SIZE(list);
699 699 PyObject *newlist = PyList_New(len);
700 700 Py_ssize_t i;
701 701
702 702 if (newlist == NULL)
703 703 return NULL;
704 704
705 705 for (i = 0; i < len; i++) {
706 706 PyObject *obj = PyList_GET_ITEM(list, i);
707 707 Py_INCREF(obj);
708 708 PyList_SET_ITEM(newlist, i, obj);
709 709 }
710 710
711 711 return newlist;
712 712 }
713 713
714 714 static PyObject *index_headrevs(indexObject *self)
715 715 {
716 716 Py_ssize_t i, len, addlen;
717 717 char *nothead = NULL;
718 718 PyObject *heads;
719 719
720 720 if (self->headrevs)
721 721 return list_copy(self->headrevs);
722 722
723 723 len = index_length(self) - 1;
724 724 heads = PyList_New(0);
725 725 if (heads == NULL)
726 726 goto bail;
727 727 if (len == 0) {
728 728 PyObject *nullid = PyInt_FromLong(-1);
729 729 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
730 730 Py_XDECREF(nullid);
731 731 goto bail;
732 732 }
733 733 goto done;
734 734 }
735 735
736 736 nothead = calloc(len, 1);
737 737 if (nothead == NULL)
738 738 goto bail;
739 739
740 740 for (i = 0; i < self->raw_length; i++) {
741 741 const char *data = index_deref(self, i);
742 742 int parent_1 = getbe32(data + 24);
743 743 int parent_2 = getbe32(data + 28);
744 744 if (parent_1 >= 0)
745 745 nothead[parent_1] = 1;
746 746 if (parent_2 >= 0)
747 747 nothead[parent_2] = 1;
748 748 }
749 749
750 750 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
751 751
752 752 for (i = 0; i < addlen; i++) {
753 753 PyObject *rev = PyList_GET_ITEM(self->added, i);
754 754 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
755 755 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
756 756 long parent_1, parent_2;
757 757
758 758 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
759 759 PyErr_SetString(PyExc_TypeError,
760 760 "revlog parents are invalid");
761 761 goto bail;
762 762 }
763 763 parent_1 = PyInt_AS_LONG(p1);
764 764 parent_2 = PyInt_AS_LONG(p2);
765 765 if (parent_1 >= 0)
766 766 nothead[parent_1] = 1;
767 767 if (parent_2 >= 0)
768 768 nothead[parent_2] = 1;
769 769 }
770 770
771 771 for (i = 0; i < len; i++) {
772 772 PyObject *head;
773 773
774 774 if (nothead[i])
775 775 continue;
776 776 head = PyInt_FromLong(i);
777 777 if (head == NULL || PyList_Append(heads, head) == -1) {
778 778 Py_XDECREF(head);
779 779 goto bail;
780 780 }
781 781 }
782 782
783 783 done:
784 784 self->headrevs = heads;
785 785 free(nothead);
786 786 return list_copy(self->headrevs);
787 787 bail:
788 788 Py_XDECREF(heads);
789 789 free(nothead);
790 790 return NULL;
791 791 }
792 792
793 793 static inline int nt_level(const char *node, Py_ssize_t level)
794 794 {
795 795 int v = node[level>>1];
796 796 if (!(level & 1))
797 797 v >>= 4;
798 798 return v & 0xf;
799 799 }
800 800
801 801 /*
802 802 * Return values:
803 803 *
804 804 * -4: match is ambiguous (multiple candidates)
805 805 * -2: not found
806 806 * rest: valid rev
807 807 */
808 808 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
809 809 int hex)
810 810 {
811 811 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
812 812 int level, maxlevel, off;
813 813
814 814 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
815 815 return -1;
816 816
817 817 if (self->nt == NULL)
818 818 return -2;
819 819
820 820 if (hex)
821 821 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
822 822 else
823 823 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
824 824
825 825 for (level = off = 0; level < maxlevel; level++) {
826 826 int k = getnybble(node, level);
827 827 nodetree *n = &self->nt[off];
828 828 int v = n->children[k];
829 829
830 830 if (v < 0) {
831 831 const char *n;
832 832 Py_ssize_t i;
833 833
834 834 v = -v - 1;
835 835 n = index_node(self, v);
836 836 if (n == NULL)
837 837 return -2;
838 838 for (i = level; i < maxlevel; i++)
839 839 if (getnybble(node, i) != nt_level(n, i))
840 840 return -2;
841 841 return v;
842 842 }
843 843 if (v == 0)
844 844 return -2;
845 845 off = v;
846 846 }
847 847 /* multiple matches against an ambiguous prefix */
848 848 return -4;
849 849 }
850 850
851 851 static int nt_new(indexObject *self)
852 852 {
853 853 if (self->ntlength == self->ntcapacity) {
854 854 self->ntcapacity *= 2;
855 855 self->nt = realloc(self->nt,
856 856 self->ntcapacity * sizeof(nodetree));
857 857 if (self->nt == NULL) {
858 858 PyErr_SetString(PyExc_MemoryError, "out of memory");
859 859 return -1;
860 860 }
861 861 memset(&self->nt[self->ntlength], 0,
862 862 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
863 863 }
864 864 return self->ntlength++;
865 865 }
866 866
867 867 static int nt_insert(indexObject *self, const char *node, int rev)
868 868 {
869 869 int level = 0;
870 870 int off = 0;
871 871
872 872 while (level < 40) {
873 873 int k = nt_level(node, level);
874 874 nodetree *n;
875 875 int v;
876 876
877 877 n = &self->nt[off];
878 878 v = n->children[k];
879 879
880 880 if (v == 0) {
881 881 n->children[k] = -rev - 1;
882 882 return 0;
883 883 }
884 884 if (v < 0) {
885 885 const char *oldnode = index_node(self, -v - 1);
886 886 int noff;
887 887
888 888 if (!oldnode || !memcmp(oldnode, node, 20)) {
889 889 n->children[k] = -rev - 1;
890 890 return 0;
891 891 }
892 892 noff = nt_new(self);
893 893 if (noff == -1)
894 894 return -1;
895 895 /* self->nt may have been changed by realloc */
896 896 self->nt[off].children[k] = noff;
897 897 off = noff;
898 898 n = &self->nt[off];
899 899 n->children[nt_level(oldnode, ++level)] = v;
900 900 if (level > self->ntdepth)
901 901 self->ntdepth = level;
902 902 self->ntsplits += 1;
903 903 } else {
904 904 level += 1;
905 905 off = v;
906 906 }
907 907 }
908 908
909 909 return -1;
910 910 }
911 911
912 912 static int nt_init(indexObject *self)
913 913 {
914 914 if (self->nt == NULL) {
915 915 self->ntcapacity = self->raw_length < 4
916 916 ? 4 : self->raw_length / 2;
917 917 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
918 918 if (self->nt == NULL) {
919 919 PyErr_NoMemory();
920 920 return -1;
921 921 }
922 922 self->ntlength = 1;
923 923 self->ntrev = (int)index_length(self) - 1;
924 924 self->ntlookups = 1;
925 925 self->ntmisses = 0;
926 926 if (nt_insert(self, nullid, INT_MAX) == -1)
927 927 return -1;
928 928 }
929 929 return 0;
930 930 }
931 931
932 932 /*
933 933 * Return values:
934 934 *
935 935 * -3: error (exception set)
936 936 * -2: not found (no exception set)
937 937 * rest: valid rev
938 938 */
939 939 static int index_find_node(indexObject *self,
940 940 const char *node, Py_ssize_t nodelen)
941 941 {
942 942 int rev;
943 943
944 944 self->ntlookups++;
945 945 rev = nt_find(self, node, nodelen, 0);
946 946 if (rev >= -1)
947 947 return rev;
948 948
949 949 if (nt_init(self) == -1)
950 950 return -3;
951 951
952 952 /*
953 953 * For the first handful of lookups, we scan the entire index,
954 954 * and cache only the matching nodes. This optimizes for cases
955 955 * like "hg tip", where only a few nodes are accessed.
956 956 *
957 957 * After that, we cache every node we visit, using a single
958 958 * scan amortized over multiple lookups. This gives the best
959 959 * bulk performance, e.g. for "hg log".
960 960 */
961 961 if (self->ntmisses++ < 4) {
962 962 for (rev = self->ntrev - 1; rev >= 0; rev--) {
963 963 const char *n = index_node(self, rev);
964 964 if (n == NULL)
965 965 return -2;
966 966 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
967 967 if (nt_insert(self, n, rev) == -1)
968 968 return -3;
969 969 break;
970 970 }
971 971 }
972 972 } else {
973 973 for (rev = self->ntrev - 1; rev >= 0; rev--) {
974 974 const char *n = index_node(self, rev);
975 975 if (n == NULL) {
976 976 self->ntrev = rev + 1;
977 977 return -2;
978 978 }
979 979 if (nt_insert(self, n, rev) == -1) {
980 980 self->ntrev = rev + 1;
981 981 return -3;
982 982 }
983 983 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
984 984 break;
985 985 }
986 986 }
987 987 self->ntrev = rev;
988 988 }
989 989
990 990 if (rev >= 0)
991 991 return rev;
992 992 return -2;
993 993 }
994 994
995 995 static PyObject *raise_revlog_error(void)
996 996 {
997 997 static PyObject *errclass;
998 998 PyObject *mod = NULL, *errobj;
999 999
1000 1000 if (errclass == NULL) {
1001 1001 PyObject *dict;
1002 1002
1003 1003 mod = PyImport_ImportModule("mercurial.error");
1004 1004 if (mod == NULL)
1005 1005 goto classfail;
1006 1006
1007 1007 dict = PyModule_GetDict(mod);
1008 1008 if (dict == NULL)
1009 1009 goto classfail;
1010 1010
1011 1011 errclass = PyDict_GetItemString(dict, "RevlogError");
1012 1012 if (errclass == NULL) {
1013 1013 PyErr_SetString(PyExc_SystemError,
1014 1014 "could not find RevlogError");
1015 1015 goto classfail;
1016 1016 }
1017 1017 Py_INCREF(errclass);
1018 1018 }
1019 1019
1020 1020 errobj = PyObject_CallFunction(errclass, NULL);
1021 1021 if (errobj == NULL)
1022 1022 return NULL;
1023 1023 PyErr_SetObject(errclass, errobj);
1024 1024 return errobj;
1025 1025
1026 1026 classfail:
1027 1027 Py_XDECREF(mod);
1028 1028 return NULL;
1029 1029 }
1030 1030
1031 1031 static PyObject *index_getitem(indexObject *self, PyObject *value)
1032 1032 {
1033 1033 char *node;
1034 1034 Py_ssize_t nodelen;
1035 1035 int rev;
1036 1036
1037 1037 if (PyInt_Check(value))
1038 1038 return index_get(self, PyInt_AS_LONG(value));
1039 1039
1040 1040 if (node_check(value, &node, &nodelen) == -1)
1041 1041 return NULL;
1042 1042 rev = index_find_node(self, node, nodelen);
1043 1043 if (rev >= -1)
1044 1044 return PyInt_FromLong(rev);
1045 1045 if (rev == -2)
1046 1046 raise_revlog_error();
1047 1047 return NULL;
1048 1048 }
1049 1049
1050 1050 static int nt_partialmatch(indexObject *self, const char *node,
1051 1051 Py_ssize_t nodelen)
1052 1052 {
1053 1053 int rev;
1054 1054
1055 1055 if (nt_init(self) == -1)
1056 1056 return -3;
1057 1057
1058 1058 if (self->ntrev > 0) {
1059 1059 /* ensure that the radix tree is fully populated */
1060 1060 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1061 1061 const char *n = index_node(self, rev);
1062 1062 if (n == NULL)
1063 1063 return -2;
1064 1064 if (nt_insert(self, n, rev) == -1)
1065 1065 return -3;
1066 1066 }
1067 1067 self->ntrev = rev;
1068 1068 }
1069 1069
1070 1070 return nt_find(self, node, nodelen, 1);
1071 1071 }
1072 1072
1073 1073 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1074 1074 {
1075 1075 const char *fullnode;
1076 1076 int nodelen;
1077 1077 char *node;
1078 1078 int rev, i;
1079 1079
1080 1080 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1081 1081 return NULL;
1082 1082
1083 1083 if (nodelen < 4) {
1084 1084 PyErr_SetString(PyExc_ValueError, "key too short");
1085 1085 return NULL;
1086 1086 }
1087 1087
1088 1088 if (nodelen > 40) {
1089 1089 PyErr_SetString(PyExc_ValueError, "key too long");
1090 1090 return NULL;
1091 1091 }
1092 1092
1093 1093 for (i = 0; i < nodelen; i++)
1094 1094 hexdigit(node, i);
1095 1095 if (PyErr_Occurred()) {
1096 1096 /* input contains non-hex characters */
1097 1097 PyErr_Clear();
1098 1098 Py_RETURN_NONE;
1099 1099 }
1100 1100
1101 1101 rev = nt_partialmatch(self, node, nodelen);
1102 1102
1103 1103 switch (rev) {
1104 1104 case -4:
1105 1105 raise_revlog_error();
1106 1106 case -3:
1107 1107 return NULL;
1108 1108 case -2:
1109 1109 Py_RETURN_NONE;
1110 1110 case -1:
1111 1111 return PyString_FromStringAndSize(nullid, 20);
1112 1112 }
1113 1113
1114 1114 fullnode = index_node(self, rev);
1115 1115 if (fullnode == NULL) {
1116 1116 PyErr_Format(PyExc_IndexError,
1117 1117 "could not access rev %d", rev);
1118 1118 return NULL;
1119 1119 }
1120 1120 return PyString_FromStringAndSize(fullnode, 20);
1121 1121 }
1122 1122
1123 1123 static PyObject *index_m_get(indexObject *self, PyObject *args)
1124 1124 {
1125 1125 Py_ssize_t nodelen;
1126 1126 PyObject *val;
1127 1127 char *node;
1128 1128 int rev;
1129 1129
1130 1130 if (!PyArg_ParseTuple(args, "O", &val))
1131 1131 return NULL;
1132 1132 if (node_check(val, &node, &nodelen) == -1)
1133 1133 return NULL;
1134 1134 rev = index_find_node(self, node, nodelen);
1135 1135 if (rev == -3)
1136 1136 return NULL;
1137 1137 if (rev == -2)
1138 1138 Py_RETURN_NONE;
1139 1139 return PyInt_FromLong(rev);
1140 1140 }
1141 1141
1142 1142 static int index_contains(indexObject *self, PyObject *value)
1143 1143 {
1144 1144 char *node;
1145 1145 Py_ssize_t nodelen;
1146 1146
1147 1147 if (PyInt_Check(value)) {
1148 1148 long rev = PyInt_AS_LONG(value);
1149 1149 return rev >= -1 && rev < index_length(self);
1150 1150 }
1151 1151
1152 1152 if (node_check(value, &node, &nodelen) == -1)
1153 1153 return -1;
1154 1154
1155 1155 switch (index_find_node(self, node, nodelen)) {
1156 1156 case -3:
1157 1157 return -1;
1158 1158 case -2:
1159 1159 return 0;
1160 1160 default:
1161 1161 return 1;
1162 1162 }
1163 1163 }
1164 1164
1165 1165 /*
1166 1166 * Invalidate any trie entries introduced by added revs.
1167 1167 */
1168 1168 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1169 1169 {
1170 1170 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1171 1171
1172 1172 for (i = start; i < len; i++) {
1173 1173 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1174 1174 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1175 1175
1176 1176 nt_insert(self, PyString_AS_STRING(node), -1);
1177 1177 }
1178 1178
1179 1179 if (start == 0)
1180 1180 Py_CLEAR(self->added);
1181 1181 }
1182 1182
1183 1183 /*
1184 1184 * Delete a numeric range of revs, which must be at the end of the
1185 1185 * range, but exclude the sentinel nullid entry.
1186 1186 */
1187 1187 static int index_slice_del(indexObject *self, PyObject *item)
1188 1188 {
1189 1189 Py_ssize_t start, stop, step, slicelength;
1190 1190 Py_ssize_t length = index_length(self);
1191 1191 int ret = 0;
1192 1192
1193 1193 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1194 1194 &start, &stop, &step, &slicelength) < 0)
1195 1195 return -1;
1196 1196
1197 1197 if (slicelength <= 0)
1198 1198 return 0;
1199 1199
1200 1200 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1201 1201 stop = start;
1202 1202
1203 1203 if (step < 0) {
1204 1204 stop = start + 1;
1205 1205 start = stop + step*(slicelength - 1) - 1;
1206 1206 step = -step;
1207 1207 }
1208 1208
1209 1209 if (step != 1) {
1210 1210 PyErr_SetString(PyExc_ValueError,
1211 1211 "revlog index delete requires step size of 1");
1212 1212 return -1;
1213 1213 }
1214 1214
1215 1215 if (stop != length - 1) {
1216 1216 PyErr_SetString(PyExc_IndexError,
1217 1217 "revlog index deletion indices are invalid");
1218 1218 return -1;
1219 1219 }
1220 1220
1221 1221 if (start < self->length - 1) {
1222 1222 if (self->nt) {
1223 1223 Py_ssize_t i;
1224 1224
1225 1225 for (i = start + 1; i < self->length - 1; i++) {
1226 1226 const char *node = index_node(self, i);
1227 1227
1228 1228 if (node)
1229 1229 nt_insert(self, node, -1);
1230 1230 }
1231 1231 if (self->added)
1232 1232 nt_invalidate_added(self, 0);
1233 1233 if (self->ntrev > start)
1234 1234 self->ntrev = (int)start;
1235 1235 }
1236 1236 self->length = start + 1;
1237 if (start < self->raw_length)
1237 if (start < self->raw_length) {
1238 if (self->cache) {
1239 Py_ssize_t i;
1240 for (i = start; i < self->raw_length; i++)
1241 Py_CLEAR(self->cache[i]);
1242 }
1238 1243 self->raw_length = start;
1244 }
1239 1245 goto done;
1240 1246 }
1241 1247
1242 1248 if (self->nt) {
1243 1249 nt_invalidate_added(self, start - self->length + 1);
1244 1250 if (self->ntrev > start)
1245 1251 self->ntrev = (int)start;
1246 1252 }
1247 1253 if (self->added)
1248 1254 ret = PyList_SetSlice(self->added, start - self->length + 1,
1249 1255 PyList_GET_SIZE(self->added), NULL);
1250 1256 done:
1251 1257 Py_CLEAR(self->headrevs);
1252 1258 return ret;
1253 1259 }
1254 1260
1255 1261 /*
1256 1262 * Supported ops:
1257 1263 *
1258 1264 * slice deletion
1259 1265 * string assignment (extend node->rev mapping)
1260 1266 * string deletion (shrink node->rev mapping)
1261 1267 */
1262 1268 static int index_assign_subscript(indexObject *self, PyObject *item,
1263 1269 PyObject *value)
1264 1270 {
1265 1271 char *node;
1266 1272 Py_ssize_t nodelen;
1267 1273 long rev;
1268 1274
1269 1275 if (PySlice_Check(item) && value == NULL)
1270 1276 return index_slice_del(self, item);
1271 1277
1272 1278 if (node_check(item, &node, &nodelen) == -1)
1273 1279 return -1;
1274 1280
1275 1281 if (value == NULL)
1276 1282 return self->nt ? nt_insert(self, node, -1) : 0;
1277 1283 rev = PyInt_AsLong(value);
1278 1284 if (rev > INT_MAX || rev < 0) {
1279 1285 if (!PyErr_Occurred())
1280 1286 PyErr_SetString(PyExc_ValueError, "rev out of range");
1281 1287 return -1;
1282 1288 }
1283 1289 return nt_insert(self, node, (int)rev);
1284 1290 }
1285 1291
1286 1292 /*
1287 1293 * Find all RevlogNG entries in an index that has inline data. Update
1288 1294 * the optional "offsets" table with those entries.
1289 1295 */
1290 1296 static long inline_scan(indexObject *self, const char **offsets)
1291 1297 {
1292 1298 const char *data = PyString_AS_STRING(self->data);
1293 1299 const char *end = data + PyString_GET_SIZE(self->data);
1294 1300 long incr = v1_hdrsize;
1295 1301 Py_ssize_t len = 0;
1296 1302
1297 1303 while (data + v1_hdrsize <= end) {
1298 1304 uint32_t comp_len;
1299 1305 const char *old_data;
1300 1306 /* 3rd element of header is length of compressed inline data */
1301 1307 comp_len = getbe32(data + 8);
1302 1308 incr = v1_hdrsize + comp_len;
1303 1309 if (incr < v1_hdrsize)
1304 1310 break;
1305 1311 if (offsets)
1306 1312 offsets[len] = data;
1307 1313 len++;
1308 1314 old_data = data;
1309 1315 data += incr;
1310 1316 if (data <= old_data)
1311 1317 break;
1312 1318 }
1313 1319
1314 1320 if (data != end && data + v1_hdrsize != end) {
1315 1321 if (!PyErr_Occurred())
1316 1322 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1317 1323 return -1;
1318 1324 }
1319 1325
1320 1326 return len;
1321 1327 }
1322 1328
1323 1329 static int index_init(indexObject *self, PyObject *args)
1324 1330 {
1325 1331 PyObject *data_obj, *inlined_obj;
1326 1332 Py_ssize_t size;
1327 1333
1328 1334 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1329 1335 return -1;
1330 1336 if (!PyString_Check(data_obj)) {
1331 1337 PyErr_SetString(PyExc_TypeError, "data is not a string");
1332 1338 return -1;
1333 1339 }
1334 1340 size = PyString_GET_SIZE(data_obj);
1335 1341
1336 1342 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1337 1343 self->data = data_obj;
1338 1344 self->cache = NULL;
1339 1345
1340 1346 self->added = NULL;
1341 1347 self->headrevs = NULL;
1342 1348 self->offsets = NULL;
1343 1349 self->nt = NULL;
1344 1350 self->ntlength = self->ntcapacity = 0;
1345 1351 self->ntdepth = self->ntsplits = 0;
1346 1352 self->ntlookups = self->ntmisses = 0;
1347 1353 self->ntrev = -1;
1348 1354 Py_INCREF(self->data);
1349 1355
1350 1356 if (self->inlined) {
1351 1357 long len = inline_scan(self, NULL);
1352 1358 if (len == -1)
1353 1359 goto bail;
1354 1360 self->raw_length = len;
1355 1361 self->length = len + 1;
1356 1362 } else {
1357 1363 if (size % v1_hdrsize) {
1358 1364 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1359 1365 goto bail;
1360 1366 }
1361 1367 self->raw_length = size / v1_hdrsize;
1362 1368 self->length = self->raw_length + 1;
1363 1369 }
1364 1370
1365 1371 return 0;
1366 1372 bail:
1367 1373 return -1;
1368 1374 }
1369 1375
1370 1376 static PyObject *index_nodemap(indexObject *self)
1371 1377 {
1372 1378 Py_INCREF(self);
1373 1379 return (PyObject *)self;
1374 1380 }
1375 1381
1376 1382 static void index_dealloc(indexObject *self)
1377 1383 {
1378 1384 _index_clearcaches(self);
1379 1385 Py_DECREF(self->data);
1380 1386 Py_XDECREF(self->added);
1381 1387 PyObject_Del(self);
1382 1388 }
1383 1389
/*
 * Sequence protocol for the index type: length, integer item access
 * and the "in" operator.  Slots left at 0 are unsupported.
 */
static PySequenceMethods index_sequence_methods = {
	(lenfunc)index_length,   /* sq_length */
	0,                       /* sq_concat */
	0,                       /* sq_repeat */
	(ssizeargfunc)index_get, /* sq_item */
	0,                       /* sq_slice */
	0,                       /* sq_ass_item */
	0,                       /* sq_ass_slice */
	(objobjproc)index_contains, /* sq_contains */
};
1394 1400
/*
 * Mapping protocol for the index type: length, subscript lookup, and
 * subscript assignment/deletion (handled by index_assign_subscript).
 */
static PyMappingMethods index_mapping_methods = {
	(lenfunc)index_length,                 /* mp_length */
	(binaryfunc)index_getitem,             /* mp_subscript */
	(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
1400 1406
/*
 * Methods exposed on index objects.  Each row is
 * {name, C implementation, calling convention, docstring}.
 */
static PyMethodDef index_methods[] = {
	{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
	 "clear the index caches"},
	{"get", (PyCFunction)index_m_get, METH_VARARGS,
	 "get an index entry"},
	{"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
	 "get head revisions"},
	{"insert", (PyCFunction)index_insert, METH_VARARGS,
	 "insert an index entry"},
	{"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
	 "match a potentially ambiguous node ID"},
	{"stats", (PyCFunction)index_stats, METH_NOARGS,
	 "stats for the index"},
	{NULL} /* Sentinel */
};
1416 1422
/*
 * Computed attributes of index objects.  "nodemap" is read-only (no
 * setter) and is served by index_nodemap.
 */
static PyGetSetDef index_getset[] = {
	{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
	{NULL} /* Sentinel */
};
1421 1427
/*
 * Type object for "parsers.index".  The initializer is positional, so
 * the order of the slots below must not change.  tp_new is not set
 * here; module_init() assigns it before calling PyType_Ready().
 */
static PyTypeObject indexType = {
	PyObject_HEAD_INIT(NULL)
	0,                         /* ob_size */
	"parsers.index",           /* tp_name */
	sizeof(indexObject),       /* tp_basicsize */
	0,                         /* tp_itemsize */
	(destructor)index_dealloc, /* tp_dealloc */
	0,                         /* tp_print */
	0,                         /* tp_getattr */
	0,                         /* tp_setattr */
	0,                         /* tp_compare */
	0,                         /* tp_repr */
	0,                         /* tp_as_number */
	&index_sequence_methods,   /* tp_as_sequence */
	&index_mapping_methods,    /* tp_as_mapping */
	0,                         /* tp_hash */
	0,                         /* tp_call */
	0,                         /* tp_str */
	0,                         /* tp_getattro */
	0,                         /* tp_setattro */
	0,                         /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,        /* tp_flags */
	"revlog index",            /* tp_doc */
	0,                         /* tp_traverse */
	0,                         /* tp_clear */
	0,                         /* tp_richcompare */
	0,                         /* tp_weaklistoffset */
	0,                         /* tp_iter */
	0,                         /* tp_iternext */
	index_methods,             /* tp_methods */
	0,                         /* tp_members */
	index_getset,              /* tp_getset */
	0,                         /* tp_base */
	0,                         /* tp_dict */
	0,                         /* tp_descr_get */
	0,                         /* tp_descr_set */
	0,                         /* tp_dictoffset */
	(initproc)index_init,      /* tp_init */
	0,                         /* tp_alloc */
};
1462 1468
1463 1469 /*
1464 1470 * returns a tuple of the form (index, index, cache) with elements as
1465 1471 * follows:
1466 1472 *
1467 1473 * index: an index object that lazily parses RevlogNG records
1468 1474 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1469 1475 *
1470 1476 * added complications are for backwards compatibility
1471 1477 */
1472 1478 static PyObject *parse_index2(PyObject *self, PyObject *args)
1473 1479 {
1474 1480 PyObject *tuple = NULL, *cache = NULL;
1475 1481 indexObject *idx;
1476 1482 int ret;
1477 1483
1478 1484 idx = PyObject_New(indexObject, &indexType);
1479 1485 if (idx == NULL)
1480 1486 goto bail;
1481 1487
1482 1488 ret = index_init(idx, args);
1483 1489 if (ret == -1)
1484 1490 goto bail;
1485 1491
1486 1492 if (idx->inlined) {
1487 1493 cache = Py_BuildValue("iO", 0, idx->data);
1488 1494 if (cache == NULL)
1489 1495 goto bail;
1490 1496 } else {
1491 1497 cache = Py_None;
1492 1498 Py_INCREF(cache);
1493 1499 }
1494 1500
1495 1501 tuple = Py_BuildValue("NN", idx, cache);
1496 1502 if (!tuple)
1497 1503 goto bail;
1498 1504 return tuple;
1499 1505
1500 1506 bail:
1501 1507 Py_XDECREF(idx);
1502 1508 Py_XDECREF(cache);
1503 1509 Py_XDECREF(tuple);
1504 1510 return NULL;
1505 1511 }
1506 1512
/* Module docstring. */
static char parsers_doc[] = "Efficient content parsing.";

/* Path-encoding helpers registered in the method table; not defined in this part of the file. */
PyObject *encodedir(PyObject *self, PyObject *args);
PyObject *pathencode(PyObject *self, PyObject *args);
PyObject *lowerencode(PyObject *self, PyObject *args);
1512 1518
/*
 * Module-level functions of "parsers".  Each row is
 * {name, C implementation, calling convention, docstring}.
 */
static PyMethodDef methods[] = {
	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
	{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
	{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
	{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
	{NULL, NULL}
};
1523 1529
1524 1530 static void module_init(PyObject *mod)
1525 1531 {
1526 1532 indexType.tp_new = PyType_GenericNew;
1527 1533 if (PyType_Ready(&indexType) < 0)
1528 1534 return;
1529 1535 Py_INCREF(&indexType);
1530 1536
1531 1537 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1532 1538
1533 1539 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1534 1540 -1, -1, -1, -1, nullid, 20);
1535 1541 if (nullentry)
1536 1542 PyObject_GC_UnTrack(nullentry);
1537 1543
1538 1544 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1539 1545 }
1540 1546
1541 1547 #ifdef IS_PY3K
1542 1548 static struct PyModuleDef parsers_module = {
1543 1549 PyModuleDef_HEAD_INIT,
1544 1550 "parsers",
1545 1551 parsers_doc,
1546 1552 -1,
1547 1553 methods
1548 1554 };
1549 1555
1550 1556 PyMODINIT_FUNC PyInit_parsers(void)
1551 1557 {
1552 1558 PyObject *mod = PyModule_Create(&parsers_module);
1553 1559 module_init(mod);
1554 1560 return mod;
1555 1561 }
1556 1562 #else
1557 1563 PyMODINIT_FUNC initparsers(void)
1558 1564 {
1559 1565 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1560 1566 module_init(mod);
1561 1567 }
1562 1568 #endif
General Comments 0
You need to be logged in to leave comments. Login now