/*
 * revlog: properly detect corrupted revlog in `index_get_length`
 *
 * Changeset r40772:95913063 (default branch), authored by Boris Feld.
 * Diff header: @@ -1,2849 +1,2856 @@
 */
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <assert.h>
12 12 #include <ctype.h>
13 13 #include <limits.h>
14 14 #include <stddef.h>
15 15 #include <stdlib.h>
16 16 #include <string.h>
17 17
18 18 #include "bitmanipulation.h"
19 19 #include "charencode.h"
20 20 #include "util.h"
21 21
22 22 #ifdef IS_PY3K
23 23 /* The mapping of Python types is meant to be temporary to get Python
24 24 * 3 to compile. We should remove this once Python 3 support is fully
25 25 * supported and proper types are used in the extensions themselves. */
26 26 #define PyInt_Check PyLong_Check
27 27 #define PyInt_FromLong PyLong_FromLong
28 28 #define PyInt_FromSsize_t PyLong_FromSsize_t
29 29 #define PyInt_AsLong PyLong_AsLong
30 30 #endif
31 31
32 32 typedef struct indexObjectStruct indexObject;
33 33
34 34 typedef struct {
35 35 int children[16];
36 36 } nodetreenode;
37 37
38 38 /*
39 39 * A base-16 trie for fast node->rev mapping.
40 40 *
41 41 * Positive value is index of the next node in the trie
42 42 * Negative value is a leaf: -(rev + 2)
43 43 * Zero is empty
44 44 */
45 45 typedef struct {
46 46 indexObject *index;
47 47 nodetreenode *nodes;
48 48 unsigned length; /* # nodes in use */
49 49 unsigned capacity; /* # nodes allocated */
50 50 int depth; /* maximum depth of tree */
51 51 int splits; /* # splits performed */
52 52 } nodetree;
53 53
54 54 typedef struct {
55 55 PyObject_HEAD /* ; */
56 56 nodetree nt;
57 57 } nodetreeObject;
58 58
59 59 /*
60 60 * This class has two behaviors.
61 61 *
62 62 * When used in a list-like way (with integer keys), we decode an
63 63 * entry in a RevlogNG index file on demand. Our last entry is a
64 64 * sentinel, always a nullid. We have limited support for
65 65 * integer-keyed insert and delete, only at elements right before the
66 66 * sentinel.
67 67 *
68 68 * With string keys, we lazily perform a reverse mapping from node to
69 69 * rev, using a base-16 trie.
70 70 */
71 71 struct indexObjectStruct {
72 72 PyObject_HEAD
73 73 /* Type-specific fields go here. */
74 74 PyObject *data; /* raw bytes of index */
75 75 Py_buffer buf; /* buffer of data */
76 76 PyObject **cache; /* cached tuples */
77 77 const char **offsets; /* populated on demand */
78 78 Py_ssize_t raw_length; /* original number of elements */
79 79 Py_ssize_t length; /* current number of elements */
80 80 PyObject *added; /* populated on demand */
81 81 PyObject *headrevs; /* cache, invalidated on changes */
82 82 PyObject *filteredrevs; /* filtered revs set */
83 83 nodetree nt; /* base-16 trie */
84 84 int ntinitialized; /* 0 or 1 */
85 85 int ntrev; /* last rev scanned */
86 86 int ntlookups; /* # lookups */
87 87 int ntmisses; /* # lookups that miss the cache */
88 88 int inlined;
89 89 };
90 90
91 91 static Py_ssize_t index_length(const indexObject *self)
92 92 {
93 93 if (self->added == NULL)
94 94 return self->length;
95 95 return self->length + PyList_GET_SIZE(self->added);
96 96 }
97 97
98 98 static PyObject *nullentry = NULL;
99 99 static const char nullid[20] = {0};
100 100
101 101 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
102 102
103 103 #if LONG_MAX == 0x7fffffffL
104 104 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
105 105 #else
106 106 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
107 107 #endif
108 108
109 109 /* A RevlogNG v1 index entry is 64 bytes long. */
110 110 static const long v1_hdrsize = 64;
111 111
112 112 static void raise_revlog_error(void)
113 113 {
114 114 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
115 115
116 116 mod = PyImport_ImportModule("mercurial.error");
117 117 if (mod == NULL) {
118 118 goto cleanup;
119 119 }
120 120
121 121 dict = PyModule_GetDict(mod);
122 122 if (dict == NULL) {
123 123 goto cleanup;
124 124 }
125 125 Py_INCREF(dict);
126 126
127 127 errclass = PyDict_GetItemString(dict, "RevlogError");
128 128 if (errclass == NULL) {
129 129 PyErr_SetString(PyExc_SystemError,
130 130 "could not find RevlogError");
131 131 goto cleanup;
132 132 }
133 133
134 134 /* value of exception is ignored by callers */
135 135 PyErr_SetString(errclass, "RevlogError");
136 136
137 137 cleanup:
138 138 Py_XDECREF(dict);
139 139 Py_XDECREF(mod);
140 140 }
141 141
142 142 /*
143 143 * Return a pointer to the beginning of a RevlogNG record.
144 144 */
145 145 static const char *index_deref(indexObject *self, Py_ssize_t pos)
146 146 {
147 147 if (self->inlined && pos > 0) {
148 148 if (self->offsets == NULL) {
149 149 self->offsets = PyMem_Malloc(self->raw_length *
150 150 sizeof(*self->offsets));
151 151 if (self->offsets == NULL)
152 152 return (const char *)PyErr_NoMemory();
153 153 inline_scan(self, self->offsets);
154 154 }
155 155 return self->offsets[pos];
156 156 }
157 157
158 158 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
159 159 }
160 160
161 161 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
162 162 int maxrev)
163 163 {
164 164 if (rev >= self->length) {
165 165 long tmp;
166 166 PyObject *tuple =
167 167 PyList_GET_ITEM(self->added, rev - self->length);
168 168 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
169 169 return -1;
170 170 }
171 171 ps[0] = (int)tmp;
172 172 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
173 173 return -1;
174 174 }
175 175 ps[1] = (int)tmp;
176 176 } else {
177 177 const char *data = index_deref(self, rev);
178 178 ps[0] = getbe32(data + 24);
179 179 ps[1] = getbe32(data + 28);
180 180 }
181 181 /* If index file is corrupted, ps[] may point to invalid revisions. So
182 182 * there is a risk of buffer overflow to trust them unconditionally. */
183 183 if (ps[0] > maxrev || ps[1] > maxrev) {
184 184 PyErr_SetString(PyExc_ValueError, "parent out of range");
185 185 return -1;
186 186 }
187 187 return 0;
188 188 }
189 189
190 190 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
191 191 {
192 192 uint64_t offset;
193 193 if (rev >= self->length) {
194 194 PyObject *tuple;
195 195 PyObject *pylong;
196 196 PY_LONG_LONG tmp;
197 197 tuple = PyList_GET_ITEM(self->added, rev - self->length);
198 198 pylong = PyTuple_GET_ITEM(tuple, 0);
199 199 tmp = PyLong_AsLongLong(pylong);
200 200 if (tmp == -1 && PyErr_Occurred()) {
201 201 return -1;
202 202 }
203 203 if (tmp < 0) {
204 204 PyErr_Format(PyExc_OverflowError,
205 205 "revlog entry size out of bound (%lld)",
206 206 (long long)tmp);
207 207 return -1;
208 208 }
209 209 offset = (uint64_t)tmp;
210 210 } else {
211 211 const char *data = index_deref(self, rev);
212 212 offset = getbe32(data + 4);
213 213 if (rev == 0) {
214 214 /* mask out version number for the first entry */
215 215 offset &= 0xFFFF;
216 216 } else {
217 217 uint32_t offset_high = getbe32(data);
218 218 offset |= ((uint64_t)offset_high) << 32;
219 219 }
220 220 }
221 221 return (int64_t)(offset >> 16);
222 222 }
223 223
224 224 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
225 225 {
226 226 if (rev >= self->length) {
227 227 PyObject *tuple;
228 228 PyObject *pylong;
229 229 long ret;
230 230 tuple = PyList_GET_ITEM(self->added, rev - self->length);
231 231 pylong = PyTuple_GET_ITEM(tuple, 1);
232 232 ret = PyInt_AsLong(pylong);
233 233 if (ret == -1 && PyErr_Occurred()) {
234 234 return -1;
235 235 }
236 236 if (ret < 0 || ret > (long)INT_MAX) {
237 237 PyErr_Format(PyExc_OverflowError,
238 238 "revlog entry size out of bound (%ld)",
239 239 ret);
240 240 return -1;
241 241 }
242 242 return (int)ret;
243 243 } else {
244 244 const char *data = index_deref(self, rev);
245 return (int)getbe32(data + 8);
245 int tmp = (int)getbe32(data + 8);
246 if (tmp < 0) {
247 PyErr_Format(PyExc_OverflowError,
248 "revlog entry size out of bound (%d)",
249 tmp);
250 return -1;
251 }
252 return tmp;
246 253 }
247 254 }
248 255
249 256 /*
250 257 * RevlogNG format (all in big endian, data may be inlined):
251 258 * 6 bytes: offset
252 259 * 2 bytes: flags
253 260 * 4 bytes: compressed length
254 261 * 4 bytes: uncompressed length
255 262 * 4 bytes: base revision
256 263 * 4 bytes: link revision
257 264 * 4 bytes: parent 1 revision
258 265 * 4 bytes: parent 2 revision
259 266 * 32 bytes: nodeid (only 20 bytes used)
260 267 */
261 268 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
262 269 {
263 270 uint64_t offset_flags;
264 271 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
265 272 const char *c_node_id;
266 273 const char *data;
267 274 Py_ssize_t length = index_length(self);
268 275 PyObject *entry;
269 276
270 277 if (pos == -1) {
271 278 Py_INCREF(nullentry);
272 279 return nullentry;
273 280 }
274 281
275 282 if (pos < 0 || pos >= length) {
276 283 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
277 284 return NULL;
278 285 }
279 286
280 287 if (pos >= self->length) {
281 288 PyObject *obj;
282 289 obj = PyList_GET_ITEM(self->added, pos - self->length);
283 290 Py_INCREF(obj);
284 291 return obj;
285 292 }
286 293
287 294 if (self->cache) {
288 295 if (self->cache[pos]) {
289 296 Py_INCREF(self->cache[pos]);
290 297 return self->cache[pos];
291 298 }
292 299 } else {
293 300 self->cache = calloc(self->raw_length, sizeof(PyObject *));
294 301 if (self->cache == NULL)
295 302 return PyErr_NoMemory();
296 303 }
297 304
298 305 data = index_deref(self, pos);
299 306 if (data == NULL)
300 307 return NULL;
301 308
302 309 offset_flags = getbe32(data + 4);
303 310 if (pos == 0) /* mask out version number for the first entry */
304 311 offset_flags &= 0xFFFF;
305 312 else {
306 313 uint32_t offset_high = getbe32(data);
307 314 offset_flags |= ((uint64_t)offset_high) << 32;
308 315 }
309 316
310 317 comp_len = getbe32(data + 8);
311 318 uncomp_len = getbe32(data + 12);
312 319 base_rev = getbe32(data + 16);
313 320 link_rev = getbe32(data + 20);
314 321 parent_1 = getbe32(data + 24);
315 322 parent_2 = getbe32(data + 28);
316 323 c_node_id = data + 32;
317 324
318 325 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
319 326 base_rev, link_rev, parent_1, parent_2, c_node_id,
320 327 20);
321 328
322 329 if (entry) {
323 330 PyObject_GC_UnTrack(entry);
324 331 Py_INCREF(entry);
325 332 }
326 333
327 334 self->cache[pos] = entry;
328 335
329 336 return entry;
330 337 }
331 338
332 339 /*
333 340 * Return the 20-byte SHA of the node corresponding to the given rev.
334 341 */
335 342 static const char *index_node(indexObject *self, Py_ssize_t pos)
336 343 {
337 344 Py_ssize_t length = index_length(self);
338 345 const char *data;
339 346
340 347 if (pos == -1)
341 348 return nullid;
342 349
343 350 if (pos >= length)
344 351 return NULL;
345 352
346 353 if (pos >= self->length) {
347 354 PyObject *tuple, *str;
348 355 tuple = PyList_GET_ITEM(self->added, pos - self->length);
349 356 str = PyTuple_GetItem(tuple, 7);
350 357 return str ? PyBytes_AS_STRING(str) : NULL;
351 358 }
352 359
353 360 data = index_deref(self, pos);
354 361 return data ? data + 32 : NULL;
355 362 }
356 363
357 364 /*
358 365 * Return the 20-byte SHA of the node corresponding to the given rev. The
359 366 * rev is assumed to be existing. If not, an exception is set.
360 367 */
361 368 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
362 369 {
363 370 const char *node = index_node(self, pos);
364 371 if (node == NULL) {
365 372 PyErr_Format(PyExc_IndexError, "could not access rev %d",
366 373 (int)pos);
367 374 }
368 375 return node;
369 376 }
370 377
371 378 static int nt_insert(nodetree *self, const char *node, int rev);
372 379
373 380 static int node_check(PyObject *obj, char **node)
374 381 {
375 382 Py_ssize_t nodelen;
376 383 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
377 384 return -1;
378 385 if (nodelen == 20)
379 386 return 0;
380 387 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
381 388 return -1;
382 389 }
383 390
384 391 static PyObject *index_append(indexObject *self, PyObject *obj)
385 392 {
386 393 char *node;
387 394 Py_ssize_t len;
388 395
389 396 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
390 397 PyErr_SetString(PyExc_TypeError, "8-tuple required");
391 398 return NULL;
392 399 }
393 400
394 401 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
395 402 return NULL;
396 403
397 404 len = index_length(self);
398 405
399 406 if (self->added == NULL) {
400 407 self->added = PyList_New(0);
401 408 if (self->added == NULL)
402 409 return NULL;
403 410 }
404 411
405 412 if (PyList_Append(self->added, obj) == -1)
406 413 return NULL;
407 414
408 415 if (self->ntinitialized)
409 416 nt_insert(&self->nt, node, (int)len);
410 417
411 418 Py_CLEAR(self->headrevs);
412 419 Py_RETURN_NONE;
413 420 }
414 421
415 422 static PyObject *index_stats(indexObject *self)
416 423 {
417 424 PyObject *obj = PyDict_New();
418 425 PyObject *s = NULL;
419 426 PyObject *t = NULL;
420 427
421 428 if (obj == NULL)
422 429 return NULL;
423 430
424 431 #define istat(__n, __d) \
425 432 do { \
426 433 s = PyBytes_FromString(__d); \
427 434 t = PyInt_FromSsize_t(self->__n); \
428 435 if (!s || !t) \
429 436 goto bail; \
430 437 if (PyDict_SetItem(obj, s, t) == -1) \
431 438 goto bail; \
432 439 Py_CLEAR(s); \
433 440 Py_CLEAR(t); \
434 441 } while (0)
435 442
436 443 if (self->added) {
437 444 Py_ssize_t len = PyList_GET_SIZE(self->added);
438 445 s = PyBytes_FromString("index entries added");
439 446 t = PyInt_FromSsize_t(len);
440 447 if (!s || !t)
441 448 goto bail;
442 449 if (PyDict_SetItem(obj, s, t) == -1)
443 450 goto bail;
444 451 Py_CLEAR(s);
445 452 Py_CLEAR(t);
446 453 }
447 454
448 455 if (self->raw_length != self->length)
449 456 istat(raw_length, "revs on disk");
450 457 istat(length, "revs in memory");
451 458 istat(ntlookups, "node trie lookups");
452 459 istat(ntmisses, "node trie misses");
453 460 istat(ntrev, "node trie last rev scanned");
454 461 if (self->ntinitialized) {
455 462 istat(nt.capacity, "node trie capacity");
456 463 istat(nt.depth, "node trie depth");
457 464 istat(nt.length, "node trie count");
458 465 istat(nt.splits, "node trie splits");
459 466 }
460 467
461 468 #undef istat
462 469
463 470 return obj;
464 471
465 472 bail:
466 473 Py_XDECREF(obj);
467 474 Py_XDECREF(s);
468 475 Py_XDECREF(t);
469 476 return NULL;
470 477 }
471 478
472 479 /*
473 480 * When we cache a list, we want to be sure the caller can't mutate
474 481 * the cached copy.
475 482 */
476 483 static PyObject *list_copy(PyObject *list)
477 484 {
478 485 Py_ssize_t len = PyList_GET_SIZE(list);
479 486 PyObject *newlist = PyList_New(len);
480 487 Py_ssize_t i;
481 488
482 489 if (newlist == NULL)
483 490 return NULL;
484 491
485 492 for (i = 0; i < len; i++) {
486 493 PyObject *obj = PyList_GET_ITEM(list, i);
487 494 Py_INCREF(obj);
488 495 PyList_SET_ITEM(newlist, i, obj);
489 496 }
490 497
491 498 return newlist;
492 499 }
493 500
494 501 static int check_filter(PyObject *filter, Py_ssize_t arg)
495 502 {
496 503 if (filter) {
497 504 PyObject *arglist, *result;
498 505 int isfiltered;
499 506
500 507 arglist = Py_BuildValue("(n)", arg);
501 508 if (!arglist) {
502 509 return -1;
503 510 }
504 511
505 512 result = PyEval_CallObject(filter, arglist);
506 513 Py_DECREF(arglist);
507 514 if (!result) {
508 515 return -1;
509 516 }
510 517
511 518 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
512 519 * same as this function, so we can just return it directly.*/
513 520 isfiltered = PyObject_IsTrue(result);
514 521 Py_DECREF(result);
515 522 return isfiltered;
516 523 } else {
517 524 return 0;
518 525 }
519 526 }
520 527
521 528 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
522 529 Py_ssize_t marker, char *phases)
523 530 {
524 531 PyObject *iter = NULL;
525 532 PyObject *iter_item = NULL;
526 533 Py_ssize_t min_idx = index_length(self) + 2;
527 534 long iter_item_long;
528 535
529 536 if (PyList_GET_SIZE(list) != 0) {
530 537 iter = PyObject_GetIter(list);
531 538 if (iter == NULL)
532 539 return -2;
533 540 while ((iter_item = PyIter_Next(iter))) {
534 541 if (!pylong_to_long(iter_item, &iter_item_long)) {
535 542 Py_DECREF(iter_item);
536 543 return -2;
537 544 }
538 545 Py_DECREF(iter_item);
539 546 if (iter_item_long < min_idx)
540 547 min_idx = iter_item_long;
541 548 phases[iter_item_long] = (char)marker;
542 549 }
543 550 Py_DECREF(iter);
544 551 }
545 552
546 553 return min_idx;
547 554 }
548 555
549 556 static inline void set_phase_from_parents(char *phases, int parent_1,
550 557 int parent_2, Py_ssize_t i)
551 558 {
552 559 if (parent_1 >= 0 && phases[parent_1] > phases[i])
553 560 phases[i] = phases[parent_1];
554 561 if (parent_2 >= 0 && phases[parent_2] > phases[i])
555 562 phases[i] = phases[parent_2];
556 563 }
557 564
558 565 static PyObject *reachableroots2(indexObject *self, PyObject *args)
559 566 {
560 567
561 568 /* Input */
562 569 long minroot;
563 570 PyObject *includepatharg = NULL;
564 571 int includepath = 0;
565 572 /* heads and roots are lists */
566 573 PyObject *heads = NULL;
567 574 PyObject *roots = NULL;
568 575 PyObject *reachable = NULL;
569 576
570 577 PyObject *val;
571 578 Py_ssize_t len = index_length(self);
572 579 long revnum;
573 580 Py_ssize_t k;
574 581 Py_ssize_t i;
575 582 Py_ssize_t l;
576 583 int r;
577 584 int parents[2];
578 585
579 586 /* Internal data structure:
580 587 * tovisit: array of length len+1 (all revs + nullrev), filled upto
581 588 * lentovisit
582 589 *
583 590 * revstates: array of length len+1 (all revs + nullrev) */
584 591 int *tovisit = NULL;
585 592 long lentovisit = 0;
586 593 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
587 594 char *revstates = NULL;
588 595
589 596 /* Get arguments */
590 597 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
591 598 &PyList_Type, &roots, &PyBool_Type,
592 599 &includepatharg))
593 600 goto bail;
594 601
595 602 if (includepatharg == Py_True)
596 603 includepath = 1;
597 604
598 605 /* Initialize return set */
599 606 reachable = PyList_New(0);
600 607 if (reachable == NULL)
601 608 goto bail;
602 609
603 610 /* Initialize internal datastructures */
604 611 tovisit = (int *)malloc((len + 1) * sizeof(int));
605 612 if (tovisit == NULL) {
606 613 PyErr_NoMemory();
607 614 goto bail;
608 615 }
609 616
610 617 revstates = (char *)calloc(len + 1, 1);
611 618 if (revstates == NULL) {
612 619 PyErr_NoMemory();
613 620 goto bail;
614 621 }
615 622
616 623 l = PyList_GET_SIZE(roots);
617 624 for (i = 0; i < l; i++) {
618 625 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
619 626 if (revnum == -1 && PyErr_Occurred())
620 627 goto bail;
621 628 /* If root is out of range, e.g. wdir(), it must be unreachable
622 629 * from heads. So we can just ignore it. */
623 630 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
624 631 continue;
625 632 revstates[revnum + 1] |= RS_ROOT;
626 633 }
627 634
628 635 /* Populate tovisit with all the heads */
629 636 l = PyList_GET_SIZE(heads);
630 637 for (i = 0; i < l; i++) {
631 638 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
632 639 if (revnum == -1 && PyErr_Occurred())
633 640 goto bail;
634 641 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
635 642 PyErr_SetString(PyExc_IndexError, "head out of range");
636 643 goto bail;
637 644 }
638 645 if (!(revstates[revnum + 1] & RS_SEEN)) {
639 646 tovisit[lentovisit++] = (int)revnum;
640 647 revstates[revnum + 1] |= RS_SEEN;
641 648 }
642 649 }
643 650
644 651 /* Visit the tovisit list and find the reachable roots */
645 652 k = 0;
646 653 while (k < lentovisit) {
647 654 /* Add the node to reachable if it is a root*/
648 655 revnum = tovisit[k++];
649 656 if (revstates[revnum + 1] & RS_ROOT) {
650 657 revstates[revnum + 1] |= RS_REACHABLE;
651 658 val = PyInt_FromLong(revnum);
652 659 if (val == NULL)
653 660 goto bail;
654 661 r = PyList_Append(reachable, val);
655 662 Py_DECREF(val);
656 663 if (r < 0)
657 664 goto bail;
658 665 if (includepath == 0)
659 666 continue;
660 667 }
661 668
662 669 /* Add its parents to the list of nodes to visit */
663 670 if (revnum == -1)
664 671 continue;
665 672 r = index_get_parents(self, revnum, parents, (int)len - 1);
666 673 if (r < 0)
667 674 goto bail;
668 675 for (i = 0; i < 2; i++) {
669 676 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
670 677 parents[i] >= minroot) {
671 678 tovisit[lentovisit++] = parents[i];
672 679 revstates[parents[i] + 1] |= RS_SEEN;
673 680 }
674 681 }
675 682 }
676 683
677 684 /* Find all the nodes in between the roots we found and the heads
678 685 * and add them to the reachable set */
679 686 if (includepath == 1) {
680 687 long minidx = minroot;
681 688 if (minidx < 0)
682 689 minidx = 0;
683 690 for (i = minidx; i < len; i++) {
684 691 if (!(revstates[i + 1] & RS_SEEN))
685 692 continue;
686 693 r = index_get_parents(self, i, parents, (int)len - 1);
687 694 /* Corrupted index file, error is set from
688 695 * index_get_parents */
689 696 if (r < 0)
690 697 goto bail;
691 698 if (((revstates[parents[0] + 1] |
692 699 revstates[parents[1] + 1]) &
693 700 RS_REACHABLE) &&
694 701 !(revstates[i + 1] & RS_REACHABLE)) {
695 702 revstates[i + 1] |= RS_REACHABLE;
696 703 val = PyInt_FromSsize_t(i);
697 704 if (val == NULL)
698 705 goto bail;
699 706 r = PyList_Append(reachable, val);
700 707 Py_DECREF(val);
701 708 if (r < 0)
702 709 goto bail;
703 710 }
704 711 }
705 712 }
706 713
707 714 free(revstates);
708 715 free(tovisit);
709 716 return reachable;
710 717 bail:
711 718 Py_XDECREF(reachable);
712 719 free(revstates);
713 720 free(tovisit);
714 721 return NULL;
715 722 }
716 723
717 724 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
718 725 {
719 726 PyObject *roots = Py_None;
720 727 PyObject *ret = NULL;
721 728 PyObject *phasessize = NULL;
722 729 PyObject *phaseroots = NULL;
723 730 PyObject *phaseset = NULL;
724 731 PyObject *phasessetlist = NULL;
725 732 PyObject *rev = NULL;
726 733 Py_ssize_t len = index_length(self);
727 734 Py_ssize_t numphase = 0;
728 735 Py_ssize_t minrevallphases = 0;
729 736 Py_ssize_t minrevphase = 0;
730 737 Py_ssize_t i = 0;
731 738 char *phases = NULL;
732 739 long phase;
733 740
734 741 if (!PyArg_ParseTuple(args, "O", &roots))
735 742 goto done;
736 743 if (roots == NULL || !PyList_Check(roots)) {
737 744 PyErr_SetString(PyExc_TypeError, "roots must be a list");
738 745 goto done;
739 746 }
740 747
741 748 phases = calloc(
742 749 len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
743 750 if (phases == NULL) {
744 751 PyErr_NoMemory();
745 752 goto done;
746 753 }
747 754 /* Put the phase information of all the roots in phases */
748 755 numphase = PyList_GET_SIZE(roots) + 1;
749 756 minrevallphases = len + 1;
750 757 phasessetlist = PyList_New(numphase);
751 758 if (phasessetlist == NULL)
752 759 goto done;
753 760
754 761 PyList_SET_ITEM(phasessetlist, 0, Py_None);
755 762 Py_INCREF(Py_None);
756 763
757 764 for (i = 0; i < numphase - 1; i++) {
758 765 phaseroots = PyList_GET_ITEM(roots, i);
759 766 phaseset = PySet_New(NULL);
760 767 if (phaseset == NULL)
761 768 goto release;
762 769 PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
763 770 if (!PyList_Check(phaseroots)) {
764 771 PyErr_SetString(PyExc_TypeError,
765 772 "roots item must be a list");
766 773 goto release;
767 774 }
768 775 minrevphase =
769 776 add_roots_get_min(self, phaseroots, i + 1, phases);
770 777 if (minrevphase == -2) /* Error from add_roots_get_min */
771 778 goto release;
772 779 minrevallphases = MIN(minrevallphases, minrevphase);
773 780 }
774 781 /* Propagate the phase information from the roots to the revs */
775 782 if (minrevallphases != -1) {
776 783 int parents[2];
777 784 for (i = minrevallphases; i < len; i++) {
778 785 if (index_get_parents(self, i, parents, (int)len - 1) <
779 786 0)
780 787 goto release;
781 788 set_phase_from_parents(phases, parents[0], parents[1],
782 789 i);
783 790 }
784 791 }
785 792 /* Transform phase list to a python list */
786 793 phasessize = PyInt_FromSsize_t(len);
787 794 if (phasessize == NULL)
788 795 goto release;
789 796 for (i = 0; i < len; i++) {
790 797 phase = phases[i];
791 798 /* We only store the sets of phase for non public phase, the
792 799 * public phase is computed as a difference */
793 800 if (phase != 0) {
794 801 phaseset = PyList_GET_ITEM(phasessetlist, phase);
795 802 rev = PyInt_FromSsize_t(i);
796 803 if (rev == NULL)
797 804 goto release;
798 805 PySet_Add(phaseset, rev);
799 806 Py_XDECREF(rev);
800 807 }
801 808 }
802 809 ret = PyTuple_Pack(2, phasessize, phasessetlist);
803 810
804 811 release:
805 812 Py_XDECREF(phasessize);
806 813 Py_XDECREF(phasessetlist);
807 814 done:
808 815 free(phases);
809 816 return ret;
810 817 }
811 818
812 819 static PyObject *index_headrevs(indexObject *self, PyObject *args)
813 820 {
814 821 Py_ssize_t i, j, len;
815 822 char *nothead = NULL;
816 823 PyObject *heads = NULL;
817 824 PyObject *filter = NULL;
818 825 PyObject *filteredrevs = Py_None;
819 826
820 827 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
821 828 return NULL;
822 829 }
823 830
824 831 if (self->headrevs && filteredrevs == self->filteredrevs)
825 832 return list_copy(self->headrevs);
826 833
827 834 Py_DECREF(self->filteredrevs);
828 835 self->filteredrevs = filteredrevs;
829 836 Py_INCREF(filteredrevs);
830 837
831 838 if (filteredrevs != Py_None) {
832 839 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
833 840 if (!filter) {
834 841 PyErr_SetString(
835 842 PyExc_TypeError,
836 843 "filteredrevs has no attribute __contains__");
837 844 goto bail;
838 845 }
839 846 }
840 847
841 848 len = index_length(self);
842 849 heads = PyList_New(0);
843 850 if (heads == NULL)
844 851 goto bail;
845 852 if (len == 0) {
846 853 PyObject *nullid = PyInt_FromLong(-1);
847 854 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
848 855 Py_XDECREF(nullid);
849 856 goto bail;
850 857 }
851 858 goto done;
852 859 }
853 860
854 861 nothead = calloc(len, 1);
855 862 if (nothead == NULL) {
856 863 PyErr_NoMemory();
857 864 goto bail;
858 865 }
859 866
860 867 for (i = len - 1; i >= 0; i--) {
861 868 int isfiltered;
862 869 int parents[2];
863 870
864 871 /* If nothead[i] == 1, it means we've seen an unfiltered child
865 872 * of this node already, and therefore this node is not
866 873 * filtered. So we can skip the expensive check_filter step.
867 874 */
868 875 if (nothead[i] != 1) {
869 876 isfiltered = check_filter(filter, i);
870 877 if (isfiltered == -1) {
871 878 PyErr_SetString(PyExc_TypeError,
872 879 "unable to check filter");
873 880 goto bail;
874 881 }
875 882
876 883 if (isfiltered) {
877 884 nothead[i] = 1;
878 885 continue;
879 886 }
880 887 }
881 888
882 889 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
883 890 goto bail;
884 891 for (j = 0; j < 2; j++) {
885 892 if (parents[j] >= 0)
886 893 nothead[parents[j]] = 1;
887 894 }
888 895 }
889 896
890 897 for (i = 0; i < len; i++) {
891 898 PyObject *head;
892 899
893 900 if (nothead[i])
894 901 continue;
895 902 head = PyInt_FromSsize_t(i);
896 903 if (head == NULL || PyList_Append(heads, head) == -1) {
897 904 Py_XDECREF(head);
898 905 goto bail;
899 906 }
900 907 }
901 908
902 909 done:
903 910 self->headrevs = heads;
904 911 Py_XDECREF(filter);
905 912 free(nothead);
906 913 return list_copy(self->headrevs);
907 914 bail:
908 915 Py_XDECREF(filter);
909 916 Py_XDECREF(heads);
910 917 free(nothead);
911 918 return NULL;
912 919 }
913 920
914 921 /**
915 922 * Obtain the base revision index entry.
916 923 *
917 924 * Callers must ensure that rev >= 0 or illegal memory access may occur.
918 925 */
919 926 static inline int index_baserev(indexObject *self, int rev)
920 927 {
921 928 const char *data;
922 929
923 930 if (rev >= self->length) {
924 931 PyObject *tuple =
925 932 PyList_GET_ITEM(self->added, rev - self->length);
926 933 long ret;
927 934 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
928 935 return -2;
929 936 }
930 937 return (int)ret;
931 938 } else {
932 939 data = index_deref(self, rev);
933 940 if (data == NULL) {
934 941 return -2;
935 942 }
936 943
937 944 return getbe32(data + 16);
938 945 }
939 946 }
940 947
941 948 static PyObject *index_deltachain(indexObject *self, PyObject *args)
942 949 {
943 950 int rev, generaldelta;
944 951 PyObject *stoparg;
945 952 int stoprev, iterrev, baserev = -1;
946 953 int stopped;
947 954 PyObject *chain = NULL, *result = NULL;
948 955 const Py_ssize_t length = index_length(self);
949 956
950 957 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
951 958 return NULL;
952 959 }
953 960
954 961 if (PyInt_Check(stoparg)) {
955 962 stoprev = (int)PyInt_AsLong(stoparg);
956 963 if (stoprev == -1 && PyErr_Occurred()) {
957 964 return NULL;
958 965 }
959 966 } else if (stoparg == Py_None) {
960 967 stoprev = -2;
961 968 } else {
962 969 PyErr_SetString(PyExc_ValueError,
963 970 "stoprev must be integer or None");
964 971 return NULL;
965 972 }
966 973
967 974 if (rev < 0 || rev >= length) {
968 975 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
969 976 return NULL;
970 977 }
971 978
972 979 chain = PyList_New(0);
973 980 if (chain == NULL) {
974 981 return NULL;
975 982 }
976 983
977 984 baserev = index_baserev(self, rev);
978 985
979 986 /* This should never happen. */
980 987 if (baserev <= -2) {
981 988 /* Error should be set by index_deref() */
982 989 assert(PyErr_Occurred());
983 990 goto bail;
984 991 }
985 992
986 993 iterrev = rev;
987 994
988 995 while (iterrev != baserev && iterrev != stoprev) {
989 996 PyObject *value = PyInt_FromLong(iterrev);
990 997 if (value == NULL) {
991 998 goto bail;
992 999 }
993 1000 if (PyList_Append(chain, value)) {
994 1001 Py_DECREF(value);
995 1002 goto bail;
996 1003 }
997 1004 Py_DECREF(value);
998 1005
999 1006 if (generaldelta) {
1000 1007 iterrev = baserev;
1001 1008 } else {
1002 1009 iterrev--;
1003 1010 }
1004 1011
1005 1012 if (iterrev < 0) {
1006 1013 break;
1007 1014 }
1008 1015
1009 1016 if (iterrev >= length) {
1010 1017 PyErr_SetString(PyExc_IndexError,
1011 1018 "revision outside index");
1012 1019 return NULL;
1013 1020 }
1014 1021
1015 1022 baserev = index_baserev(self, iterrev);
1016 1023
1017 1024 /* This should never happen. */
1018 1025 if (baserev <= -2) {
1019 1026 /* Error should be set by index_deref() */
1020 1027 assert(PyErr_Occurred());
1021 1028 goto bail;
1022 1029 }
1023 1030 }
1024 1031
1025 1032 if (iterrev == stoprev) {
1026 1033 stopped = 1;
1027 1034 } else {
1028 1035 PyObject *value = PyInt_FromLong(iterrev);
1029 1036 if (value == NULL) {
1030 1037 goto bail;
1031 1038 }
1032 1039 if (PyList_Append(chain, value)) {
1033 1040 Py_DECREF(value);
1034 1041 goto bail;
1035 1042 }
1036 1043 Py_DECREF(value);
1037 1044
1038 1045 stopped = 0;
1039 1046 }
1040 1047
1041 1048 if (PyList_Reverse(chain)) {
1042 1049 goto bail;
1043 1050 }
1044 1051
1045 1052 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1046 1053 Py_DECREF(chain);
1047 1054 return result;
1048 1055
1049 1056 bail:
1050 1057 Py_DECREF(chain);
1051 1058 return NULL;
1052 1059 }
1053 1060
1054 1061 static inline int64_t
1055 1062 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1056 1063 {
1057 1064 int64_t start_offset;
1058 1065 int64_t end_offset;
1059 1066 int end_size;
1060 1067 start_offset = index_get_start(self, start_rev);
1061 1068 if (start_offset < 0) {
1062 1069 return -1;
1063 1070 }
1064 1071 end_offset = index_get_start(self, end_rev);
1065 1072 if (end_offset < 0) {
1066 1073 return -1;
1067 1074 }
1068 1075 end_size = index_get_length(self, end_rev);
1069 1076 if (end_size < 0) {
1070 1077 return -1;
1071 1078 }
1072 1079 if (end_offset < start_offset) {
1073 1080 PyErr_Format(PyExc_ValueError,
1074 1081 "corrupted revlog index: inconsistent offset "
1075 1082 "between revisions (%zd) and (%zd)",
1076 1083 start_rev, end_rev);
1077 1084 return -1;
1078 1085 }
1079 1086 return (end_offset - start_offset) + (int64_t)end_size;
1080 1087 }
1081 1088
1082 1089 /* returns revs[startidx:endidx] without empty trailing revs */
1083 1090 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1084 1091 Py_ssize_t startidx, Py_ssize_t endidx)
1085 1092 {
1086 1093 int length;
1087 1094 while (endidx > 1 && endidx > startidx) {
1088 1095 length = index_get_length(self, revs[endidx - 1]);
1089 1096 if (length < 0) {
1090 1097 return -1;
1091 1098 }
1092 1099 if (length != 0) {
1093 1100 break;
1094 1101 }
1095 1102 endidx -= 1;
1096 1103 }
1097 1104 return endidx;
1098 1105 }
1099 1106
1100 1107 struct Gap {
1101 1108 int64_t size;
1102 1109 Py_ssize_t idx;
1103 1110 };
1104 1111
1105 1112 static int gap_compare(const void *left, const void *right)
1106 1113 {
1107 1114 const struct Gap *l_left = ((const struct Gap *)left);
1108 1115 const struct Gap *l_right = ((const struct Gap *)right);
1109 1116 if (l_left->size < l_right->size) {
1110 1117 return -1;
1111 1118 } else if (l_left->size > l_right->size) {
1112 1119 return 1;
1113 1120 }
1114 1121 return 0;
1115 1122 }
1116 1123 static int Py_ssize_t_compare(const void *left, const void *right)
1117 1124 {
1118 1125 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1119 1126 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1120 1127 if (l_left < l_right) {
1121 1128 return -1;
1122 1129 } else if (l_left > l_right) {
1123 1130 return 1;
1124 1131 }
1125 1132 return 0;
1126 1133 }
1127 1134
1128 1135 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1129 1136 {
1130 1137 /* method arguments */
1131 1138 PyObject *list_revs = NULL; /* revisions in the chain */
1132 1139 double targetdensity = 0; /* min density to achieve */
1133 1140 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1134 1141
1135 1142 /* other core variables */
1136 1143 Py_ssize_t idxlen = index_length(self);
1137 1144 Py_ssize_t i; /* used for various iteration */
1138 1145 PyObject *result = NULL; /* the final return of the function */
1139 1146
1140 1147 /* generic information about the delta chain being slice */
1141 1148 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1142 1149 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1143 1150 int64_t chainpayload = 0; /* sum of all delta in the chain */
1144 1151 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1145 1152
1146 1153 /* variable used for slicing the delta chain */
1147 1154 int64_t readdata = 0; /* amount of data currently planned to be read */
1148 1155 double density = 0; /* ration of payload data compared to read ones */
1149 1156 int64_t previous_end;
1150 1157 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1151 1158 Py_ssize_t num_gaps =
1152 1159 0; /* total number of notable gap recorded so far */
1153 1160 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1154 1161 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1155 1162 PyObject *chunk = NULL; /* individual slice */
1156 1163 PyObject *allchunks = NULL; /* all slices */
1157 1164 Py_ssize_t previdx;
1158 1165
1159 1166 /* parsing argument */
1160 1167 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1161 1168 &targetdensity, &mingapsize)) {
1162 1169 goto bail;
1163 1170 }
1164 1171
1165 1172 /* If the delta chain contains a single element, we do not need slicing
1166 1173 */
1167 1174 num_revs = PyList_GET_SIZE(list_revs);
1168 1175 if (num_revs <= 1) {
1169 1176 result = PyTuple_Pack(1, list_revs);
1170 1177 goto done;
1171 1178 }
1172 1179
1173 1180 /* Turn the python list into a native integer array (for efficiency) */
1174 1181 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1175 1182 if (revs == NULL) {
1176 1183 PyErr_NoMemory();
1177 1184 goto bail;
1178 1185 }
1179 1186 for (i = 0; i < num_revs; i++) {
1180 1187 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1181 1188 if (revnum == -1 && PyErr_Occurred()) {
1182 1189 goto bail;
1183 1190 }
1184 1191 if (revnum < 0 || revnum >= idxlen) {
1185 1192 PyErr_SetString(PyExc_IndexError, "index out of range");
1186 1193 goto bail;
1187 1194 }
1188 1195 revs[i] = revnum;
1189 1196 }
1190 1197
1191 1198 /* Compute and check various property of the unsliced delta chain */
1192 1199 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1193 1200 if (deltachainspan < 0) {
1194 1201 goto bail;
1195 1202 }
1196 1203
1197 1204 if (deltachainspan <= mingapsize) {
1198 1205 result = PyTuple_Pack(1, list_revs);
1199 1206 goto done;
1200 1207 }
1201 1208 chainpayload = 0;
1202 1209 for (i = 0; i < num_revs; i++) {
1203 1210 int tmp = index_get_length(self, revs[i]);
1204 1211 if (tmp < 0) {
1205 1212 goto bail;
1206 1213 }
1207 1214 chainpayload += tmp;
1208 1215 }
1209 1216
1210 1217 readdata = deltachainspan;
1211 1218 density = 1.0;
1212 1219
1213 1220 if (0 < deltachainspan) {
1214 1221 density = (double)chainpayload / (double)deltachainspan;
1215 1222 }
1216 1223
1217 1224 if (density >= targetdensity) {
1218 1225 result = PyTuple_Pack(1, list_revs);
1219 1226 goto done;
1220 1227 }
1221 1228
1222 1229 /* if chain is too sparse, look for relevant gaps */
1223 1230 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1224 1231 if (gaps == NULL) {
1225 1232 PyErr_NoMemory();
1226 1233 goto bail;
1227 1234 }
1228 1235
1229 1236 previous_end = -1;
1230 1237 for (i = 0; i < num_revs; i++) {
1231 1238 int64_t revstart;
1232 1239 int revsize;
1233 1240 revstart = index_get_start(self, revs[i]);
1234 1241 if (revstart < 0) {
1235 1242 goto bail;
1236 1243 };
1237 1244 revsize = index_get_length(self, revs[i]);
1238 1245 if (revsize < 0) {
1239 1246 goto bail;
1240 1247 };
1241 1248 if (revsize == 0) {
1242 1249 continue;
1243 1250 }
1244 1251 if (previous_end >= 0) {
1245 1252 int64_t gapsize = revstart - previous_end;
1246 1253 if (gapsize > mingapsize) {
1247 1254 gaps[num_gaps].size = gapsize;
1248 1255 gaps[num_gaps].idx = i;
1249 1256 num_gaps += 1;
1250 1257 }
1251 1258 }
1252 1259 previous_end = revstart + revsize;
1253 1260 }
1254 1261 if (num_gaps == 0) {
1255 1262 result = PyTuple_Pack(1, list_revs);
1256 1263 goto done;
1257 1264 }
1258 1265 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1259 1266
1260 1267 /* Slice the largest gap first, they improve the density the most */
1261 1268 selected_indices =
1262 1269 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1263 1270 if (selected_indices == NULL) {
1264 1271 PyErr_NoMemory();
1265 1272 goto bail;
1266 1273 }
1267 1274
1268 1275 for (i = num_gaps - 1; i >= 0; i--) {
1269 1276 selected_indices[num_selected] = gaps[i].idx;
1270 1277 readdata -= gaps[i].size;
1271 1278 num_selected += 1;
1272 1279 if (readdata <= 0) {
1273 1280 density = 1.0;
1274 1281 } else {
1275 1282 density = (double)chainpayload / (double)readdata;
1276 1283 }
1277 1284 if (density >= targetdensity) {
1278 1285 break;
1279 1286 }
1280 1287 }
1281 1288 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1282 1289 &Py_ssize_t_compare);
1283 1290
1284 1291 /* create the resulting slice */
1285 1292 allchunks = PyList_New(0);
1286 1293 if (allchunks == NULL) {
1287 1294 goto bail;
1288 1295 }
1289 1296 previdx = 0;
1290 1297 selected_indices[num_selected] = num_revs;
1291 1298 for (i = 0; i <= num_selected; i++) {
1292 1299 Py_ssize_t idx = selected_indices[i];
1293 1300 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1294 1301 if (endidx < 0) {
1295 1302 goto bail;
1296 1303 }
1297 1304 if (previdx < endidx) {
1298 1305 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1299 1306 if (chunk == NULL) {
1300 1307 goto bail;
1301 1308 }
1302 1309 if (PyList_Append(allchunks, chunk) == -1) {
1303 1310 goto bail;
1304 1311 }
1305 1312 Py_DECREF(chunk);
1306 1313 chunk = NULL;
1307 1314 }
1308 1315 previdx = idx;
1309 1316 }
1310 1317 result = allchunks;
1311 1318 goto done;
1312 1319
1313 1320 bail:
1314 1321 Py_XDECREF(allchunks);
1315 1322 Py_XDECREF(chunk);
1316 1323 done:
1317 1324 free(revs);
1318 1325 free(gaps);
1319 1326 free(selected_indices);
1320 1327 return result;
1321 1328 }
1322 1329
1323 1330 static inline int nt_level(const char *node, Py_ssize_t level)
1324 1331 {
1325 1332 int v = node[level >> 1];
1326 1333 if (!(level & 1))
1327 1334 v >>= 4;
1328 1335 return v & 0xf;
1329 1336 }
1330 1337
1331 1338 /*
1332 1339 * Return values:
1333 1340 *
1334 1341 * -4: match is ambiguous (multiple candidates)
1335 1342 * -2: not found
1336 1343 * rest: valid rev
1337 1344 */
1338 1345 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1339 1346 int hex)
1340 1347 {
1341 1348 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1342 1349 int level, maxlevel, off;
1343 1350
1344 1351 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1345 1352 return -1;
1346 1353
1347 1354 if (hex)
1348 1355 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1349 1356 else
1350 1357 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1351 1358
1352 1359 for (level = off = 0; level < maxlevel; level++) {
1353 1360 int k = getnybble(node, level);
1354 1361 nodetreenode *n = &self->nodes[off];
1355 1362 int v = n->children[k];
1356 1363
1357 1364 if (v < 0) {
1358 1365 const char *n;
1359 1366 Py_ssize_t i;
1360 1367
1361 1368 v = -(v + 2);
1362 1369 n = index_node(self->index, v);
1363 1370 if (n == NULL)
1364 1371 return -2;
1365 1372 for (i = level; i < maxlevel; i++)
1366 1373 if (getnybble(node, i) != nt_level(n, i))
1367 1374 return -2;
1368 1375 return v;
1369 1376 }
1370 1377 if (v == 0)
1371 1378 return -2;
1372 1379 off = v;
1373 1380 }
1374 1381 /* multiple matches against an ambiguous prefix */
1375 1382 return -4;
1376 1383 }
1377 1384
1378 1385 static int nt_new(nodetree *self)
1379 1386 {
1380 1387 if (self->length == self->capacity) {
1381 1388 unsigned newcapacity;
1382 1389 nodetreenode *newnodes;
1383 1390 newcapacity = self->capacity * 2;
1384 1391 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1385 1392 PyErr_SetString(PyExc_MemoryError,
1386 1393 "overflow in nt_new");
1387 1394 return -1;
1388 1395 }
1389 1396 newnodes =
1390 1397 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1391 1398 if (newnodes == NULL) {
1392 1399 PyErr_SetString(PyExc_MemoryError, "out of memory");
1393 1400 return -1;
1394 1401 }
1395 1402 self->capacity = newcapacity;
1396 1403 self->nodes = newnodes;
1397 1404 memset(&self->nodes[self->length], 0,
1398 1405 sizeof(nodetreenode) * (self->capacity - self->length));
1399 1406 }
1400 1407 return self->length++;
1401 1408 }
1402 1409
1403 1410 static int nt_insert(nodetree *self, const char *node, int rev)
1404 1411 {
1405 1412 int level = 0;
1406 1413 int off = 0;
1407 1414
1408 1415 while (level < 40) {
1409 1416 int k = nt_level(node, level);
1410 1417 nodetreenode *n;
1411 1418 int v;
1412 1419
1413 1420 n = &self->nodes[off];
1414 1421 v = n->children[k];
1415 1422
1416 1423 if (v == 0) {
1417 1424 n->children[k] = -rev - 2;
1418 1425 return 0;
1419 1426 }
1420 1427 if (v < 0) {
1421 1428 const char *oldnode =
1422 1429 index_node_existing(self->index, -(v + 2));
1423 1430 int noff;
1424 1431
1425 1432 if (oldnode == NULL)
1426 1433 return -1;
1427 1434 if (!memcmp(oldnode, node, 20)) {
1428 1435 n->children[k] = -rev - 2;
1429 1436 return 0;
1430 1437 }
1431 1438 noff = nt_new(self);
1432 1439 if (noff == -1)
1433 1440 return -1;
1434 1441 /* self->nodes may have been changed by realloc */
1435 1442 self->nodes[off].children[k] = noff;
1436 1443 off = noff;
1437 1444 n = &self->nodes[off];
1438 1445 n->children[nt_level(oldnode, ++level)] = v;
1439 1446 if (level > self->depth)
1440 1447 self->depth = level;
1441 1448 self->splits += 1;
1442 1449 } else {
1443 1450 level += 1;
1444 1451 off = v;
1445 1452 }
1446 1453 }
1447 1454
1448 1455 return -1;
1449 1456 }
1450 1457
1451 1458 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1452 1459 {
1453 1460 Py_ssize_t rev;
1454 1461 const char *node;
1455 1462 Py_ssize_t length;
1456 1463 if (!PyArg_ParseTuple(args, "n", &rev))
1457 1464 return NULL;
1458 1465 length = index_length(self->nt.index);
1459 1466 if (rev < 0 || rev >= length) {
1460 1467 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1461 1468 return NULL;
1462 1469 }
1463 1470 node = index_node_existing(self->nt.index, rev);
1464 1471 if (nt_insert(&self->nt, node, (int)rev) == -1)
1465 1472 return NULL;
1466 1473 Py_RETURN_NONE;
1467 1474 }
1468 1475
1469 1476 static int nt_delete_node(nodetree *self, const char *node)
1470 1477 {
1471 1478 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1472 1479 */
1473 1480 return nt_insert(self, node, -2);
1474 1481 }
1475 1482
1476 1483 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1477 1484 {
1478 1485 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1479 1486 self->nodes = NULL;
1480 1487
1481 1488 self->index = index;
1482 1489 /* The input capacity is in terms of revisions, while the field is in
1483 1490 * terms of nodetree nodes. */
1484 1491 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1485 1492 self->depth = 0;
1486 1493 self->splits = 0;
1487 1494 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1488 1495 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1489 1496 return -1;
1490 1497 }
1491 1498 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1492 1499 if (self->nodes == NULL) {
1493 1500 PyErr_NoMemory();
1494 1501 return -1;
1495 1502 }
1496 1503 self->length = 1;
1497 1504 return 0;
1498 1505 }
1499 1506
1500 1507 static PyTypeObject indexType;
1501 1508
1502 1509 static int ntobj_init(nodetreeObject *self, PyObject *args)
1503 1510 {
1504 1511 PyObject *index;
1505 1512 unsigned capacity;
1506 1513 if (!PyArg_ParseTuple(args, "O!I", &indexType, &index, &capacity))
1507 1514 return -1;
1508 1515 Py_INCREF(index);
1509 1516 return nt_init(&self->nt, (indexObject *)index, capacity);
1510 1517 }
1511 1518
1512 1519 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1513 1520 {
1514 1521 return nt_find(self, node, nodelen, 1);
1515 1522 }
1516 1523
1517 1524 /*
1518 1525 * Find the length of the shortest unique prefix of node.
1519 1526 *
1520 1527 * Return values:
1521 1528 *
1522 1529 * -3: error (exception set)
1523 1530 * -2: not found (no exception set)
1524 1531 * rest: length of shortest prefix
1525 1532 */
1526 1533 static int nt_shortest(nodetree *self, const char *node)
1527 1534 {
1528 1535 int level, off;
1529 1536
1530 1537 for (level = off = 0; level < 40; level++) {
1531 1538 int k, v;
1532 1539 nodetreenode *n = &self->nodes[off];
1533 1540 k = nt_level(node, level);
1534 1541 v = n->children[k];
1535 1542 if (v < 0) {
1536 1543 const char *n;
1537 1544 v = -(v + 2);
1538 1545 n = index_node_existing(self->index, v);
1539 1546 if (n == NULL)
1540 1547 return -3;
1541 1548 if (memcmp(node, n, 20) != 0)
1542 1549 /*
1543 1550 * Found a unique prefix, but it wasn't for the
1544 1551 * requested node (i.e the requested node does
1545 1552 * not exist).
1546 1553 */
1547 1554 return -2;
1548 1555 return level + 1;
1549 1556 }
1550 1557 if (v == 0)
1551 1558 return -2;
1552 1559 off = v;
1553 1560 }
1554 1561 /*
1555 1562 * The node was still not unique after 40 hex digits, so this won't
1556 1563 * happen. Also, if we get here, then there's a programming error in
1557 1564 * this file that made us insert a node longer than 40 hex digits.
1558 1565 */
1559 1566 PyErr_SetString(PyExc_Exception, "broken node tree");
1560 1567 return -3;
1561 1568 }
1562 1569
1563 1570 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1564 1571 {
1565 1572 PyObject *val;
1566 1573 char *node;
1567 1574 int length;
1568 1575
1569 1576 if (!PyArg_ParseTuple(args, "O", &val))
1570 1577 return NULL;
1571 1578 if (node_check(val, &node) == -1)
1572 1579 return NULL;
1573 1580
1574 1581 length = nt_shortest(&self->nt, node);
1575 1582 if (length == -3)
1576 1583 return NULL;
1577 1584 if (length == -2) {
1578 1585 raise_revlog_error();
1579 1586 return NULL;
1580 1587 }
1581 1588 return PyInt_FromLong(length);
1582 1589 }
1583 1590
1584 1591 static void nt_dealloc(nodetree *self)
1585 1592 {
1586 1593 free(self->nodes);
1587 1594 self->nodes = NULL;
1588 1595 }
1589 1596
1590 1597 static void ntobj_dealloc(nodetreeObject *self)
1591 1598 {
1592 1599 Py_XDECREF(self->nt.index);
1593 1600 nt_dealloc(&self->nt);
1594 1601 PyObject_Del(self);
1595 1602 }
1596 1603
1597 1604 static PyMethodDef ntobj_methods[] = {
1598 1605 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1599 1606 "insert an index entry"},
1600 1607 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1601 1608 "find length of shortest hex nodeid of a binary ID"},
1602 1609 {NULL} /* Sentinel */
1603 1610 };
1604 1611
1605 1612 static PyTypeObject nodetreeType = {
1606 1613 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1607 1614 "parsers.nodetree", /* tp_name */
1608 1615 sizeof(nodetreeObject), /* tp_basicsize */
1609 1616 0, /* tp_itemsize */
1610 1617 (destructor)ntobj_dealloc, /* tp_dealloc */
1611 1618 0, /* tp_print */
1612 1619 0, /* tp_getattr */
1613 1620 0, /* tp_setattr */
1614 1621 0, /* tp_compare */
1615 1622 0, /* tp_repr */
1616 1623 0, /* tp_as_number */
1617 1624 0, /* tp_as_sequence */
1618 1625 0, /* tp_as_mapping */
1619 1626 0, /* tp_hash */
1620 1627 0, /* tp_call */
1621 1628 0, /* tp_str */
1622 1629 0, /* tp_getattro */
1623 1630 0, /* tp_setattro */
1624 1631 0, /* tp_as_buffer */
1625 1632 Py_TPFLAGS_DEFAULT, /* tp_flags */
1626 1633 "nodetree", /* tp_doc */
1627 1634 0, /* tp_traverse */
1628 1635 0, /* tp_clear */
1629 1636 0, /* tp_richcompare */
1630 1637 0, /* tp_weaklistoffset */
1631 1638 0, /* tp_iter */
1632 1639 0, /* tp_iternext */
1633 1640 ntobj_methods, /* tp_methods */
1634 1641 0, /* tp_members */
1635 1642 0, /* tp_getset */
1636 1643 0, /* tp_base */
1637 1644 0, /* tp_dict */
1638 1645 0, /* tp_descr_get */
1639 1646 0, /* tp_descr_set */
1640 1647 0, /* tp_dictoffset */
1641 1648 (initproc)ntobj_init, /* tp_init */
1642 1649 0, /* tp_alloc */
1643 1650 };
1644 1651
1645 1652 static int index_init_nt(indexObject *self)
1646 1653 {
1647 1654 if (!self->ntinitialized) {
1648 1655 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1649 1656 nt_dealloc(&self->nt);
1650 1657 return -1;
1651 1658 }
1652 1659 if (nt_insert(&self->nt, nullid, -1) == -1) {
1653 1660 nt_dealloc(&self->nt);
1654 1661 return -1;
1655 1662 }
1656 1663 self->ntinitialized = 1;
1657 1664 self->ntrev = (int)index_length(self);
1658 1665 self->ntlookups = 1;
1659 1666 self->ntmisses = 0;
1660 1667 }
1661 1668 return 0;
1662 1669 }
1663 1670
1664 1671 /*
1665 1672 * Return values:
1666 1673 *
1667 1674 * -3: error (exception set)
1668 1675 * -2: not found (no exception set)
1669 1676 * rest: valid rev
1670 1677 */
1671 1678 static int index_find_node(indexObject *self, const char *node,
1672 1679 Py_ssize_t nodelen)
1673 1680 {
1674 1681 int rev;
1675 1682
1676 1683 if (index_init_nt(self) == -1)
1677 1684 return -3;
1678 1685
1679 1686 self->ntlookups++;
1680 1687 rev = nt_find(&self->nt, node, nodelen, 0);
1681 1688 if (rev >= -1)
1682 1689 return rev;
1683 1690
1684 1691 /*
1685 1692 * For the first handful of lookups, we scan the entire index,
1686 1693 * and cache only the matching nodes. This optimizes for cases
1687 1694 * like "hg tip", where only a few nodes are accessed.
1688 1695 *
1689 1696 * After that, we cache every node we visit, using a single
1690 1697 * scan amortized over multiple lookups. This gives the best
1691 1698 * bulk performance, e.g. for "hg log".
1692 1699 */
1693 1700 if (self->ntmisses++ < 4) {
1694 1701 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1695 1702 const char *n = index_node_existing(self, rev);
1696 1703 if (n == NULL)
1697 1704 return -3;
1698 1705 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1699 1706 if (nt_insert(&self->nt, n, rev) == -1)
1700 1707 return -3;
1701 1708 break;
1702 1709 }
1703 1710 }
1704 1711 } else {
1705 1712 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1706 1713 const char *n = index_node_existing(self, rev);
1707 1714 if (n == NULL)
1708 1715 return -3;
1709 1716 if (nt_insert(&self->nt, n, rev) == -1) {
1710 1717 self->ntrev = rev + 1;
1711 1718 return -3;
1712 1719 }
1713 1720 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1714 1721 break;
1715 1722 }
1716 1723 }
1717 1724 self->ntrev = rev;
1718 1725 }
1719 1726
1720 1727 if (rev >= 0)
1721 1728 return rev;
1722 1729 return -2;
1723 1730 }
1724 1731
1725 1732 static PyObject *index_getitem(indexObject *self, PyObject *value)
1726 1733 {
1727 1734 char *node;
1728 1735 int rev;
1729 1736
1730 1737 if (PyInt_Check(value)) {
1731 1738 long idx;
1732 1739 if (!pylong_to_long(value, &idx)) {
1733 1740 return NULL;
1734 1741 }
1735 1742 return index_get(self, idx);
1736 1743 }
1737 1744
1738 1745 if (node_check(value, &node) == -1)
1739 1746 return NULL;
1740 1747 rev = index_find_node(self, node, 20);
1741 1748 if (rev >= -1)
1742 1749 return PyInt_FromLong(rev);
1743 1750 if (rev == -2)
1744 1751 raise_revlog_error();
1745 1752 return NULL;
1746 1753 }
1747 1754
1748 1755 /*
1749 1756 * Fully populate the radix tree.
1750 1757 */
1751 1758 static int index_populate_nt(indexObject *self)
1752 1759 {
1753 1760 int rev;
1754 1761 if (self->ntrev > 0) {
1755 1762 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1756 1763 const char *n = index_node_existing(self, rev);
1757 1764 if (n == NULL)
1758 1765 return -1;
1759 1766 if (nt_insert(&self->nt, n, rev) == -1)
1760 1767 return -1;
1761 1768 }
1762 1769 self->ntrev = -1;
1763 1770 }
1764 1771 return 0;
1765 1772 }
1766 1773
1767 1774 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1768 1775 {
1769 1776 const char *fullnode;
1770 1777 int nodelen;
1771 1778 char *node;
1772 1779 int rev, i;
1773 1780
1774 1781 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1775 1782 return NULL;
1776 1783
1777 1784 if (nodelen < 1) {
1778 1785 PyErr_SetString(PyExc_ValueError, "key too short");
1779 1786 return NULL;
1780 1787 }
1781 1788
1782 1789 if (nodelen > 40) {
1783 1790 PyErr_SetString(PyExc_ValueError, "key too long");
1784 1791 return NULL;
1785 1792 }
1786 1793
1787 1794 for (i = 0; i < nodelen; i++)
1788 1795 hexdigit(node, i);
1789 1796 if (PyErr_Occurred()) {
1790 1797 /* input contains non-hex characters */
1791 1798 PyErr_Clear();
1792 1799 Py_RETURN_NONE;
1793 1800 }
1794 1801
1795 1802 if (index_init_nt(self) == -1)
1796 1803 return NULL;
1797 1804 if (index_populate_nt(self) == -1)
1798 1805 return NULL;
1799 1806 rev = nt_partialmatch(&self->nt, node, nodelen);
1800 1807
1801 1808 switch (rev) {
1802 1809 case -4:
1803 1810 raise_revlog_error();
1804 1811 return NULL;
1805 1812 case -2:
1806 1813 Py_RETURN_NONE;
1807 1814 case -1:
1808 1815 return PyBytes_FromStringAndSize(nullid, 20);
1809 1816 }
1810 1817
1811 1818 fullnode = index_node_existing(self, rev);
1812 1819 if (fullnode == NULL) {
1813 1820 return NULL;
1814 1821 }
1815 1822 return PyBytes_FromStringAndSize(fullnode, 20);
1816 1823 }
1817 1824
1818 1825 static PyObject *index_shortest(indexObject *self, PyObject *args)
1819 1826 {
1820 1827 PyObject *val;
1821 1828 char *node;
1822 1829 int length;
1823 1830
1824 1831 if (!PyArg_ParseTuple(args, "O", &val))
1825 1832 return NULL;
1826 1833 if (node_check(val, &node) == -1)
1827 1834 return NULL;
1828 1835
1829 1836 self->ntlookups++;
1830 1837 if (index_init_nt(self) == -1)
1831 1838 return NULL;
1832 1839 if (index_populate_nt(self) == -1)
1833 1840 return NULL;
1834 1841 length = nt_shortest(&self->nt, node);
1835 1842 if (length == -3)
1836 1843 return NULL;
1837 1844 if (length == -2) {
1838 1845 raise_revlog_error();
1839 1846 return NULL;
1840 1847 }
1841 1848 return PyInt_FromLong(length);
1842 1849 }
1843 1850
1844 1851 static PyObject *index_m_get(indexObject *self, PyObject *args)
1845 1852 {
1846 1853 PyObject *val;
1847 1854 char *node;
1848 1855 int rev;
1849 1856
1850 1857 if (!PyArg_ParseTuple(args, "O", &val))
1851 1858 return NULL;
1852 1859 if (node_check(val, &node) == -1)
1853 1860 return NULL;
1854 1861 rev = index_find_node(self, node, 20);
1855 1862 if (rev == -3)
1856 1863 return NULL;
1857 1864 if (rev == -2)
1858 1865 Py_RETURN_NONE;
1859 1866 return PyInt_FromLong(rev);
1860 1867 }
1861 1868
1862 1869 static int index_contains(indexObject *self, PyObject *value)
1863 1870 {
1864 1871 char *node;
1865 1872
1866 1873 if (PyInt_Check(value)) {
1867 1874 long rev;
1868 1875 if (!pylong_to_long(value, &rev)) {
1869 1876 return -1;
1870 1877 }
1871 1878 return rev >= -1 && rev < index_length(self);
1872 1879 }
1873 1880
1874 1881 if (node_check(value, &node) == -1)
1875 1882 return -1;
1876 1883
1877 1884 switch (index_find_node(self, node, 20)) {
1878 1885 case -3:
1879 1886 return -1;
1880 1887 case -2:
1881 1888 return 0;
1882 1889 default:
1883 1890 return 1;
1884 1891 }
1885 1892 }
1886 1893
1887 1894 typedef uint64_t bitmask;
1888 1895
1889 1896 /*
1890 1897 * Given a disjoint set of revs, return all candidates for the
1891 1898 * greatest common ancestor. In revset notation, this is the set
1892 1899 * "heads(::a and ::b and ...)"
1893 1900 */
1894 1901 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1895 1902 int revcount)
1896 1903 {
1897 1904 const bitmask allseen = (1ull << revcount) - 1;
1898 1905 const bitmask poison = 1ull << revcount;
1899 1906 PyObject *gca = PyList_New(0);
1900 1907 int i, v, interesting;
1901 1908 int maxrev = -1;
1902 1909 bitmask sp;
1903 1910 bitmask *seen;
1904 1911
1905 1912 if (gca == NULL)
1906 1913 return PyErr_NoMemory();
1907 1914
1908 1915 for (i = 0; i < revcount; i++) {
1909 1916 if (revs[i] > maxrev)
1910 1917 maxrev = revs[i];
1911 1918 }
1912 1919
1913 1920 seen = calloc(sizeof(*seen), maxrev + 1);
1914 1921 if (seen == NULL) {
1915 1922 Py_DECREF(gca);
1916 1923 return PyErr_NoMemory();
1917 1924 }
1918 1925
1919 1926 for (i = 0; i < revcount; i++)
1920 1927 seen[revs[i]] = 1ull << i;
1921 1928
1922 1929 interesting = revcount;
1923 1930
1924 1931 for (v = maxrev; v >= 0 && interesting; v--) {
1925 1932 bitmask sv = seen[v];
1926 1933 int parents[2];
1927 1934
1928 1935 if (!sv)
1929 1936 continue;
1930 1937
1931 1938 if (sv < poison) {
1932 1939 interesting -= 1;
1933 1940 if (sv == allseen) {
1934 1941 PyObject *obj = PyInt_FromLong(v);
1935 1942 if (obj == NULL)
1936 1943 goto bail;
1937 1944 if (PyList_Append(gca, obj) == -1) {
1938 1945 Py_DECREF(obj);
1939 1946 goto bail;
1940 1947 }
1941 1948 sv |= poison;
1942 1949 for (i = 0; i < revcount; i++) {
1943 1950 if (revs[i] == v)
1944 1951 goto done;
1945 1952 }
1946 1953 }
1947 1954 }
1948 1955 if (index_get_parents(self, v, parents, maxrev) < 0)
1949 1956 goto bail;
1950 1957
1951 1958 for (i = 0; i < 2; i++) {
1952 1959 int p = parents[i];
1953 1960 if (p == -1)
1954 1961 continue;
1955 1962 sp = seen[p];
1956 1963 if (sv < poison) {
1957 1964 if (sp == 0) {
1958 1965 seen[p] = sv;
1959 1966 interesting++;
1960 1967 } else if (sp != sv)
1961 1968 seen[p] |= sv;
1962 1969 } else {
1963 1970 if (sp && sp < poison)
1964 1971 interesting--;
1965 1972 seen[p] = sv;
1966 1973 }
1967 1974 }
1968 1975 }
1969 1976
1970 1977 done:
1971 1978 free(seen);
1972 1979 return gca;
1973 1980 bail:
1974 1981 free(seen);
1975 1982 Py_XDECREF(gca);
1976 1983 return NULL;
1977 1984 }
1978 1985
1979 1986 /*
1980 1987 * Given a disjoint set of revs, return the subset with the longest
1981 1988 * path to the root.
1982 1989 */
1983 1990 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1984 1991 {
1985 1992 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1986 1993 static const Py_ssize_t capacity = 24;
1987 1994 int *depth, *interesting = NULL;
1988 1995 int i, j, v, ninteresting;
1989 1996 PyObject *dict = NULL, *keys = NULL;
1990 1997 long *seen = NULL;
1991 1998 int maxrev = -1;
1992 1999 long final;
1993 2000
1994 2001 if (revcount > capacity) {
1995 2002 PyErr_Format(PyExc_OverflowError,
1996 2003 "bitset size (%ld) > capacity (%ld)",
1997 2004 (long)revcount, (long)capacity);
1998 2005 return NULL;
1999 2006 }
2000 2007
2001 2008 for (i = 0; i < revcount; i++) {
2002 2009 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2003 2010 if (n > maxrev)
2004 2011 maxrev = n;
2005 2012 }
2006 2013
2007 2014 depth = calloc(sizeof(*depth), maxrev + 1);
2008 2015 if (depth == NULL)
2009 2016 return PyErr_NoMemory();
2010 2017
2011 2018 seen = calloc(sizeof(*seen), maxrev + 1);
2012 2019 if (seen == NULL) {
2013 2020 PyErr_NoMemory();
2014 2021 goto bail;
2015 2022 }
2016 2023
2017 2024 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2018 2025 if (interesting == NULL) {
2019 2026 PyErr_NoMemory();
2020 2027 goto bail;
2021 2028 }
2022 2029
2023 2030 if (PyList_Sort(revs) == -1)
2024 2031 goto bail;
2025 2032
2026 2033 for (i = 0; i < revcount; i++) {
2027 2034 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2028 2035 long b = 1l << i;
2029 2036 depth[n] = 1;
2030 2037 seen[n] = b;
2031 2038 interesting[b] = 1;
2032 2039 }
2033 2040
2034 2041 /* invariant: ninteresting is the number of non-zero entries in
2035 2042 * interesting. */
2036 2043 ninteresting = (int)revcount;
2037 2044
2038 2045 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2039 2046 int dv = depth[v];
2040 2047 int parents[2];
2041 2048 long sv;
2042 2049
2043 2050 if (dv == 0)
2044 2051 continue;
2045 2052
2046 2053 sv = seen[v];
2047 2054 if (index_get_parents(self, v, parents, maxrev) < 0)
2048 2055 goto bail;
2049 2056
2050 2057 for (i = 0; i < 2; i++) {
2051 2058 int p = parents[i];
2052 2059 long sp;
2053 2060 int dp;
2054 2061
2055 2062 if (p == -1)
2056 2063 continue;
2057 2064
2058 2065 dp = depth[p];
2059 2066 sp = seen[p];
2060 2067 if (dp <= dv) {
2061 2068 depth[p] = dv + 1;
2062 2069 if (sp != sv) {
2063 2070 interesting[sv] += 1;
2064 2071 seen[p] = sv;
2065 2072 if (sp) {
2066 2073 interesting[sp] -= 1;
2067 2074 if (interesting[sp] == 0)
2068 2075 ninteresting -= 1;
2069 2076 }
2070 2077 }
2071 2078 } else if (dv == dp - 1) {
2072 2079 long nsp = sp | sv;
2073 2080 if (nsp == sp)
2074 2081 continue;
2075 2082 seen[p] = nsp;
2076 2083 interesting[sp] -= 1;
2077 2084 if (interesting[sp] == 0)
2078 2085 ninteresting -= 1;
2079 2086 if (interesting[nsp] == 0)
2080 2087 ninteresting += 1;
2081 2088 interesting[nsp] += 1;
2082 2089 }
2083 2090 }
2084 2091 interesting[sv] -= 1;
2085 2092 if (interesting[sv] == 0)
2086 2093 ninteresting -= 1;
2087 2094 }
2088 2095
2089 2096 final = 0;
2090 2097 j = ninteresting;
2091 2098 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2092 2099 if (interesting[i] == 0)
2093 2100 continue;
2094 2101 final |= i;
2095 2102 j -= 1;
2096 2103 }
2097 2104 if (final == 0) {
2098 2105 keys = PyList_New(0);
2099 2106 goto bail;
2100 2107 }
2101 2108
2102 2109 dict = PyDict_New();
2103 2110 if (dict == NULL)
2104 2111 goto bail;
2105 2112
2106 2113 for (i = 0; i < revcount; i++) {
2107 2114 PyObject *key;
2108 2115
2109 2116 if ((final & (1 << i)) == 0)
2110 2117 continue;
2111 2118
2112 2119 key = PyList_GET_ITEM(revs, i);
2113 2120 Py_INCREF(key);
2114 2121 Py_INCREF(Py_None);
2115 2122 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2116 2123 Py_DECREF(key);
2117 2124 Py_DECREF(Py_None);
2118 2125 goto bail;
2119 2126 }
2120 2127 }
2121 2128
2122 2129 keys = PyDict_Keys(dict);
2123 2130
2124 2131 bail:
2125 2132 free(depth);
2126 2133 free(seen);
2127 2134 free(interesting);
2128 2135 Py_XDECREF(dict);
2129 2136
2130 2137 return keys;
2131 2138 }
2132 2139
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 *
 * Returns a new list of revision numbers, or NULL with an exception
 * set.  At most 24 distinct input revisions are supported (the GCA
 * candidate search uses a fixed-width bitset).
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0; /* 64-bit bloom filter keyed on rev & 0x3f */
	int revcount = 0;   /* number of de-duplicated input revs */
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24; /* bitset width limit */
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyInt_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyInt_AsLong(obj);
		Py_DECREF(obj);
		/* nullrev input: the common-ancestor set is empty.
		 * NOTE(review): PyInt_AsLong() also returns -1 on error,
		 * which is indistinguishable from nullrev here — confirm
		 * callers only pass real ints (PyInt_Check above makes
		 * conversion failure unlikely). */
		if (val == -1) {
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			/* possible duplicate: fall back to a linear scan */
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	/* trivial fast paths: empty and singleton inputs */
	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyInt_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj); /* steals the obj reference */
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}
2224 2231
2225 2232 /*
2226 2233 * Given a (possibly overlapping) set of revs, return the greatest
2227 2234 * common ancestors: those with the longest path to the root.
2228 2235 */
2229 2236 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2230 2237 {
2231 2238 PyObject *ret;
2232 2239 PyObject *gca = index_commonancestorsheads(self, args);
2233 2240 if (gca == NULL)
2234 2241 return NULL;
2235 2242
2236 2243 if (PyList_GET_SIZE(gca) <= 1) {
2237 2244 return gca;
2238 2245 }
2239 2246
2240 2247 ret = find_deepest(self, gca);
2241 2248 Py_DECREF(gca);
2242 2249 return ret;
2243 2250 }
2244 2251
2245 2252 /*
2246 2253 * Invalidate any trie entries introduced by added revs.
2247 2254 */
2248 2255 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2249 2256 {
2250 2257 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2251 2258
2252 2259 for (i = start; i < len; i++) {
2253 2260 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2254 2261 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2255 2262
2256 2263 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2257 2264 }
2258 2265
2259 2266 if (start == 0)
2260 2267 Py_CLEAR(self->added);
2261 2268 }
2262 2269
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range, but exclude the sentinel nullid entry.
 *
 * Only contiguous step-1 slices reaching the end of the index are
 * supported.  Returns 0 on success, -1 (with an exception set) on
 * failure.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	/* +1 accounts for the virtual nullid entry at the end */
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

/* Argument changed from PySliceObject* to PyObject* in Python 3. */
#ifdef IS_PY3K
	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
#else
	if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
	                         &step, &slicelength) < 0)
#endif
		return -1;

	if (slicelength <= 0)
		return 0;

	/* slice pointing the "wrong way": treat as empty */
	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* canonicalize a negative-step slice into the equivalent
	 * ascending range so the checks below only handle one shape */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* truncation reaches into the on-disk portion */
		if (self->ntinitialized) {
			Py_ssize_t i;

			/* unregister truncated nodes from the trie.
			 * NOTE(review): loop starts at start + 1, so rev
			 * `start` itself seems to keep its trie entry;
			 * presumably the ntrev reset below makes lookups
			 * repopulate lazily — confirm. */
			for (i = start + 1; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->added)
				index_invalidate_added(self, 0);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		}
		self->length = start;
		if (start < self->raw_length) {
			/* drop cached entry tuples for truncated revs */
			if (self->cache) {
				Py_ssize_t i;
				for (i = start; i < self->raw_length; i++)
					Py_CLEAR(self->cache[i]);
			}
			self->raw_length = start;
		}
		goto done;
	}

	/* truncation is entirely within the in-memory "added" entries */
	if (self->ntinitialized) {
		index_invalidate_added(self, start - self->length);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	}
	if (self->added)
		ret = PyList_SetSlice(self->added, start - self->length,
		                      PyList_GET_SIZE(self->added), NULL);
done:
	/* head computation is no longer valid after any truncation */
	Py_CLEAR(self->headrevs);
	return ret;
}
2347 2354
2348 2355 /*
2349 2356 * Supported ops:
2350 2357 *
2351 2358 * slice deletion
2352 2359 * string assignment (extend node->rev mapping)
2353 2360 * string deletion (shrink node->rev mapping)
2354 2361 */
2355 2362 static int index_assign_subscript(indexObject *self, PyObject *item,
2356 2363 PyObject *value)
2357 2364 {
2358 2365 char *node;
2359 2366 long rev;
2360 2367
2361 2368 if (PySlice_Check(item) && value == NULL)
2362 2369 return index_slice_del(self, item);
2363 2370
2364 2371 if (node_check(item, &node) == -1)
2365 2372 return -1;
2366 2373
2367 2374 if (value == NULL)
2368 2375 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2369 2376 : 0;
2370 2377 rev = PyInt_AsLong(value);
2371 2378 if (rev > INT_MAX || rev < 0) {
2372 2379 if (!PyErr_Occurred())
2373 2380 PyErr_SetString(PyExc_ValueError, "rev out of range");
2374 2381 return -1;
2375 2382 }
2376 2383
2377 2384 if (index_init_nt(self) == -1)
2378 2385 return -1;
2379 2386 return nt_insert(&self->nt, node, (int)rev);
2380 2387 }
2381 2388
2382 2389 /*
2383 2390 * Find all RevlogNG entries in an index that has inline data. Update
2384 2391 * the optional "offsets" table with those entries.
2385 2392 */
2386 2393 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2387 2394 {
2388 2395 const char *data = (const char *)self->buf.buf;
2389 2396 Py_ssize_t pos = 0;
2390 2397 Py_ssize_t end = self->buf.len;
2391 2398 long incr = v1_hdrsize;
2392 2399 Py_ssize_t len = 0;
2393 2400
2394 2401 while (pos + v1_hdrsize <= end && pos >= 0) {
2395 2402 uint32_t comp_len;
2396 2403 /* 3rd element of header is length of compressed inline data */
2397 2404 comp_len = getbe32(data + pos + 8);
2398 2405 incr = v1_hdrsize + comp_len;
2399 2406 if (offsets)
2400 2407 offsets[len] = data + pos;
2401 2408 len++;
2402 2409 pos += incr;
2403 2410 }
2404 2411
2405 2412 if (pos != end) {
2406 2413 if (!PyErr_Occurred())
2407 2414 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2408 2415 return -1;
2409 2416 }
2410 2417
2411 2418 return len;
2412 2419 }
2413 2420
/*
 * Initialize an index object from its (data, inlined) arguments.
 *
 * All fields are pre-initialized before argument parsing so that
 * index_dealloc() is always safe to run, even on a partially
 * constructed object.  On the bail path, references already taken
 * (data, buf, filteredrevs) are left for index_dealloc() to release.
 */
static int index_init(indexObject *self, PyObject *args)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	/* Initialize before argument-checking to avoid index_dealloc() crash.
	 */
	self->raw_length = 0;
	self->added = NULL;
	self->cache = NULL;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->ntinitialized = 0;
	self->offsets = NULL;

	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
		                "data does not support buffer interface");
		return -1;
	}

	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		/* inline index: record boundaries are data-dependent and
		 * must be discovered by scanning */
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->raw_length = len;
		self->length = len;
	} else {
		/* non-inline index: fixed-size records */
		if (size % v1_hdrsize) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->raw_length = size / v1_hdrsize;
		self->length = self->raw_length;
	}

	return 0;
bail:
	return -1;
}
2470 2477
2471 2478 static PyObject *index_nodemap(indexObject *self)
2472 2479 {
2473 2480 Py_INCREF(self);
2474 2481 return (PyObject *)self;
2475 2482 }
2476 2483
2477 2484 static void _index_clearcaches(indexObject *self)
2478 2485 {
2479 2486 if (self->cache) {
2480 2487 Py_ssize_t i;
2481 2488
2482 2489 for (i = 0; i < self->raw_length; i++)
2483 2490 Py_CLEAR(self->cache[i]);
2484 2491 free(self->cache);
2485 2492 self->cache = NULL;
2486 2493 }
2487 2494 if (self->offsets) {
2488 2495 PyMem_Free((void *)self->offsets);
2489 2496 self->offsets = NULL;
2490 2497 }
2491 2498 if (self->ntinitialized) {
2492 2499 nt_dealloc(&self->nt);
2493 2500 }
2494 2501 self->ntinitialized = 0;
2495 2502 Py_CLEAR(self->headrevs);
2496 2503 }
2497 2504
2498 2505 static PyObject *index_clearcaches(indexObject *self)
2499 2506 {
2500 2507 _index_clearcaches(self);
2501 2508 self->ntrev = -1;
2502 2509 self->ntlookups = self->ntmisses = 0;
2503 2510 Py_RETURN_NONE;
2504 2511 }
2505 2512
/*
 * Destructor: drop all caches first, then release the buffer view and
 * every Python reference held by the index.
 */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	/* buf.buf is NULL when index_init() bailed before GetBuffer */
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	Py_XDECREF(self->added);
	PyObject_Del(self);
}
2518 2525
/* Sequence protocol: len(index), index[rev], and `node in index`. */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
2529 2536
/* Mapping protocol: subscripting by node and item assignment/deletion. */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
2535 2542
2536 2543 static PyMethodDef index_methods[] = {
2537 2544 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2538 2545 "return the gca set of the given revs"},
2539 2546 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2540 2547 METH_VARARGS,
2541 2548 "return the heads of the common ancestors of the given revs"},
2542 2549 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2543 2550 "clear the index caches"},
2544 2551 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2545 2552 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2546 2553 "compute phases"},
2547 2554 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2548 2555 "reachableroots"},
2549 2556 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2550 2557 "get head revisions"}, /* Can do filtering since 3.2 */
2551 2558 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2552 2559 "get filtered head revisions"}, /* Can always do filtering */
2553 2560 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2554 2561 "determine revisions with deltas to reconstruct fulltext"},
2555 2562 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2556 2563 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2557 2564 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2558 2565 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2559 2566 "match a potentially ambiguous node ID"},
2560 2567 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2561 2568 "find length of shortest hex nodeid of a binary ID"},
2562 2569 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2563 2570 {NULL} /* Sentinel */
2564 2571 };
2565 2572
/* Attribute descriptors: the read-only "nodemap" property. */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
2570 2577
/* Type object for parsers.index — supports both the sequence protocol
 * (rev-number access) and the mapping protocol (node access). */
static PyTypeObject indexType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    0,                             /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
2610 2617
/*
 * returns a tuple of the form (index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses RevlogNG records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 *        index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
PyObject *parse_index2(PyObject *self, PyObject *args)
{
	PyObject *tuple = NULL, *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &indexType);
	if (idx == NULL)
		goto bail;

	/* PyObject_New() does not run tp_init; initialize explicitly */
	ret = index_init(idx, args);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	/* "NN" steals both references on success; on failure the bail
	 * path still owns idx and cache and must release them */
	tuple = Py_BuildValue("NN", idx, cache);
	if (!tuple)
		goto bail;
	return tuple;

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	Py_XDECREF(tuple);
	return NULL;
}
2655 2662
#ifdef WITH_RUST

/* rustlazyancestors: iteration over ancestors implemented in Rust
 *
 * This class holds a reference to an index and to the Rust iterator.
 */
typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;

struct rustlazyancestorsObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	indexObject *index; /* Ref kept to avoid GC'ing the index */
	void *iter;         /* Rust iterator */
};

/* FFI exposed from Rust code */

/* Build the Rust-side iterator; NULL means failure (for
 * GraphError::ParentOutOfRange, index_get_parents() has already set a
 * ValueError — see rustla_init below). */
rustlazyancestorsObject *
rustlazyancestors_init(indexObject *index,
                       /* to pass index_get_parents() */
                       int (*)(indexObject *, Py_ssize_t, int *, int),
                       /* intrevs vector */
                       Py_ssize_t initrevslen, long *initrevs, long stoprev,
                       int inclusive);
/* Release the Rust-side iterator. */
void rustlazyancestors_drop(rustlazyancestorsObject *self);
/* NOTE(review): -1 appears to signal exhaustion (see rustla_next);
 * confirm against the Rust implementation. */
int rustlazyancestors_next(rustlazyancestorsObject *self);
int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2682 2689
2683 2690 /* CPython instance methods */
2684 2691 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2685 2692 {
2686 2693 PyObject *initrevsarg = NULL;
2687 2694 PyObject *inclusivearg = NULL;
2688 2695 long stoprev = 0;
2689 2696 long *initrevs = NULL;
2690 2697 int inclusive = 0;
2691 2698 Py_ssize_t i;
2692 2699
2693 2700 indexObject *index;
2694 2701 if (!PyArg_ParseTuple(args, "O!O!lO!", &indexType, &index, &PyList_Type,
2695 2702 &initrevsarg, &stoprev, &PyBool_Type,
2696 2703 &inclusivearg))
2697 2704 return -1;
2698 2705
2699 2706 Py_INCREF(index);
2700 2707 self->index = index;
2701 2708
2702 2709 if (inclusivearg == Py_True)
2703 2710 inclusive = 1;
2704 2711
2705 2712 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2706 2713
2707 2714 initrevs = (long *)calloc(linit, sizeof(long));
2708 2715
2709 2716 if (initrevs == NULL) {
2710 2717 PyErr_NoMemory();
2711 2718 goto bail;
2712 2719 }
2713 2720
2714 2721 for (i = 0; i < linit; i++) {
2715 2722 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2716 2723 }
2717 2724 if (PyErr_Occurred())
2718 2725 goto bail;
2719 2726
2720 2727 self->iter = rustlazyancestors_init(index, index_get_parents, linit,
2721 2728 initrevs, stoprev, inclusive);
2722 2729 if (self->iter == NULL) {
2723 2730 /* if this is because of GraphError::ParentOutOfRange
2724 2731 * index_get_parents() has already set the proper ValueError */
2725 2732 goto bail;
2726 2733 }
2727 2734
2728 2735 free(initrevs);
2729 2736 return 0;
2730 2737
2731 2738 bail:
2732 2739 free(initrevs);
2733 2740 return -1;
2734 2741 };
2735 2742
2736 2743 static void rustla_dealloc(rustlazyancestorsObject *self)
2737 2744 {
2738 2745 Py_XDECREF(self->index);
2739 2746 if (self->iter != NULL) { /* can happen if rustla_init failed */
2740 2747 rustlazyancestors_drop(self->iter);
2741 2748 }
2742 2749 PyObject_Del(self);
2743 2750 }
2744 2751
2745 2752 static PyObject *rustla_next(rustlazyancestorsObject *self)
2746 2753 {
2747 2754 int res = rustlazyancestors_next(self->iter);
2748 2755 if (res == -1) {
2749 2756 /* Setting an explicit exception seems unnecessary
2750 2757 * as examples from Python source code (Objects/rangeobjets.c
2751 2758 * and Modules/_io/stringio.c) seem to demonstrate.
2752 2759 */
2753 2760 return NULL;
2754 2761 }
2755 2762 return PyInt_FromLong(res);
2756 2763 }
2757 2764
2758 2765 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2759 2766 {
2760 2767 long lrev;
2761 2768 if (!pylong_to_long(rev, &lrev)) {
2762 2769 PyErr_Clear();
2763 2770 return 0;
2764 2771 }
2765 2772 return rustlazyancestors_contains(self->iter, lrev);
2766 2773 }
2767 2774
/* Sequence protocol: only `in` (membership) is supported. */
static PySequenceMethods rustla_sequence_methods = {
    0,                           /* sq_length */
    0,                           /* sq_concat */
    0,                           /* sq_repeat */
    0,                           /* sq_item */
    0,                           /* sq_slice */
    0,                           /* sq_ass_item */
    0,                           /* sq_ass_slice */
    (objobjproc)rustla_contains, /* sq_contains */
};
2778 2785
/* Type object for parsers.rustlazyancestors — an iterator (tp_iternext)
 * that also supports membership tests via the sequence protocol. */
static PyTypeObject rustlazyancestorsType = {
    PyVarObject_HEAD_INIT(NULL, 0)                  /* header */
    "parsers.rustlazyancestors",                    /* tp_name */
    sizeof(rustlazyancestorsObject),                /* tp_basicsize */
    0,                                              /* tp_itemsize */
    (destructor)rustla_dealloc,                     /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_compare */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    &rustla_sequence_methods,                       /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                             /* tp_flags */
    "Iterator over ancestors, implemented in Rust", /* tp_doc */
    0,                                              /* tp_traverse */
    0,                                              /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    (iternextfunc)rustla_next,                      /* tp_iternext */
    0,                                              /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)rustla_init,                          /* tp_init */
    0,                                              /* tp_alloc */
};
#endif /* WITH_RUST */
2819 2826
/*
 * Register the revlog C types (index, nodetree, and — when built with
 * Rust support — the lazy-ancestors iterator) on the given module, and
 * build the shared nullid entry tuple.
 *
 * NOTE(review): PyModule_AddObject() return values are not checked, so
 * a registration failure would silently leak a reference — presumably
 * tolerable during one-time module init; confirm before reusing this
 * pattern elsewhere.
 */
void revlog_module_init(PyObject *mod)
{
	indexType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&indexType) < 0)
		return;
	Py_INCREF(&indexType);
	PyModule_AddObject(mod, "index", (PyObject *)&indexType);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	/* the shared, immortal tuple representing the null revision */
	if (!nullentry) {
		nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0,
		                          0, -1, -1, -1, -1, nullid, 20);
	}
	if (nullentry)
		/* keep the GC from ever visiting this long-lived tuple */
		PyObject_GC_UnTrack(nullentry);

#ifdef WITH_RUST
	rustlazyancestorsType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&rustlazyancestorsType) < 0)
		return;
	Py_INCREF(&rustlazyancestorsType);
	PyModule_AddObject(mod, "rustlazyancestors",
	                   (PyObject *)&rustlazyancestorsType);
#endif
}
General Comments 0
You need to be logged in to leave comments. Login now