##// END OF EJS Templates
sparse-revlog: handle nullrev in index_get_start...
Boris Feld -
r40997:b54727f8 default
parent child Browse files
Show More
@@ -1,2890 +1,2893 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <assert.h>
12 12 #include <ctype.h>
13 13 #include <limits.h>
14 14 #include <stddef.h>
15 15 #include <stdlib.h>
16 16 #include <string.h>
17 17
18 18 #include "bitmanipulation.h"
19 19 #include "charencode.h"
20 20 #include "revlog.h"
21 21 #include "util.h"
22 22
#ifdef IS_PY3K
/* Python 2/3 compatibility: alias the removed PyInt_* names to their
 * PyLong_* equivalents on Python 3.
 *
 * The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#endif
32 32
typedef struct indexObjectStruct indexObject;

/* One node of the base-16 trie; each slot addresses one nibble value. */
typedef struct {
	int children[16];
} nodetreenode;

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index;   /* back-pointer to the owning index (not owned) */
	nodetreenode *nodes;
	unsigned length;      /* # nodes in use */
	unsigned capacity;    /* # nodes allocated */
	int depth;            /* maximum depth of tree */
	int splits;           /* # splits performed */
} nodetree;

/* Python object wrapper exposing a standalone nodetree. */
typedef struct {
	PyObject_HEAD /* ; */
	nodetree nt;
} nodetreeObject;

/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	Py_buffer buf;         /* buffer of data */
	PyObject **cache;      /* cached tuples */
	const char **offsets;  /* populated on demand */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand */
	PyObject *headrevs;    /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;           /* base-16 trie */
	int ntinitialized;     /* 0 or 1 */
	int ntrev;             /* last rev scanned */
	int ntlookups;         /* # lookups */
	int ntmisses;          /* # lookups that miss the cache */
	int inlined;           /* nonzero if data is interleaved with the index */
};
91 91
92 92 static Py_ssize_t index_length(const indexObject *self)
93 93 {
94 94 if (self->added == NULL)
95 95 return self->length;
96 96 return self->length + PyList_GET_SIZE(self->added);
97 97 }
98 98
/* Singleton tuple returned for nullrev lookups (initialized elsewhere). */
static PyObject *nullentry = NULL;
/* The all-zero node id of the null revision. */
static const char nullid[20] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

/* Py_BuildValue format for an index entry; 'K'/'k' picked so the
 * 64-bit offset+flags field fits the platform's long size. */
#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
#else
static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;
113 113
/*
 * Set a mercurial.error.RevlogError as the current Python exception.
 * Falls back to SystemError if the class cannot be resolved.
 */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	/* PyModule_GetDict returns a borrowed reference; take our own so
	 * the single cleanup path below can XDECREF unconditionally. */
	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	Py_INCREF(dict);

	/* borrowed reference; never DECREF'd */
	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
143 143
144 144 /*
145 145 * Return a pointer to the beginning of a RevlogNG record.
146 146 */
/*
 * Return a pointer to the beginning of a RevlogNG record.
 *
 * Returns NULL (with a Python exception set) on allocation failure.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		/* Inlined revlogs interleave revision data with index
		 * entries, so records are not at fixed offsets; build a
		 * per-revision offset table lazily on first access. */
		if (self->offsets == NULL) {
			self->offsets = PyMem_Malloc(self->raw_length *
			                             sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			/* NOTE(review): inline_scan's return value is not
			 * checked here — presumably it cannot fail on data
			 * that was already scanned at load time; confirm. */
			inline_scan(self, self->offsets);
		}
		return self->offsets[pos];
	}

	/* non-inlined: fixed-size records, simple arithmetic */
	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
}
162 162
163 163 /*
164 164 * Get parents of the given rev.
165 165 *
166 166 * The specified rev must be valid and must not be nullrev. A returned
167 167 * parent revision may be nullrev, but is guaranteed to be in valid range.
168 168 */
/*
 * Get parents of the given rev.
 *
 * The specified rev must be valid and must not be nullrev. A returned
 * parent revision may be nullrev, but is guaranteed to be in valid range.
 *
 * Returns 0 on success, -1 on error with a Python exception set.
 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
	if (rev >= self->length) {
		/* in-memory entry: tuple slots 5 and 6 hold p1 and p2 */
		long tmp;
		PyObject *tuple =
		    PyList_GET_ITEM(self->added, rev - self->length);
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
			return -1;
		}
		ps[0] = (int)tmp;
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
			return -1;
		}
		ps[1] = (int)tmp;
	} else {
		/* on-disk entry: parents at byte offsets 24 and 28 */
		const char *data = index_deref(self, rev);
		ps[0] = getbe32(data + 24);
		ps[1] = getbe32(data + 28);
	}
	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}
197 197
198 198 /*
199 199 * Get parents of the given rev.
200 200 *
201 201 * If the specified rev is out of range, IndexError will be raised. If the
202 202 * revlog entry is corrupted, ValueError may be raised.
203 203 *
204 204 * Returns 0 on success or -1 on failure.
205 205 */
206 206 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
207 207 {
208 208 int tiprev;
209 209 if (!op || !HgRevlogIndex_Check(op) || !ps) {
210 210 PyErr_BadInternalCall();
211 211 return -1;
212 212 }
213 213 tiprev = (int)index_length((indexObject *)op) - 1;
214 214 if (rev < -1 || rev > tiprev) {
215 215 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
216 216 return -1;
217 217 } else if (rev == -1) {
218 218 ps[0] = ps[1] = -1;
219 219 return 0;
220 220 } else {
221 221 return index_get_parents((indexObject *)op, rev, ps, tiprev);
222 222 }
223 223 }
224 224
/*
 * Return the offset (in the data file) at which the data of `rev`
 * starts, or -1 on error with a Python exception set.
 *
 * nullrev is handled explicitly and yields 0, so callers computing
 * segment spans may pass it safely.
 */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	uint64_t offset;
	if (rev == nullrev) {
		return 0;
	}
	if (rev >= self->length) {
		/* in-memory entry: tuple slot 0 is offset<<16 | flags */
		PyObject *tuple;
		PyObject *pylong;
		PY_LONG_LONG tmp;
		tuple = PyList_GET_ITEM(self->added, rev - self->length);
		pylong = PyTuple_GET_ITEM(tuple, 0);
		tmp = PyLong_AsLongLong(pylong);
		if (tmp == -1 && PyErr_Occurred()) {
			return -1;
		}
		if (tmp < 0) {
			PyErr_Format(PyExc_OverflowError,
			             "revlog entry size out of bound (%lld)",
			             (long long)tmp);
			return -1;
		}
		offset = (uint64_t)tmp;
	} else {
		/* on-disk entry: 48-bit offset + 16-bit flags, big endian */
		const char *data = index_deref(self, rev);
		offset = getbe32(data + 4);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high = getbe32(data);
			offset |= ((uint64_t)offset_high) << 32;
		}
	}
	/* shift the 16 flag bits away to recover the offset proper */
	return (int64_t)(offset >> 16);
}
258 261
259 262 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
260 263 {
261 264 if (rev >= self->length) {
262 265 PyObject *tuple;
263 266 PyObject *pylong;
264 267 long ret;
265 268 tuple = PyList_GET_ITEM(self->added, rev - self->length);
266 269 pylong = PyTuple_GET_ITEM(tuple, 1);
267 270 ret = PyInt_AsLong(pylong);
268 271 if (ret == -1 && PyErr_Occurred()) {
269 272 return -1;
270 273 }
271 274 if (ret < 0 || ret > (long)INT_MAX) {
272 275 PyErr_Format(PyExc_OverflowError,
273 276 "revlog entry size out of bound (%ld)",
274 277 ret);
275 278 return -1;
276 279 }
277 280 return (int)ret;
278 281 } else {
279 282 const char *data = index_deref(self, rev);
280 283 int tmp = (int)getbe32(data + 8);
281 284 if (tmp < 0) {
282 285 PyErr_Format(PyExc_OverflowError,
283 286 "revlog entry size out of bound (%d)",
284 287 tmp);
285 288 return -1;
286 289 }
287 290 return tmp;
288 291 }
289 292 }
290 293
291 294 /*
292 295 * RevlogNG format (all in big endian, data may be inlined):
293 296 * 6 bytes: offset
294 297 * 2 bytes: flags
295 298 * 4 bytes: compressed length
296 299 * 4 bytes: uncompressed length
297 300 * 4 bytes: base revision
298 301 * 4 bytes: link revision
299 302 * 4 bytes: parent 1 revision
300 303 * 4 bytes: parent 2 revision
301 304 * 32 bytes: nodeid (only 20 bytes used)
302 305 */
/*
 * Decode the index entry at `pos` into a Python tuple (new reference),
 * or return NULL with an exception set.
 *
 * RevlogNG format (all in big endian, data may be inlined):
 *    6 bytes: offset
 *    2 bytes: flags
 *    4 bytes: compressed length
 *    4 bytes: uncompressed length
 *    4 bytes: base revision
 *    4 bytes: link revision
 *    4 bytes: parent 1 revision
 *    4 bytes: parent 2 revision
 *   32 bytes: nodeid (only 20 bytes used)
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);
	PyObject *entry;

	/* nullrev always maps to the shared sentinel tuple */
	if (pos == nullrev) {
		Py_INCREF(nullentry);
		return nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	/* in-memory entries are already tuples; hand out a new reference */
	if (pos >= self->length) {
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length);
		Py_INCREF(obj);
		return obj;
	}

	/* on-disk entries are decoded once, then memoized in self->cache
	 * (allocated lazily, one slot per on-disk revision) */
	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                      base_rev, link_rev, parent_1, parent_2, c_node_id,
	                      20);

	if (entry) {
		PyObject_GC_UnTrack(entry);
		/* one reference stays in the cache, one goes to the caller */
		Py_INCREF(entry);
	}

	self->cache[pos] = entry;

	return entry;
}
373 376
374 377 /*
375 378 * Return the 20-byte SHA of the node corresponding to the given rev.
376 379 */
377 380 static const char *index_node(indexObject *self, Py_ssize_t pos)
378 381 {
379 382 Py_ssize_t length = index_length(self);
380 383 const char *data;
381 384
382 385 if (pos == nullrev)
383 386 return nullid;
384 387
385 388 if (pos >= length)
386 389 return NULL;
387 390
388 391 if (pos >= self->length) {
389 392 PyObject *tuple, *str;
390 393 tuple = PyList_GET_ITEM(self->added, pos - self->length);
391 394 str = PyTuple_GetItem(tuple, 7);
392 395 return str ? PyBytes_AS_STRING(str) : NULL;
393 396 }
394 397
395 398 data = index_deref(self, pos);
396 399 return data ? data + 32 : NULL;
397 400 }
398 401
399 402 /*
400 403 * Return the 20-byte SHA of the node corresponding to the given rev. The
401 404 * rev is assumed to be existing. If not, an exception is set.
402 405 */
403 406 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
404 407 {
405 408 const char *node = index_node(self, pos);
406 409 if (node == NULL) {
407 410 PyErr_Format(PyExc_IndexError, "could not access rev %d",
408 411 (int)pos);
409 412 }
410 413 return node;
411 414 }
412 415
413 416 static int nt_insert(nodetree *self, const char *node, int rev);
414 417
415 418 static int node_check(PyObject *obj, char **node)
416 419 {
417 420 Py_ssize_t nodelen;
418 421 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
419 422 return -1;
420 423 if (nodelen == 20)
421 424 return 0;
422 425 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
423 426 return -1;
424 427 }
425 428
/*
 * Append a new revision entry (an 8-tuple) to the index.
 *
 * The entry is stored in the in-memory `added` list; if the node trie
 * is initialized, the new node is inserted with the rev index it will
 * occupy. The headrevs cache is invalidated. Returns None, or NULL on
 * error with an exception set.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	char *node;
	Py_ssize_t len;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	/* slot 7 must hold a 20-byte node id */
	if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
		return NULL;

	/* current length == index the new entry will get */
	len = index_length(self);

	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	if (self->ntinitialized)
		nt_insert(&self->nt, node, (int)len);

	/* any cached head computation is now stale */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
456 459
/*
 * Build a dict of internal statistics (counters and sizes) for
 * debugging/introspection. Returns a new reference, or NULL on error.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* istat(field, label): store self-><field> under <label> in the dict;
 * jumps to bail on any allocation or insertion failure. */
#define istat(__n, __d) \
	do { \
		s = PyBytes_FromString(__d); \
		t = PyInt_FromSsize_t(self->__n); \
		if (!s || !t) \
			goto bail; \
		if (PyDict_SetItem(obj, s, t) == -1) \
			goto bail; \
		Py_CLEAR(s); \
		Py_CLEAR(t); \
	} while (0)

	if (self->added) {
		Py_ssize_t len = PyList_GET_SIZE(self->added);
		s = PyBytes_FromString("index entries added");
		t = PyInt_FromSsize_t(len);
		if (!s || !t)
			goto bail;
		if (PyDict_SetItem(obj, s, t) == -1)
			goto bail;
		Py_CLEAR(s);
		Py_CLEAR(t);
	}

	if (self->raw_length != self->length)
		istat(raw_length, "revs on disk");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
513 516
514 517 /*
515 518 * When we cache a list, we want to be sure the caller can't mutate
516 519 * the cached copy.
517 520 */
518 521 static PyObject *list_copy(PyObject *list)
519 522 {
520 523 Py_ssize_t len = PyList_GET_SIZE(list);
521 524 PyObject *newlist = PyList_New(len);
522 525 Py_ssize_t i;
523 526
524 527 if (newlist == NULL)
525 528 return NULL;
526 529
527 530 for (i = 0; i < len; i++) {
528 531 PyObject *obj = PyList_GET_ITEM(list, i);
529 532 Py_INCREF(obj);
530 533 PyList_SET_ITEM(newlist, i, obj);
531 534 }
532 535
533 536 return newlist;
534 537 }
535 538
536 539 static int check_filter(PyObject *filter, Py_ssize_t arg)
537 540 {
538 541 if (filter) {
539 542 PyObject *arglist, *result;
540 543 int isfiltered;
541 544
542 545 arglist = Py_BuildValue("(n)", arg);
543 546 if (!arglist) {
544 547 return -1;
545 548 }
546 549
547 550 result = PyEval_CallObject(filter, arglist);
548 551 Py_DECREF(arglist);
549 552 if (!result) {
550 553 return -1;
551 554 }
552 555
553 556 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
554 557 * same as this function, so we can just return it directly.*/
555 558 isfiltered = PyObject_IsTrue(result);
556 559 Py_DECREF(result);
557 560 return isfiltered;
558 561 } else {
559 562 return 0;
560 563 }
561 564 }
562 565
/*
 * Mark each rev in `list` with `marker` in the `phases` array and
 * return the smallest rev seen (or len+2 if the list is empty).
 * Returns -2 on error with an exception set.
 */
static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
                                    Py_ssize_t marker, char *phases)
{
	PyObject *iter = NULL;
	PyObject *iter_item = NULL;
	/* sentinel larger than any valid rev */
	Py_ssize_t min_idx = index_length(self) + 2;
	long iter_item_long;

	if (PyList_GET_SIZE(list) != 0) {
		iter = PyObject_GetIter(list);
		if (iter == NULL)
			return -2;
		while ((iter_item = PyIter_Next(iter))) {
			if (!pylong_to_long(iter_item, &iter_item_long)) {
				Py_DECREF(iter_item);
				return -2;
			}
			Py_DECREF(iter_item);
			if (iter_item_long < min_idx)
				min_idx = iter_item_long;
			/* NOTE(review): iter_item_long indexes phases[]
			 * without a range check — presumably callers only
			 * pass in-range phase roots; confirm against
			 * compute_phases_map_sets. */
			phases[iter_item_long] = (char)marker;
		}
		Py_DECREF(iter);
	}

	return min_idx;
}
590 593
591 594 static inline void set_phase_from_parents(char *phases, int parent_1,
592 595 int parent_2, Py_ssize_t i)
593 596 {
594 597 if (parent_1 >= 0 && phases[parent_1] > phases[i])
595 598 phases[i] = phases[parent_1];
596 599 if (parent_2 >= 0 && phases[parent_2] > phases[i])
597 600 phases[i] = phases[parent_2];
598 601 }
599 602
/*
 * Compute the subset of `roots` reachable from `heads`, walking
 * ancestors no lower than `minroot`. If `includepath` is True, also
 * return every rev on a path between a reachable root and a head.
 *
 * Args tuple: (minroot, heads, roots, includepath).
 * Returns a list of revs, or NULL on error with an exception set.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark the roots; revstates is indexed by rev+1 so nullrev (-1)
	 * lands on slot 0. */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* without includepath, no need to walk below a root */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* forward pass: a seen rev with a reachable parent is itself
		 * reachable */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
758 761
/*
 * Given a list of lists of phase roots (one list per non-public
 * phase), compute for every rev its phase and collect, per phase, the
 * set of revs in that phase.
 *
 * Returns a tuple (len, [None, set_for_phase_1, set_for_phase_2, ...])
 * or NULL on error with an exception set. The public phase (0) is not
 * materialized; callers compute it as the complement.
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	PyObject *roots = Py_None;
	PyObject *ret = NULL;
	PyObject *phasessize = NULL;
	PyObject *phaseroots = NULL;
	PyObject *phaseset = NULL;
	PyObject *phasessetlist = NULL;
	PyObject *rev = NULL;
	Py_ssize_t len = index_length(self);
	Py_ssize_t numphase = 0;
	Py_ssize_t minrevallphases = 0;
	Py_ssize_t minrevphase = 0;
	Py_ssize_t i = 0;
	char *phases = NULL;
	long phase;

	if (!PyArg_ParseTuple(args, "O", &roots))
		goto done;
	if (roots == NULL || !PyList_Check(roots)) {
		PyErr_SetString(PyExc_TypeError, "roots must be a list");
		goto done;
	}

	phases = calloc(
	    len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
	if (phases == NULL) {
		PyErr_NoMemory();
		goto done;
	}
	/* Put the phase information of all the roots in phases */
	numphase = PyList_GET_SIZE(roots) + 1;
	minrevallphases = len + 1;
	phasessetlist = PyList_New(numphase);
	if (phasessetlist == NULL)
		goto done;

	/* slot 0 (public) stays None; only non-public phases get sets */
	PyList_SET_ITEM(phasessetlist, 0, Py_None);
	Py_INCREF(Py_None);

	for (i = 0; i < numphase - 1; i++) {
		phaseroots = PyList_GET_ITEM(roots, i);
		phaseset = PySet_New(NULL);
		if (phaseset == NULL)
			goto release;
		/* reference to phaseset is stolen by the list */
		PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
		if (!PyList_Check(phaseroots)) {
			PyErr_SetString(PyExc_TypeError,
			                "roots item must be a list");
			goto release;
		}
		minrevphase =
		    add_roots_get_min(self, phaseroots, i + 1, phases);
		if (minrevphase == -2) /* Error from add_roots_get_min */
			goto release;
		minrevallphases = MIN(minrevallphases, minrevphase);
	}
	/* Propagate the phase information from the roots to the revs */
	if (minrevallphases != -1) {
		int parents[2];
		for (i = minrevallphases; i < len; i++) {
			if (index_get_parents(self, i, parents, (int)len - 1) <
			    0)
				goto release;
			set_phase_from_parents(phases, parents[0], parents[1],
			                       i);
		}
	}
	/* Transform phase list to a python list */
	phasessize = PyInt_FromSsize_t(len);
	if (phasessize == NULL)
		goto release;
	for (i = 0; i < len; i++) {
		phase = phases[i];
		/* We only store the sets of phase for non public phase, the
		 * public phase is computed as a difference */
		if (phase != 0) {
			phaseset = PyList_GET_ITEM(phasessetlist, phase);
			rev = PyInt_FromSsize_t(i);
			if (rev == NULL)
				goto release;
			PySet_Add(phaseset, rev);
			Py_XDECREF(rev);
		}
	}
	ret = PyTuple_Pack(2, phasessize, phasessetlist);

release:
	Py_XDECREF(phasessize);
	Py_XDECREF(phasessetlist);
done:
	free(phases);
	return ret;
}
853 856
/*
 * Return the list of head revisions (revs with no unfiltered child),
 * optionally honoring a `filteredrevs` container passed as the single
 * (optional) argument. The result is cached on the index and
 * invalidated whenever entries are added or the filter changes; a
 * defensive copy of the cached list is returned.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit only when the exact same filter object is passed */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		/* empty revlog: nullrev is the only head.
		 * NOTE(review): this local `nullid` shadows the file-level
		 * `const char nullid[20]`; consider renaming. */
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* walk from tip down, marking parents of unfiltered revs */
	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* whatever was never marked as a parent is a head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
955 958
956 959 /**
957 960 * Obtain the base revision index entry.
958 961 *
959 962 * Callers must ensure that rev >= 0 or illegal memory access may occur.
960 963 */
/**
 * Obtain the base revision index entry.
 *
 * Callers must ensure that rev >= 0 or illegal memory access may occur.
 *
 * Returns the base revision, or -2 on error with an exception set.
 */
static inline int index_baserev(indexObject *self, int rev)
{
	const char *data;

	if (rev >= self->length) {
		/* in-memory entry: tuple slot 3 is the base revision */
		PyObject *tuple =
		    PyList_GET_ITEM(self->added, rev - self->length);
		long ret;
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
			return -2;
		}
		return (int)ret;
	} else {
		data = index_deref(self, rev);
		if (data == NULL) {
			return -2;
		}

		/* on-disk entry: base revision at byte offset 16 */
		return getbe32(data + 16);
	}
}
982 985
983 986 static PyObject *index_deltachain(indexObject *self, PyObject *args)
984 987 {
985 988 int rev, generaldelta;
986 989 PyObject *stoparg;
987 990 int stoprev, iterrev, baserev = -1;
988 991 int stopped;
989 992 PyObject *chain = NULL, *result = NULL;
990 993 const Py_ssize_t length = index_length(self);
991 994
992 995 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
993 996 return NULL;
994 997 }
995 998
996 999 if (PyInt_Check(stoparg)) {
997 1000 stoprev = (int)PyInt_AsLong(stoparg);
998 1001 if (stoprev == -1 && PyErr_Occurred()) {
999 1002 return NULL;
1000 1003 }
1001 1004 } else if (stoparg == Py_None) {
1002 1005 stoprev = -2;
1003 1006 } else {
1004 1007 PyErr_SetString(PyExc_ValueError,
1005 1008 "stoprev must be integer or None");
1006 1009 return NULL;
1007 1010 }
1008 1011
1009 1012 if (rev < 0 || rev >= length) {
1010 1013 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1011 1014 return NULL;
1012 1015 }
1013 1016
1014 1017 chain = PyList_New(0);
1015 1018 if (chain == NULL) {
1016 1019 return NULL;
1017 1020 }
1018 1021
1019 1022 baserev = index_baserev(self, rev);
1020 1023
1021 1024 /* This should never happen. */
1022 1025 if (baserev <= -2) {
1023 1026 /* Error should be set by index_deref() */
1024 1027 assert(PyErr_Occurred());
1025 1028 goto bail;
1026 1029 }
1027 1030
1028 1031 iterrev = rev;
1029 1032
1030 1033 while (iterrev != baserev && iterrev != stoprev) {
1031 1034 PyObject *value = PyInt_FromLong(iterrev);
1032 1035 if (value == NULL) {
1033 1036 goto bail;
1034 1037 }
1035 1038 if (PyList_Append(chain, value)) {
1036 1039 Py_DECREF(value);
1037 1040 goto bail;
1038 1041 }
1039 1042 Py_DECREF(value);
1040 1043
1041 1044 if (generaldelta) {
1042 1045 iterrev = baserev;
1043 1046 } else {
1044 1047 iterrev--;
1045 1048 }
1046 1049
1047 1050 if (iterrev < 0) {
1048 1051 break;
1049 1052 }
1050 1053
1051 1054 if (iterrev >= length) {
1052 1055 PyErr_SetString(PyExc_IndexError,
1053 1056 "revision outside index");
1054 1057 return NULL;
1055 1058 }
1056 1059
1057 1060 baserev = index_baserev(self, iterrev);
1058 1061
1059 1062 /* This should never happen. */
1060 1063 if (baserev <= -2) {
1061 1064 /* Error should be set by index_deref() */
1062 1065 assert(PyErr_Occurred());
1063 1066 goto bail;
1064 1067 }
1065 1068 }
1066 1069
1067 1070 if (iterrev == stoprev) {
1068 1071 stopped = 1;
1069 1072 } else {
1070 1073 PyObject *value = PyInt_FromLong(iterrev);
1071 1074 if (value == NULL) {
1072 1075 goto bail;
1073 1076 }
1074 1077 if (PyList_Append(chain, value)) {
1075 1078 Py_DECREF(value);
1076 1079 goto bail;
1077 1080 }
1078 1081 Py_DECREF(value);
1079 1082
1080 1083 stopped = 0;
1081 1084 }
1082 1085
1083 1086 if (PyList_Reverse(chain)) {
1084 1087 goto bail;
1085 1088 }
1086 1089
1087 1090 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1088 1091 Py_DECREF(chain);
1089 1092 return result;
1090 1093
1091 1094 bail:
1092 1095 Py_DECREF(chain);
1093 1096 return NULL;
1094 1097 }
1095 1098
1096 1099 static inline int64_t
1097 1100 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1098 1101 {
1099 1102 int64_t start_offset;
1100 1103 int64_t end_offset;
1101 1104 int end_size;
1102 1105 start_offset = index_get_start(self, start_rev);
1103 1106 if (start_offset < 0) {
1104 1107 return -1;
1105 1108 }
1106 1109 end_offset = index_get_start(self, end_rev);
1107 1110 if (end_offset < 0) {
1108 1111 return -1;
1109 1112 }
1110 1113 end_size = index_get_length(self, end_rev);
1111 1114 if (end_size < 0) {
1112 1115 return -1;
1113 1116 }
1114 1117 if (end_offset < start_offset) {
1115 1118 PyErr_Format(PyExc_ValueError,
1116 1119 "corrupted revlog index: inconsistent offset "
1117 1120 "between revisions (%zd) and (%zd)",
1118 1121 start_rev, end_rev);
1119 1122 return -1;
1120 1123 }
1121 1124 return (end_offset - start_offset) + (int64_t)end_size;
1122 1125 }
1123 1126
1124 1127 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1125 1128 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1126 1129 Py_ssize_t startidx, Py_ssize_t endidx)
1127 1130 {
1128 1131 int length;
1129 1132 while (endidx > 1 && endidx > startidx) {
1130 1133 length = index_get_length(self, revs[endidx - 1]);
1131 1134 if (length < 0) {
1132 1135 return -1;
1133 1136 }
1134 1137 if (length != 0) {
1135 1138 break;
1136 1139 }
1137 1140 endidx -= 1;
1138 1141 }
1139 1142 return endidx;
1140 1143 }
1141 1144
1142 1145 struct Gap {
1143 1146 int64_t size;
1144 1147 Py_ssize_t idx;
1145 1148 };
1146 1149
1147 1150 static int gap_compare(const void *left, const void *right)
1148 1151 {
1149 1152 const struct Gap *l_left = ((const struct Gap *)left);
1150 1153 const struct Gap *l_right = ((const struct Gap *)right);
1151 1154 if (l_left->size < l_right->size) {
1152 1155 return -1;
1153 1156 } else if (l_left->size > l_right->size) {
1154 1157 return 1;
1155 1158 }
1156 1159 return 0;
1157 1160 }
1158 1161 static int Py_ssize_t_compare(const void *left, const void *right)
1159 1162 {
1160 1163 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1161 1164 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1162 1165 if (l_left < l_right) {
1163 1166 return -1;
1164 1167 } else if (l_left > l_right) {
1165 1168 return 1;
1166 1169 }
1167 1170 return 0;
1168 1171 }
1169 1172
/*
 * slicechunktodensity(revs, targetdensity, mingapsize) -> sequence of
 * rev lists.
 *
 * Slice the delta chain `revs` so that reading each resulting chunk
 * keeps the payload/read-bytes ratio at or above `targetdensity`,
 * ignoring on-disk gaps no larger than `mingapsize` bytes. When the
 * chain is already dense enough (or trivially small) the input list is
 * returned as the single slice.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < 0 || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* a span no larger than mingapsize cannot contain a relevant gap,
	 * so no slicing can help: return the chain whole */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		/* empty revisions do not contribute data, skip them so they
		 * do not artificially break a gap */
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* walk gaps from largest to smallest, skipping each one until the
	 * remaining read is dense enough */
	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* restore rev-array order of the chosen cut points */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel cut point so the loop below emits the final slice */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1365 1368
1366 1369 static inline int nt_level(const char *node, Py_ssize_t level)
1367 1370 {
1368 1371 int v = node[level >> 1];
1369 1372 if (!(level & 1))
1370 1373 v >>= 4;
1371 1374 return v & 0xf;
1372 1375 }
1373 1376
/*
 * Look up a full or partial nodeid in the trie.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * -1: the nullid was given
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	/* hex inputs are read one character per nybble, binary inputs via
	 * nt_level */
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
		return -1;

	if (hex)
		maxlevel = nodelen > 40 ? 40 : (int)nodelen;
	else
		maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf: decode the rev and verify the rest of the
			 * queried prefix against the stored nodeid */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* inner node: descend */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1420 1423
/*
 * Allocate a fresh trie node, doubling the backing array when full.
 * Returns the index of the new node in self->nodes, or -1 on error
 * (exception set).
 */
static int nt_new(nodetree *self)
{
	if (self->length == self->capacity) {
		unsigned newcapacity;
		nodetreenode *newnodes;
		newcapacity = self->capacity * 2;
		if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
			PyErr_SetString(PyExc_MemoryError,
			                "overflow in nt_new");
			return -1;
		}
		newnodes =
		    realloc(self->nodes, newcapacity * sizeof(nodetreenode));
		if (newnodes == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->capacity = newcapacity;
		self->nodes = newnodes;
		/* zero the newly-grown tail so children[] slots read as
		 * "empty" */
		memset(&self->nodes[self->length], 0,
		       sizeof(nodetreenode) * (self->capacity - self->length));
	}
	return self->length++;
}
1445 1448
/*
 * Insert the 20-byte binary nodeid `node` mapping to revision `rev`,
 * walking nybble by nybble and splitting a leaf into an inner node when
 * two nodeids share a prefix. Returns 0 on success, -1 on error.
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the rev as a leaf (-rev - 2) */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* occupied leaf: either same node (overwrite) or a
			 * prefix collision requiring a split */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, 20)) {
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			/* re-hang the old leaf one level deeper; the loop
			 * then retries the new node at the same depth */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* inner node: descend */
			level += 1;
			off = v;
		}
	}

	return -1;
}
1493 1496
1494 1497 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1495 1498 {
1496 1499 Py_ssize_t rev;
1497 1500 const char *node;
1498 1501 Py_ssize_t length;
1499 1502 if (!PyArg_ParseTuple(args, "n", &rev))
1500 1503 return NULL;
1501 1504 length = index_length(self->nt.index);
1502 1505 if (rev < 0 || rev >= length) {
1503 1506 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1504 1507 return NULL;
1505 1508 }
1506 1509 node = index_node_existing(self->nt.index, rev);
1507 1510 if (nt_insert(&self->nt, node, (int)rev) == -1)
1508 1511 return NULL;
1509 1512 Py_RETURN_NONE;
1510 1513 }
1511 1514
1512 1515 static int nt_delete_node(nodetree *self, const char *node)
1513 1516 {
1514 1517 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1515 1518 */
1516 1519 return nt_insert(self, node, -2);
1517 1520 }
1518 1521
1519 1522 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1520 1523 {
1521 1524 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1522 1525 self->nodes = NULL;
1523 1526
1524 1527 self->index = index;
1525 1528 /* The input capacity is in terms of revisions, while the field is in
1526 1529 * terms of nodetree nodes. */
1527 1530 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1528 1531 self->depth = 0;
1529 1532 self->splits = 0;
1530 1533 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1531 1534 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1532 1535 return -1;
1533 1536 }
1534 1537 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1535 1538 if (self->nodes == NULL) {
1536 1539 PyErr_NoMemory();
1537 1540 return -1;
1538 1541 }
1539 1542 self->length = 1;
1540 1543 return 0;
1541 1544 }
1542 1545
/* __init__ for the Python-level wrapper: nodetree(index, capacity).
 * Takes a reference on the index, which ntobj_dealloc() releases. */
static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}
1553 1556
/* Resolve a hex nodeid prefix of `nodelen` characters; thin wrapper
 * around nt_find() in hex mode (see nt_find for return values). */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
1558 1561
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 40; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* leaf: the prefix is unique at this depth, but only
			 * valid if it actually belongs to `node` */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, 20) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		/* inner node: descend and lengthen the prefix */
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
1604 1607
1605 1608 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1606 1609 {
1607 1610 PyObject *val;
1608 1611 char *node;
1609 1612 int length;
1610 1613
1611 1614 if (!PyArg_ParseTuple(args, "O", &val))
1612 1615 return NULL;
1613 1616 if (node_check(val, &node) == -1)
1614 1617 return NULL;
1615 1618
1616 1619 length = nt_shortest(&self->nt, node);
1617 1620 if (length == -3)
1618 1621 return NULL;
1619 1622 if (length == -2) {
1620 1623 raise_revlog_error();
1621 1624 return NULL;
1622 1625 }
1623 1626 return PyInt_FromLong(length);
1624 1627 }
1625 1628
/* Release the node pool; nulling the pointer makes repeated calls (and a
 * later nt_init retry) safe. */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
1631 1634
/* Destructor for the Python-level nodetree: drop the index reference
 * taken in ntobj_init(), free the trie, then the object itself. */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
1638 1641
/* Method table for parsers.nodetree instances. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
1646 1649
/* Python type object for parsers.nodetree (a standalone node->rev trie
 * exposed to Python code). */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
1686 1689
/* Lazily create the index's embedded nodetree and seed it with nullid.
 * No-op when already initialized. Returns 0 on success, -1 on error. */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		/* ntrev: revisions below this may still be missing from the
		 * tree (see index_populate_nt / index_find_node) */
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
1705 1708
/*
 * Map the first `nodelen` bytes of a binary nodeid to a revision.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node,
                           Py_ssize_t nodelen)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				/* cache only the node we were asked for */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			/* cache every node visited during the scan */
			if (nt_insert(&self->nt, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				break;
			}
		}
		/* everything at or above rev is now in the tree */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
1766 1769
1767 1770 static PyObject *index_getitem(indexObject *self, PyObject *value)
1768 1771 {
1769 1772 char *node;
1770 1773 int rev;
1771 1774
1772 1775 if (PyInt_Check(value)) {
1773 1776 long idx;
1774 1777 if (!pylong_to_long(value, &idx)) {
1775 1778 return NULL;
1776 1779 }
1777 1780 return index_get(self, idx);
1778 1781 }
1779 1782
1780 1783 if (node_check(value, &node) == -1)
1781 1784 return NULL;
1782 1785 rev = index_find_node(self, node, 20);
1783 1786 if (rev >= -1)
1784 1787 return PyInt_FromLong(rev);
1785 1788 if (rev == -2)
1786 1789 raise_revlog_error();
1787 1790 return NULL;
1788 1791 }
1789 1792
1790 1793 /*
1791 1794 * Fully populate the radix tree.
1792 1795 */
1793 1796 static int index_populate_nt(indexObject *self)
1794 1797 {
1795 1798 int rev;
1796 1799 if (self->ntrev > 0) {
1797 1800 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1798 1801 const char *n = index_node_existing(self, rev);
1799 1802 if (n == NULL)
1800 1803 return -1;
1801 1804 if (nt_insert(&self->nt, n, rev) == -1)
1802 1805 return -1;
1803 1806 }
1804 1807 self->ntrev = -1;
1805 1808 }
1806 1809 return 0;
1807 1810 }
1808 1811
/*
 * partialmatch(hexprefix) -> 20-byte node, None, or error.
 *
 * Resolve a 1-40 character hex prefix against the fully-populated radix
 * tree: returns None when nothing matches (including non-hex input), the
 * null node when the prefix matches nullid, and raises a revlog error
 * (via raise_revlog_error) when the prefix is ambiguous.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	int nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		/* ambiguous prefix */
		raise_revlog_error();
		return NULL;
	case -2:
		/* no match */
		Py_RETURN_NONE;
	case -1:
		/* prefix of the null hash */
		return PyBytes_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, 20);
}
1859 1862
1860 1863 static PyObject *index_shortest(indexObject *self, PyObject *args)
1861 1864 {
1862 1865 PyObject *val;
1863 1866 char *node;
1864 1867 int length;
1865 1868
1866 1869 if (!PyArg_ParseTuple(args, "O", &val))
1867 1870 return NULL;
1868 1871 if (node_check(val, &node) == -1)
1869 1872 return NULL;
1870 1873
1871 1874 self->ntlookups++;
1872 1875 if (index_init_nt(self) == -1)
1873 1876 return NULL;
1874 1877 if (index_populate_nt(self) == -1)
1875 1878 return NULL;
1876 1879 length = nt_shortest(&self->nt, node);
1877 1880 if (length == -3)
1878 1881 return NULL;
1879 1882 if (length == -2) {
1880 1883 raise_revlog_error();
1881 1884 return NULL;
1882 1885 }
1883 1886 return PyInt_FromLong(length);
1884 1887 }
1885 1888
1886 1889 static PyObject *index_m_get(indexObject *self, PyObject *args)
1887 1890 {
1888 1891 PyObject *val;
1889 1892 char *node;
1890 1893 int rev;
1891 1894
1892 1895 if (!PyArg_ParseTuple(args, "O", &val))
1893 1896 return NULL;
1894 1897 if (node_check(val, &node) == -1)
1895 1898 return NULL;
1896 1899 rev = index_find_node(self, node, 20);
1897 1900 if (rev == -3)
1898 1901 return NULL;
1899 1902 if (rev == -2)
1900 1903 Py_RETURN_NONE;
1901 1904 return PyInt_FromLong(rev);
1902 1905 }
1903 1906
1904 1907 static int index_contains(indexObject *self, PyObject *value)
1905 1908 {
1906 1909 char *node;
1907 1910
1908 1911 if (PyInt_Check(value)) {
1909 1912 long rev;
1910 1913 if (!pylong_to_long(value, &rev)) {
1911 1914 return -1;
1912 1915 }
1913 1916 return rev >= -1 && rev < index_length(self);
1914 1917 }
1915 1918
1916 1919 if (node_check(value, &node) == -1)
1917 1920 return -1;
1918 1921
1919 1922 switch (index_find_node(self, node, 20)) {
1920 1923 case -3:
1921 1924 return -1;
1922 1925 case -2:
1923 1926 return 0;
1924 1927 default:
1925 1928 return 1;
1926 1929 }
1927 1930 }
1928 1931
1929 1932 typedef uint64_t bitmask;
1930 1933
/*
 * Given a disjoint set of revs, return all candidates for the
 * greatest common ancestor. In revset notation, this is the set
 * "heads(::a and ::b and ...)"
 */
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
                                     int revcount)
{
	/* allseen: every input bit set; poison: extra bit marking revs that
	 * sit at or below an already-emitted candidate */
	const bitmask allseen = (1ull << revcount) - 1;
	const bitmask poison = 1ull << revcount;
	PyObject *gca = PyList_New(0);
	int i, v, interesting;
	int maxrev = -1;
	bitmask sp;
	bitmask *seen;

	if (gca == NULL)
		return PyErr_NoMemory();

	for (i = 0; i < revcount; i++) {
		if (revs[i] > maxrev)
			maxrev = revs[i];
	}

	/* seen[r]: which input revs (bit i = revs[i]) are descendants of r */
	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		Py_DECREF(gca);
		return PyErr_NoMemory();
	}

	for (i = 0; i < revcount; i++)
		seen[revs[i]] = 1ull << i;

	interesting = revcount;

	/* sweep from the highest rev downward; parents always have lower
	 * revision numbers, so each rev is finalized before its parents */
	for (v = maxrev; v >= 0 && interesting; v--) {
		bitmask sv = seen[v];
		int parents[2];

		if (!sv)
			continue;

		if (sv < poison) {
			interesting -= 1;
			if (sv == allseen) {
				/* v is reachable from every input: a GCA
				 * candidate */
				PyObject *obj = PyInt_FromLong(v);
				if (obj == NULL)
					goto bail;
				if (PyList_Append(gca, obj) == -1) {
					Py_DECREF(obj);
					goto bail;
				}
				sv |= poison;
				for (i = 0; i < revcount; i++) {
					if (revs[i] == v)
						goto done;
				}
			}
		}
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			if (p == -1)
				continue;
			sp = seen[p];
			if (sv < poison) {
				if (sp == 0) {
					seen[p] = sv;
					interesting++;
				} else if (sp != sv)
					seen[p] |= sv;
			} else {
				/* poison propagates down: ancestors of a
				 * candidate can no longer be candidates */
				if (sp && sp < poison)
					interesting--;
				seen[p] = sv;
			}
		}
	}

done:
	free(seen);
	return gca;
bail:
	free(seen);
	Py_XDECREF(gca);
	return NULL;
}
2020 2023
2021 2024 /*
2022 2025 * Given a disjoint set of revs, return the subset with the longest
2023 2026 * path to the root.
2024 2027 */
2025 2028 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2026 2029 {
2027 2030 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2028 2031 static const Py_ssize_t capacity = 24;
2029 2032 int *depth, *interesting = NULL;
2030 2033 int i, j, v, ninteresting;
2031 2034 PyObject *dict = NULL, *keys = NULL;
2032 2035 long *seen = NULL;
2033 2036 int maxrev = -1;
2034 2037 long final;
2035 2038
2036 2039 if (revcount > capacity) {
2037 2040 PyErr_Format(PyExc_OverflowError,
2038 2041 "bitset size (%ld) > capacity (%ld)",
2039 2042 (long)revcount, (long)capacity);
2040 2043 return NULL;
2041 2044 }
2042 2045
2043 2046 for (i = 0; i < revcount; i++) {
2044 2047 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2045 2048 if (n > maxrev)
2046 2049 maxrev = n;
2047 2050 }
2048 2051
2049 2052 depth = calloc(sizeof(*depth), maxrev + 1);
2050 2053 if (depth == NULL)
2051 2054 return PyErr_NoMemory();
2052 2055
2053 2056 seen = calloc(sizeof(*seen), maxrev + 1);
2054 2057 if (seen == NULL) {
2055 2058 PyErr_NoMemory();
2056 2059 goto bail;
2057 2060 }
2058 2061
2059 2062 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2060 2063 if (interesting == NULL) {
2061 2064 PyErr_NoMemory();
2062 2065 goto bail;
2063 2066 }
2064 2067
2065 2068 if (PyList_Sort(revs) == -1)
2066 2069 goto bail;
2067 2070
2068 2071 for (i = 0; i < revcount; i++) {
2069 2072 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2070 2073 long b = 1l << i;
2071 2074 depth[n] = 1;
2072 2075 seen[n] = b;
2073 2076 interesting[b] = 1;
2074 2077 }
2075 2078
2076 2079 /* invariant: ninteresting is the number of non-zero entries in
2077 2080 * interesting. */
2078 2081 ninteresting = (int)revcount;
2079 2082
2080 2083 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2081 2084 int dv = depth[v];
2082 2085 int parents[2];
2083 2086 long sv;
2084 2087
2085 2088 if (dv == 0)
2086 2089 continue;
2087 2090
2088 2091 sv = seen[v];
2089 2092 if (index_get_parents(self, v, parents, maxrev) < 0)
2090 2093 goto bail;
2091 2094
2092 2095 for (i = 0; i < 2; i++) {
2093 2096 int p = parents[i];
2094 2097 long sp;
2095 2098 int dp;
2096 2099
2097 2100 if (p == -1)
2098 2101 continue;
2099 2102
2100 2103 dp = depth[p];
2101 2104 sp = seen[p];
2102 2105 if (dp <= dv) {
2103 2106 depth[p] = dv + 1;
2104 2107 if (sp != sv) {
2105 2108 interesting[sv] += 1;
2106 2109 seen[p] = sv;
2107 2110 if (sp) {
2108 2111 interesting[sp] -= 1;
2109 2112 if (interesting[sp] == 0)
2110 2113 ninteresting -= 1;
2111 2114 }
2112 2115 }
2113 2116 } else if (dv == dp - 1) {
2114 2117 long nsp = sp | sv;
2115 2118 if (nsp == sp)
2116 2119 continue;
2117 2120 seen[p] = nsp;
2118 2121 interesting[sp] -= 1;
2119 2122 if (interesting[sp] == 0)
2120 2123 ninteresting -= 1;
2121 2124 if (interesting[nsp] == 0)
2122 2125 ninteresting += 1;
2123 2126 interesting[nsp] += 1;
2124 2127 }
2125 2128 }
2126 2129 interesting[sv] -= 1;
2127 2130 if (interesting[sv] == 0)
2128 2131 ninteresting -= 1;
2129 2132 }
2130 2133
2131 2134 final = 0;
2132 2135 j = ninteresting;
2133 2136 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2134 2137 if (interesting[i] == 0)
2135 2138 continue;
2136 2139 final |= i;
2137 2140 j -= 1;
2138 2141 }
2139 2142 if (final == 0) {
2140 2143 keys = PyList_New(0);
2141 2144 goto bail;
2142 2145 }
2143 2146
2144 2147 dict = PyDict_New();
2145 2148 if (dict == NULL)
2146 2149 goto bail;
2147 2150
2148 2151 for (i = 0; i < revcount; i++) {
2149 2152 PyObject *key;
2150 2153
2151 2154 if ((final & (1 << i)) == 0)
2152 2155 continue;
2153 2156
2154 2157 key = PyList_GET_ITEM(revs, i);
2155 2158 Py_INCREF(key);
2156 2159 Py_INCREF(Py_None);
2157 2160 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2158 2161 Py_DECREF(key);
2159 2162 Py_DECREF(Py_None);
2160 2163 goto bail;
2161 2164 }
2162 2165 }
2163 2166
2164 2167 keys = PyDict_Keys(dict);
2165 2168
2166 2169 bail:
2167 2170 free(depth);
2168 2171 free(seen);
2169 2172 free(interesting);
2170 2173 Py_XDECREF(dict);
2171 2174
2172 2175 return keys;
2173 2176 }
2174 2177
2175 2178 /*
2176 2179 * Given a (possibly overlapping) set of revs, return all the
2177 2180 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2178 2181 */
2179 2182 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2180 2183 {
2181 2184 PyObject *ret = NULL;
2182 2185 Py_ssize_t argcount, i, len;
2183 2186 bitmask repeat = 0;
2184 2187 int revcount = 0;
2185 2188 int *revs;
2186 2189
2187 2190 argcount = PySequence_Length(args);
2188 2191 revs = PyMem_Malloc(argcount * sizeof(*revs));
2189 2192 if (argcount > 0 && revs == NULL)
2190 2193 return PyErr_NoMemory();
2191 2194 len = index_length(self);
2192 2195
2193 2196 for (i = 0; i < argcount; i++) {
2194 2197 static const int capacity = 24;
2195 2198 PyObject *obj = PySequence_GetItem(args, i);
2196 2199 bitmask x;
2197 2200 long val;
2198 2201
2199 2202 if (!PyInt_Check(obj)) {
2200 2203 PyErr_SetString(PyExc_TypeError,
2201 2204 "arguments must all be ints");
2202 2205 Py_DECREF(obj);
2203 2206 goto bail;
2204 2207 }
2205 2208 val = PyInt_AsLong(obj);
2206 2209 Py_DECREF(obj);
2207 2210 if (val == -1) {
2208 2211 ret = PyList_New(0);
2209 2212 goto done;
2210 2213 }
2211 2214 if (val < 0 || val >= len) {
2212 2215 PyErr_SetString(PyExc_IndexError, "index out of range");
2213 2216 goto bail;
2214 2217 }
2215 2218 /* this cheesy bloom filter lets us avoid some more
2216 2219 * expensive duplicate checks in the common set-is-disjoint
2217 2220 * case */
2218 2221 x = 1ull << (val & 0x3f);
2219 2222 if (repeat & x) {
2220 2223 int k;
2221 2224 for (k = 0; k < revcount; k++) {
2222 2225 if (val == revs[k])
2223 2226 goto duplicate;
2224 2227 }
2225 2228 } else
2226 2229 repeat |= x;
2227 2230 if (revcount >= capacity) {
2228 2231 PyErr_Format(PyExc_OverflowError,
2229 2232 "bitset size (%d) > capacity (%d)",
2230 2233 revcount, capacity);
2231 2234 goto bail;
2232 2235 }
2233 2236 revs[revcount++] = (int)val;
2234 2237 duplicate:;
2235 2238 }
2236 2239
2237 2240 if (revcount == 0) {
2238 2241 ret = PyList_New(0);
2239 2242 goto done;
2240 2243 }
2241 2244 if (revcount == 1) {
2242 2245 PyObject *obj;
2243 2246 ret = PyList_New(1);
2244 2247 if (ret == NULL)
2245 2248 goto bail;
2246 2249 obj = PyInt_FromLong(revs[0]);
2247 2250 if (obj == NULL)
2248 2251 goto bail;
2249 2252 PyList_SET_ITEM(ret, 0, obj);
2250 2253 goto done;
2251 2254 }
2252 2255
2253 2256 ret = find_gca_candidates(self, revs, revcount);
2254 2257 if (ret == NULL)
2255 2258 goto bail;
2256 2259
2257 2260 done:
2258 2261 PyMem_Free(revs);
2259 2262 return ret;
2260 2263
2261 2264 bail:
2262 2265 PyMem_Free(revs);
2263 2266 Py_XDECREF(ret);
2264 2267 return NULL;
2265 2268 }
2266 2269
2267 2270 /*
2268 2271 * Given a (possibly overlapping) set of revs, return the greatest
2269 2272 * common ancestors: those with the longest path to the root.
2270 2273 */
2271 2274 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2272 2275 {
2273 2276 PyObject *ret;
2274 2277 PyObject *gca = index_commonancestorsheads(self, args);
2275 2278 if (gca == NULL)
2276 2279 return NULL;
2277 2280
2278 2281 if (PyList_GET_SIZE(gca) <= 1) {
2279 2282 return gca;
2280 2283 }
2281 2284
2282 2285 ret = find_deepest(self, gca);
2283 2286 Py_DECREF(gca);
2284 2287 return ret;
2285 2288 }
2286 2289
/*
 * Invalidate any trie entries introduced by added revs.
 *
 * Walks self->added from position 'start' onward and removes each
 * entry's node from the node trie. If the whole list is being
 * invalidated (start == 0), the list itself is released.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len = PyList_GET_SIZE(self->added);

	for (i = start; i < len; i++) {
		PyObject *tuple = PyList_GET_ITEM(self->added, i);
		/* slot 7 of an entry tuple holds the binary node id */
		PyObject *node = PyTuple_GET_ITEM(tuple, 7);

		nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
	}

	if (start == 0)
		Py_CLEAR(self->added);
}
2304 2307
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range, but exclude the sentinel nullid entry.
 *
 * Only contiguous (step == 1) slices reaching the end of the index
 * are supported. Returns 0 on success, -1 with an exception set on
 * invalid slices or internal errors.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	/* +1 accounts for the virtual nullid entry at the end */
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	/* Argument changed from PySliceObject* to PyObject* in Python 3. */
#ifdef IS_PY3K
	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
#else
	if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
	                         &step, &slicelength) < 0)
#endif
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* normalize a reversed slice into an equivalent forward one */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* deletion reaches into the on-disk part of the index */
		if (self->ntinitialized) {
			Py_ssize_t i;

			for (i = start + 1; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->added)
				index_invalidate_added(self, 0);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		}
		self->length = start;
		if (start < self->raw_length) {
			/* drop cached entry tuples for deleted revs */
			if (self->cache) {
				Py_ssize_t i;
				for (i = start; i < self->raw_length; i++)
					Py_CLEAR(self->cache[i]);
			}
			self->raw_length = start;
		}
		goto done;
	}

	/* deletion only touches revs appended in memory (self->added) */
	if (self->ntinitialized) {
		index_invalidate_added(self, start - self->length);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	}
	if (self->added)
		ret = PyList_SetSlice(self->added, start - self->length,
		                      PyList_GET_SIZE(self->added), NULL);
done:
	/* cached head revisions are stale after any deletion */
	Py_CLEAR(self->headrevs);
	return ret;
}
2389 2392
/*
 * Supported ops:
 *
 * slice deletion
 * string assignment (extend node->rev mapping)
 * string deletion (shrink node->rev mapping)
 *
 * Returns 0 on success, -1 with an exception set otherwise.
 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
	char *node;
	long rev;

	/* del index[i:j] */
	if (PySlice_Check(item) && value == NULL)
		return index_slice_del(self, item);

	if (node_check(item, &node) == -1)
		return -1;

	/* del index[node]: drop the trie entry (no-op if no trie yet) */
	if (value == NULL)
		return self->ntinitialized ? nt_delete_node(&self->nt, node)
		                           : 0;
	/* index[node] = rev */
	rev = PyInt_AsLong(value);
	if (rev > INT_MAX || rev < 0) {
		/* PyInt_AsLong may already have set its own exception */
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "rev out of range");
		return -1;
	}

	if (index_init_nt(self) == -1)
		return -1;
	return nt_insert(&self->nt, node, (int)rev);
}
2423 2426
2424 2427 /*
2425 2428 * Find all RevlogNG entries in an index that has inline data. Update
2426 2429 * the optional "offsets" table with those entries.
2427 2430 */
2428 2431 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2429 2432 {
2430 2433 const char *data = (const char *)self->buf.buf;
2431 2434 Py_ssize_t pos = 0;
2432 2435 Py_ssize_t end = self->buf.len;
2433 2436 long incr = v1_hdrsize;
2434 2437 Py_ssize_t len = 0;
2435 2438
2436 2439 while (pos + v1_hdrsize <= end && pos >= 0) {
2437 2440 uint32_t comp_len;
2438 2441 /* 3rd element of header is length of compressed inline data */
2439 2442 comp_len = getbe32(data + pos + 8);
2440 2443 incr = v1_hdrsize + comp_len;
2441 2444 if (offsets)
2442 2445 offsets[len] = data + pos;
2443 2446 len++;
2444 2447 pos += incr;
2445 2448 }
2446 2449
2447 2450 if (pos != end) {
2448 2451 if (!PyErr_Occurred())
2449 2452 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2450 2453 return -1;
2451 2454 }
2452 2455
2453 2456 return len;
2454 2457 }
2455 2458
/*
 * Initialize an index object from a (data, inlined) argument tuple.
 *
 * data must support the buffer interface and hold the raw index file;
 * inlined is truthy when revision data is interleaved with the index
 * entries. Returns 0 on success, -1 with an exception set on failure;
 * partially-acquired resources are released by index_dealloc().
 */
static int index_init(indexObject *self, PyObject *args)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	/* Initialize before argument-checking to avoid index_dealloc() crash.
	 */
	self->raw_length = 0;
	self->added = NULL;
	self->cache = NULL;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->ntinitialized = 0;
	self->offsets = NULL;

	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
		                "data does not support buffer interface");
		return -1;
	}

	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		/* inline entries have variable size; scan to count them */
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->raw_length = len;
		self->length = len;
	} else {
		/* fixed-size records: a partial record means corruption */
		if (size % v1_hdrsize) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->raw_length = size / v1_hdrsize;
		self->length = self->raw_length;
	}

	return 0;
bail:
	return -1;
}
2512 2515
2513 2516 static PyObject *index_nodemap(indexObject *self)
2514 2517 {
2515 2518 Py_INCREF(self);
2516 2519 return (PyObject *)self;
2517 2520 }
2518 2521
2519 2522 static void _index_clearcaches(indexObject *self)
2520 2523 {
2521 2524 if (self->cache) {
2522 2525 Py_ssize_t i;
2523 2526
2524 2527 for (i = 0; i < self->raw_length; i++)
2525 2528 Py_CLEAR(self->cache[i]);
2526 2529 free(self->cache);
2527 2530 self->cache = NULL;
2528 2531 }
2529 2532 if (self->offsets) {
2530 2533 PyMem_Free((void *)self->offsets);
2531 2534 self->offsets = NULL;
2532 2535 }
2533 2536 if (self->ntinitialized) {
2534 2537 nt_dealloc(&self->nt);
2535 2538 }
2536 2539 self->ntinitialized = 0;
2537 2540 Py_CLEAR(self->headrevs);
2538 2541 }
2539 2542
2540 2543 static PyObject *index_clearcaches(indexObject *self)
2541 2544 {
2542 2545 _index_clearcaches(self);
2543 2546 self->ntrev = -1;
2544 2547 self->ntlookups = self->ntmisses = 0;
2545 2548 Py_RETURN_NONE;
2546 2549 }
2547 2550
/* Deallocate the index object, releasing every owned resource. */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	/* release the buffer view before dropping the backing object */
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	Py_XDECREF(self->added);
	PyObject_Del(self);
}
2560 2563
/* Sequence protocol: len(index), index[rev] and "node in index". */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
2571 2574
/* Mapping protocol: subscripting and slice deletion / node assignment
 * (see index_assign_subscript for the supported operations). */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
2577 2580
/* Methods exposed on parsers.index instances. */
static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {NULL} /* Sentinel */
};
2607 2610
/* Attributes: index.nodemap returns the index itself (see index_nodemap). */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
2612 2615
/*
 * Type object for parsers.index: a lazily-parsed revlog index exposing
 * both a sequence interface (entries keyed by rev) and a mapping
 * interface (node-id keys; see index_mapping_methods).
 */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",            /* tp_name */
    sizeof(indexObject),        /* tp_basicsize */
    0,                          /* tp_itemsize */
    (destructor)index_dealloc,  /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    &index_sequence_methods,    /* tp_as_sequence */
    &index_mapping_methods,     /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    "revlog index",             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    index_methods,              /* tp_methods */
    0,                          /* tp_members */
    index_getset,               /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)index_init,       /* tp_init */
    0,                          /* tp_alloc */
};
2652 2655
/*
 * returns a tuple of the form (index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses RevlogNG records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 * index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
PyObject *parse_index2(PyObject *self, PyObject *args)
{
	PyObject *tuple = NULL, *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
	if (idx == NULL)
		goto bail;

	ret = index_init(idx, args);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	/* "NN" steals the idx and cache references, so the success path
	 * does no DECREF. NOTE(review): on Py_BuildValue failure the
	 * XDECREFs at bail assume the references were not consumed —
	 * confirm against the CPython Py_BuildValue "N" documentation. */
	tuple = Py_BuildValue("NN", idx, cache);
	if (!tuple)
		goto bail;
	return tuple;

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	Py_XDECREF(tuple);
	return NULL;
}
2697 2700
#ifdef WITH_RUST

/* rustlazyancestors: iteration over ancestors implemented in Rust
 *
 * This class holds a reference to an index and to the Rust iterator.
 */
typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;

struct rustlazyancestorsObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	indexObject *index; /* Ref kept to avoid GC'ing the index */
	void *iter;         /* Rust iterator */
};

/* FFI exposed from Rust code */
/* Build the Rust-side iterator; NULL on error (an exception may
 * already be set by the index parent lookup — see rustla_init). */
rustlazyancestorsObject *rustlazyancestors_init(indexObject *index,
						/* intrevs vector */
						Py_ssize_t initrevslen,
						long *initrevs, long stoprev,
						int inclusive);
/* Free the Rust-side iterator state. */
void rustlazyancestors_drop(rustlazyancestorsObject *self);
/* Advance the iterator; -1 ends iteration (see rustla_next). */
int rustlazyancestors_next(rustlazyancestorsObject *self);
/* Membership test; non-zero when rev is contained. */
int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2722 2725
2723 2726 /* CPython instance methods */
2724 2727 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2725 2728 {
2726 2729 PyObject *initrevsarg = NULL;
2727 2730 PyObject *inclusivearg = NULL;
2728 2731 long stoprev = 0;
2729 2732 long *initrevs = NULL;
2730 2733 int inclusive = 0;
2731 2734 Py_ssize_t i;
2732 2735
2733 2736 indexObject *index;
2734 2737 if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
2735 2738 &PyList_Type, &initrevsarg, &stoprev,
2736 2739 &PyBool_Type, &inclusivearg))
2737 2740 return -1;
2738 2741
2739 2742 Py_INCREF(index);
2740 2743 self->index = index;
2741 2744
2742 2745 if (inclusivearg == Py_True)
2743 2746 inclusive = 1;
2744 2747
2745 2748 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2746 2749
2747 2750 initrevs = (long *)calloc(linit, sizeof(long));
2748 2751
2749 2752 if (initrevs == NULL) {
2750 2753 PyErr_NoMemory();
2751 2754 goto bail;
2752 2755 }
2753 2756
2754 2757 for (i = 0; i < linit; i++) {
2755 2758 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2756 2759 }
2757 2760 if (PyErr_Occurred())
2758 2761 goto bail;
2759 2762
2760 2763 self->iter =
2761 2764 rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
2762 2765 if (self->iter == NULL) {
2763 2766 /* if this is because of GraphError::ParentOutOfRange
2764 2767 * HgRevlogIndex_GetParents() has already set the proper
2765 2768 * exception */
2766 2769 goto bail;
2767 2770 }
2768 2771
2769 2772 free(initrevs);
2770 2773 return 0;
2771 2774
2772 2775 bail:
2773 2776 free(initrevs);
2774 2777 return -1;
2775 2778 };
2776 2779
/* Release the index reference and the Rust iterator, if any. */
static void rustla_dealloc(rustlazyancestorsObject *self)
{
	Py_XDECREF(self->index);
	if (self->iter != NULL) { /* can happen if rustla_init failed */
		rustlazyancestors_drop(self->iter);
	}
	PyObject_Del(self);
}
2785 2788
2786 2789 static PyObject *rustla_next(rustlazyancestorsObject *self)
2787 2790 {
2788 2791 int res = rustlazyancestors_next(self->iter);
2789 2792 if (res == -1) {
2790 2793 /* Setting an explicit exception seems unnecessary
2791 2794 * as examples from Python source code (Objects/rangeobjets.c
2792 2795 * and Modules/_io/stringio.c) seem to demonstrate.
2793 2796 */
2794 2797 return NULL;
2795 2798 }
2796 2799 return PyInt_FromLong(res);
2797 2800 }
2798 2801
2799 2802 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2800 2803 {
2801 2804 long lrev;
2802 2805 if (!pylong_to_long(rev, &lrev)) {
2803 2806 PyErr_Clear();
2804 2807 return 0;
2805 2808 }
2806 2809 return rustlazyancestors_contains(self->iter, lrev);
2807 2810 }
2808 2811
/* Sequence protocol: only membership ("rev in iterator") is supported. */
static PySequenceMethods rustla_sequence_methods = {
    0,                           /* sq_length */
    0,                           /* sq_concat */
    0,                           /* sq_repeat */
    0,                           /* sq_item */
    0,                           /* sq_slice */
    0,                           /* sq_ass_item */
    0,                           /* sq_ass_slice */
    (objobjproc)rustla_contains, /* sq_contains */
};
2819 2822
/* Type object for parsers.rustlazyancestors: an iterator (tp_iternext)
 * that also supports membership tests via rustla_sequence_methods. */
static PyTypeObject rustlazyancestorsType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.rustlazyancestors",     /* tp_name */
    sizeof(rustlazyancestorsObject), /* tp_basicsize */
    0,                               /* tp_itemsize */
    (destructor)rustla_dealloc,      /* tp_dealloc */
    0,                               /* tp_print */
    0,                               /* tp_getattr */
    0,                               /* tp_setattr */
    0,                               /* tp_compare */
    0,                               /* tp_repr */
    0,                               /* tp_as_number */
    &rustla_sequence_methods,        /* tp_as_sequence */
    0,                               /* tp_as_mapping */
    0,                               /* tp_hash */
    0,                               /* tp_call */
    0,                               /* tp_str */
    0,                               /* tp_getattro */
    0,                               /* tp_setattro */
    0,                               /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,              /* tp_flags */
    "Iterator over ancestors, implemented in Rust", /* tp_doc */
    0,                               /* tp_traverse */
    0,                               /* tp_clear */
    0,                               /* tp_richcompare */
    0,                               /* tp_weaklistoffset */
    0,                               /* tp_iter */
    (iternextfunc)rustla_next,       /* tp_iternext */
    0,                               /* tp_methods */
    0,                               /* tp_members */
    0,                               /* tp_getset */
    0,                               /* tp_base */
    0,                               /* tp_dict */
    0,                               /* tp_descr_get */
    0,                               /* tp_descr_set */
    0,                               /* tp_dictoffset */
    (initproc)rustla_init,           /* tp_init */
    0,                               /* tp_alloc */
};
2859 2862 #endif /* WITH_RUST */
2860 2863
/*
 * Register the revlog types on the given module and build the shared
 * sentinel entry for the null revision. A failed PyType_Ready returns
 * early, leaving the corresponding attributes unset on the module.
 */
void revlog_module_init(PyObject *mod)
{
	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
		return;
	Py_INCREF(&HgRevlogIndex_Type);
	PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	/* built once per process and shared by every index object */
	if (!nullentry) {
		nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0,
		                          0, -1, -1, -1, -1, nullid, 20);
	}
	if (nullentry)
		/* keep the cycle collector away from this shared tuple */
		PyObject_GC_UnTrack(nullentry);

#ifdef WITH_RUST
	rustlazyancestorsType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&rustlazyancestorsType) < 0)
		return;
	Py_INCREF(&rustlazyancestorsType);
	PyModule_AddObject(mod, "rustlazyancestors",
	                   (PyObject *)&rustlazyancestorsType);
#endif
}
General Comments 0
You need to be logged in to leave comments. Login now