phases: make sure an exception should be set on error return...
Yuya Nishihara
r45737:03332e5f default
@@ -1,2956 +1,2960 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18
19 19 #include "bitmanipulation.h"
20 20 #include "charencode.h"
21 21 #include "revlog.h"
22 22 #include "util.h"
23 23
24 24 #ifdef IS_PY3K
25 25 /* The mapping of Python types is meant to be temporary to get Python
26 26  * 3 to compile. We should remove this once Python 3 is fully
27 27 * supported and proper types are used in the extensions themselves. */
28 28 #define PyInt_Check PyLong_Check
29 29 #define PyInt_FromLong PyLong_FromLong
30 30 #define PyInt_FromSsize_t PyLong_FromSsize_t
31 31 #define PyInt_AsLong PyLong_AsLong
32 32 #endif
33 33
34 34 typedef struct indexObjectStruct indexObject;
35 35
36 36 typedef struct {
37 37 int children[16];
38 38 } nodetreenode;
39 39
40 40 typedef struct {
41 41 int abi_version;
42 42 Py_ssize_t (*index_length)(const indexObject *);
43 43 const char *(*index_node)(indexObject *, Py_ssize_t);
44 44 int (*index_parents)(PyObject *, int, int *);
45 45 } Revlog_CAPI;
46 46
47 47 /*
48 48 * A base-16 trie for fast node->rev mapping.
49 49 *
50 50 * Positive value is index of the next node in the trie
51 51 * Negative value is a leaf: -(rev + 2)
52 52 * Zero is empty
53 53 */
54 54 typedef struct {
55 55 indexObject *index;
56 56 nodetreenode *nodes;
57 57 unsigned length; /* # nodes in use */
58 58 unsigned capacity; /* # nodes allocated */
59 59 int depth; /* maximum depth of tree */
60 60 int splits; /* # splits performed */
61 61 } nodetree;
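/*
 * Illustrative helpers (editorial sketch, not part of the original change):
 * how a child slot value in the base-16 trie is interpreted. The helper
 * names are hypothetical; the real code inlines this arithmetic.
 */
static inline int nt_slot_is_leaf(int v)
{
	return v < 0; /* negative values are leaves encoding -(rev + 2) */
}

static inline int nt_slot_to_rev(int v)
{
	return -(v + 2); /* invert the -(rev + 2) leaf encoding */
}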
62 62
63 63 typedef struct {
64 64 PyObject_HEAD /* ; */
65 65 nodetree nt;
66 66 } nodetreeObject;
67 67
68 68 /*
69 69 * This class has two behaviors.
70 70 *
71 71 * When used in a list-like way (with integer keys), we decode an
72 72 * entry in a RevlogNG index file on demand. We have limited support for
73 73 * integer-keyed insert and delete, only at elements right before the
74 74 * end.
75 75 *
76 76 * With string keys, we lazily perform a reverse mapping from node to
77 77 * rev, using a base-16 trie.
78 78 */
79 79 struct indexObjectStruct {
80 80 PyObject_HEAD
81 81 /* Type-specific fields go here. */
82 82 PyObject *data; /* raw bytes of index */
83 83 Py_buffer buf; /* buffer of data */
84 84 PyObject **cache; /* cached tuples */
85 85 const char **offsets; /* populated on demand */
86 86 Py_ssize_t raw_length; /* original number of elements */
87 87 Py_ssize_t length; /* current number of elements */
88 88 PyObject *added; /* populated on demand */
89 89 PyObject *headrevs; /* cache, invalidated on changes */
90 90 PyObject *filteredrevs; /* filtered revs set */
91 91 nodetree nt; /* base-16 trie */
92 92 int ntinitialized; /* 0 or 1 */
93 93 int ntrev; /* last rev scanned */
94 94 int ntlookups; /* # lookups */
95 95 int ntmisses; /* # lookups that miss the cache */
96 96 int inlined;
97 97 };
98 98
99 99 static Py_ssize_t index_length(const indexObject *self)
100 100 {
101 101 if (self->added == NULL)
102 102 return self->length;
103 103 return self->length + PyList_GET_SIZE(self->added);
104 104 }
105 105
106 106 static PyObject *nullentry = NULL;
107 107 static const char nullid[20] = {0};
108 108 static const Py_ssize_t nullrev = -1;
109 109
110 110 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
111 111
112 112 static int index_find_node(indexObject *self, const char *node,
113 113 Py_ssize_t nodelen);
114 114
115 115 #if LONG_MAX == 0x7fffffffL
116 116 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
117 117 #else
118 118 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
119 119 #endif
120 120
121 121 /* A RevlogNG v1 index entry is 64 bytes long. */
122 122 static const long v1_hdrsize = 64;
123 123
124 124 static void raise_revlog_error(void)
125 125 {
126 126 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
127 127
128 128 mod = PyImport_ImportModule("mercurial.error");
129 129 if (mod == NULL) {
130 130 goto cleanup;
131 131 }
132 132
133 133 dict = PyModule_GetDict(mod);
134 134 if (dict == NULL) {
135 135 goto cleanup;
136 136 }
137 137 Py_INCREF(dict);
138 138
139 139 errclass = PyDict_GetItemString(dict, "RevlogError");
140 140 if (errclass == NULL) {
141 141 PyErr_SetString(PyExc_SystemError,
142 142 "could not find RevlogError");
143 143 goto cleanup;
144 144 }
145 145
146 146 /* value of exception is ignored by callers */
147 147 PyErr_SetString(errclass, "RevlogError");
148 148
149 149 cleanup:
150 150 Py_XDECREF(dict);
151 151 Py_XDECREF(mod);
152 152 }
153 153
154 154 /*
155 155 * Return a pointer to the beginning of a RevlogNG record.
156 156 */
157 157 static const char *index_deref(indexObject *self, Py_ssize_t pos)
158 158 {
159 159 if (self->inlined && pos > 0) {
160 160 if (self->offsets == NULL) {
161 161 Py_ssize_t ret;
162 162 self->offsets = PyMem_Malloc(self->raw_length *
163 163 sizeof(*self->offsets));
164 164 if (self->offsets == NULL)
165 165 return (const char *)PyErr_NoMemory();
166 166 ret = inline_scan(self, self->offsets);
167 167 if (ret == -1) {
168 168 return NULL;
169 169 };
170 170 }
171 171 return self->offsets[pos];
172 172 }
173 173
174 174 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
175 175 }
176 176
177 177 /*
178 178 * Get parents of the given rev.
179 179 *
180 180 * The specified rev must be valid and must not be nullrev. A returned
181 181 * parent revision may be nullrev, but is guaranteed to be in valid range.
182 182 */
183 183 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
184 184 int maxrev)
185 185 {
186 186 if (rev >= self->length) {
187 187 long tmp;
188 188 PyObject *tuple =
189 189 PyList_GET_ITEM(self->added, rev - self->length);
190 190 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
191 191 return -1;
192 192 }
193 193 ps[0] = (int)tmp;
194 194 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
195 195 return -1;
196 196 }
197 197 ps[1] = (int)tmp;
198 198 } else {
199 199 const char *data = index_deref(self, rev);
200 200 ps[0] = getbe32(data + 24);
201 201 ps[1] = getbe32(data + 28);
202 202 }
203 203 	/* If the index file is corrupted, ps[] may point to invalid revisions,
204 204 	 * so trusting them unconditionally risks a buffer overflow. */
205 205 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
206 206 PyErr_SetString(PyExc_ValueError, "parent out of range");
207 207 return -1;
208 208 }
209 209 return 0;
210 210 }
211 211
212 212 /*
213 213 * Get parents of the given rev.
214 214 *
215 215 * If the specified rev is out of range, IndexError will be raised. If the
216 216 * revlog entry is corrupted, ValueError may be raised.
217 217 *
218 218 * Returns 0 on success or -1 on failure.
219 219 */
220 220 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
221 221 {
222 222 int tiprev;
223 223 if (!op || !HgRevlogIndex_Check(op) || !ps) {
224 224 PyErr_BadInternalCall();
225 225 return -1;
226 226 }
227 227 tiprev = (int)index_length((indexObject *)op) - 1;
228 228 if (rev < -1 || rev > tiprev) {
229 229 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
230 230 return -1;
231 231 } else if (rev == -1) {
232 232 ps[0] = ps[1] = -1;
233 233 return 0;
234 234 } else {
235 235 return index_get_parents((indexObject *)op, rev, ps, tiprev);
236 236 }
237 237 }
238 238
239 239 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
240 240 {
241 241 uint64_t offset;
242 242 if (rev == nullrev) {
243 243 return 0;
244 244 }
245 245 if (rev >= self->length) {
246 246 PyObject *tuple;
247 247 PyObject *pylong;
248 248 PY_LONG_LONG tmp;
249 249 tuple = PyList_GET_ITEM(self->added, rev - self->length);
250 250 pylong = PyTuple_GET_ITEM(tuple, 0);
251 251 tmp = PyLong_AsLongLong(pylong);
252 252 if (tmp == -1 && PyErr_Occurred()) {
253 253 return -1;
254 254 }
255 255 if (tmp < 0) {
256 256 PyErr_Format(PyExc_OverflowError,
257 257 "revlog entry size out of bound (%lld)",
258 258 (long long)tmp);
259 259 return -1;
260 260 }
261 261 offset = (uint64_t)tmp;
262 262 } else {
263 263 const char *data = index_deref(self, rev);
264 264 offset = getbe32(data + 4);
265 265 if (rev == 0) {
266 266 /* mask out version number for the first entry */
267 267 offset &= 0xFFFF;
268 268 } else {
269 269 uint32_t offset_high = getbe32(data);
270 270 offset |= ((uint64_t)offset_high) << 32;
271 271 }
272 272 }
273 273 return (int64_t)(offset >> 16);
274 274 }
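/*
 * Illustrative helpers (editorial sketch, not part of the original change):
 * the 8-byte "offset + flags" field packs a 48-bit byte offset in its upper
 * bits and 16 flag bits in its lower bits; entry 0 reuses the offset bytes
 * for the revlog version header, hence the 0xFFFF mask above. The helper
 * names are hypothetical.
 */
static inline int64_t offset_flags_to_offset(uint64_t offset_flags)
{
	return (int64_t)(offset_flags >> 16); /* drop the 16 flag bits */
}

static inline int offset_flags_to_flags(uint64_t offset_flags)
{
	return (int)(offset_flags & 0xFFFF); /* keep only the flag bits */
}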
275 275
276 276 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
277 277 {
278 278 if (rev == nullrev) {
279 279 return 0;
280 280 }
281 281 if (rev >= self->length) {
282 282 PyObject *tuple;
283 283 PyObject *pylong;
284 284 long ret;
285 285 tuple = PyList_GET_ITEM(self->added, rev - self->length);
286 286 pylong = PyTuple_GET_ITEM(tuple, 1);
287 287 ret = PyInt_AsLong(pylong);
288 288 if (ret == -1 && PyErr_Occurred()) {
289 289 return -1;
290 290 }
291 291 if (ret < 0 || ret > (long)INT_MAX) {
292 292 PyErr_Format(PyExc_OverflowError,
293 293 "revlog entry size out of bound (%ld)",
294 294 ret);
295 295 return -1;
296 296 }
297 297 return (int)ret;
298 298 } else {
299 299 const char *data = index_deref(self, rev);
300 300 int tmp = (int)getbe32(data + 8);
301 301 if (tmp < 0) {
302 302 PyErr_Format(PyExc_OverflowError,
303 303 "revlog entry size out of bound (%d)",
304 304 tmp);
305 305 return -1;
306 306 }
307 307 return tmp;
308 308 }
309 309 }
310 310
311 311 /*
312 312 * RevlogNG format (all in big endian, data may be inlined):
313 313 * 6 bytes: offset
314 314 * 2 bytes: flags
315 315 * 4 bytes: compressed length
316 316 * 4 bytes: uncompressed length
317 317 * 4 bytes: base revision
318 318 * 4 bytes: link revision
319 319 * 4 bytes: parent 1 revision
320 320 * 4 bytes: parent 2 revision
321 321 * 32 bytes: nodeid (only 20 bytes used)
322 322 */
323 323 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
324 324 {
325 325 uint64_t offset_flags;
326 326 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
327 327 const char *c_node_id;
328 328 const char *data;
329 329 Py_ssize_t length = index_length(self);
330 330 PyObject *entry;
331 331
332 332 if (pos == nullrev) {
333 333 Py_INCREF(nullentry);
334 334 return nullentry;
335 335 }
336 336
337 337 if (pos < 0 || pos >= length) {
338 338 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
339 339 return NULL;
340 340 }
341 341
342 342 if (pos >= self->length) {
343 343 PyObject *obj;
344 344 obj = PyList_GET_ITEM(self->added, pos - self->length);
345 345 Py_INCREF(obj);
346 346 return obj;
347 347 }
348 348
349 349 if (self->cache) {
350 350 if (self->cache[pos]) {
351 351 Py_INCREF(self->cache[pos]);
352 352 return self->cache[pos];
353 353 }
354 354 } else {
355 355 self->cache = calloc(self->raw_length, sizeof(PyObject *));
356 356 if (self->cache == NULL)
357 357 return PyErr_NoMemory();
358 358 }
359 359
360 360 data = index_deref(self, pos);
361 361 if (data == NULL)
362 362 return NULL;
363 363
364 364 offset_flags = getbe32(data + 4);
365 365 if (pos == 0) /* mask out version number for the first entry */
366 366 offset_flags &= 0xFFFF;
367 367 else {
368 368 uint32_t offset_high = getbe32(data);
369 369 offset_flags |= ((uint64_t)offset_high) << 32;
370 370 }
371 371
372 372 comp_len = getbe32(data + 8);
373 373 uncomp_len = getbe32(data + 12);
374 374 base_rev = getbe32(data + 16);
375 375 link_rev = getbe32(data + 20);
376 376 parent_1 = getbe32(data + 24);
377 377 parent_2 = getbe32(data + 28);
378 378 c_node_id = data + 32;
379 379
380 380 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
381 381 base_rev, link_rev, parent_1, parent_2, c_node_id,
382 382 (Py_ssize_t)20);
383 383
384 384 if (entry) {
385 385 PyObject_GC_UnTrack(entry);
386 386 Py_INCREF(entry);
387 387 }
388 388
389 389 self->cache[pos] = entry;
390 390
391 391 return entry;
392 392 }
393 393
394 394 /*
395 395 * Return the 20-byte SHA of the node corresponding to the given rev.
396 396 */
397 397 static const char *index_node(indexObject *self, Py_ssize_t pos)
398 398 {
399 399 Py_ssize_t length = index_length(self);
400 400 const char *data;
401 401
402 402 if (pos == nullrev)
403 403 return nullid;
404 404
405 405 if (pos >= length)
406 406 return NULL;
407 407
408 408 if (pos >= self->length) {
409 409 PyObject *tuple, *str;
410 410 tuple = PyList_GET_ITEM(self->added, pos - self->length);
411 411 str = PyTuple_GetItem(tuple, 7);
412 412 return str ? PyBytes_AS_STRING(str) : NULL;
413 413 }
414 414
415 415 data = index_deref(self, pos);
416 416 return data ? data + 32 : NULL;
417 417 }
418 418
419 419 /*
420 420 * Return the 20-byte SHA of the node corresponding to the given rev. The
421 421  * rev is assumed to exist. If not, an exception is set.
422 422 */
423 423 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
424 424 {
425 425 const char *node = index_node(self, pos);
426 426 if (node == NULL) {
427 427 PyErr_Format(PyExc_IndexError, "could not access rev %d",
428 428 (int)pos);
429 429 }
430 430 return node;
431 431 }
432 432
433 433 static int nt_insert(nodetree *self, const char *node, int rev);
434 434
435 435 static int node_check(PyObject *obj, char **node)
436 436 {
437 437 Py_ssize_t nodelen;
438 438 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
439 439 return -1;
440 440 if (nodelen == 20)
441 441 return 0;
442 442 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
443 443 return -1;
444 444 }
445 445
446 446 static PyObject *index_append(indexObject *self, PyObject *obj)
447 447 {
448 448 char *node;
449 449 Py_ssize_t len;
450 450
451 451 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
452 452 PyErr_SetString(PyExc_TypeError, "8-tuple required");
453 453 return NULL;
454 454 }
455 455
456 456 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
457 457 return NULL;
458 458
459 459 len = index_length(self);
460 460
461 461 if (self->added == NULL) {
462 462 self->added = PyList_New(0);
463 463 if (self->added == NULL)
464 464 return NULL;
465 465 }
466 466
467 467 if (PyList_Append(self->added, obj) == -1)
468 468 return NULL;
469 469
470 470 if (self->ntinitialized)
471 471 nt_insert(&self->nt, node, (int)len);
472 472
473 473 Py_CLEAR(self->headrevs);
474 474 Py_RETURN_NONE;
475 475 }
476 476
477 477 static PyObject *index_stats(indexObject *self)
478 478 {
479 479 PyObject *obj = PyDict_New();
480 480 PyObject *s = NULL;
481 481 PyObject *t = NULL;
482 482
483 483 if (obj == NULL)
484 484 return NULL;
485 485
486 486 #define istat(__n, __d) \
487 487 do { \
488 488 s = PyBytes_FromString(__d); \
489 489 t = PyInt_FromSsize_t(self->__n); \
490 490 if (!s || !t) \
491 491 goto bail; \
492 492 if (PyDict_SetItem(obj, s, t) == -1) \
493 493 goto bail; \
494 494 Py_CLEAR(s); \
495 495 Py_CLEAR(t); \
496 496 } while (0)
497 497
498 498 if (self->added) {
499 499 Py_ssize_t len = PyList_GET_SIZE(self->added);
500 500 s = PyBytes_FromString("index entries added");
501 501 t = PyInt_FromSsize_t(len);
502 502 if (!s || !t)
503 503 goto bail;
504 504 if (PyDict_SetItem(obj, s, t) == -1)
505 505 goto bail;
506 506 Py_CLEAR(s);
507 507 Py_CLEAR(t);
508 508 }
509 509
510 510 if (self->raw_length != self->length)
511 511 istat(raw_length, "revs on disk");
512 512 istat(length, "revs in memory");
513 513 istat(ntlookups, "node trie lookups");
514 514 istat(ntmisses, "node trie misses");
515 515 istat(ntrev, "node trie last rev scanned");
516 516 if (self->ntinitialized) {
517 517 istat(nt.capacity, "node trie capacity");
518 518 istat(nt.depth, "node trie depth");
519 519 istat(nt.length, "node trie count");
520 520 istat(nt.splits, "node trie splits");
521 521 }
522 522
523 523 #undef istat
524 524
525 525 return obj;
526 526
527 527 bail:
528 528 Py_XDECREF(obj);
529 529 Py_XDECREF(s);
530 530 Py_XDECREF(t);
531 531 return NULL;
532 532 }
533 533
534 534 /*
535 535 * When we cache a list, we want to be sure the caller can't mutate
536 536 * the cached copy.
537 537 */
538 538 static PyObject *list_copy(PyObject *list)
539 539 {
540 540 Py_ssize_t len = PyList_GET_SIZE(list);
541 541 PyObject *newlist = PyList_New(len);
542 542 Py_ssize_t i;
543 543
544 544 if (newlist == NULL)
545 545 return NULL;
546 546
547 547 for (i = 0; i < len; i++) {
548 548 PyObject *obj = PyList_GET_ITEM(list, i);
549 549 Py_INCREF(obj);
550 550 PyList_SET_ITEM(newlist, i, obj);
551 551 }
552 552
553 553 return newlist;
554 554 }
555 555
556 556 static int check_filter(PyObject *filter, Py_ssize_t arg)
557 557 {
558 558 if (filter) {
559 559 PyObject *arglist, *result;
560 560 int isfiltered;
561 561
562 562 arglist = Py_BuildValue("(n)", arg);
563 563 if (!arglist) {
564 564 return -1;
565 565 }
566 566
567 567 result = PyEval_CallObject(filter, arglist);
568 568 Py_DECREF(arglist);
569 569 if (!result) {
570 570 return -1;
571 571 }
572 572
573 573 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
574 574 * same as this function, so we can just return it directly.*/
575 575 isfiltered = PyObject_IsTrue(result);
576 576 Py_DECREF(result);
577 577 return isfiltered;
578 578 } else {
579 579 return 0;
580 580 }
581 581 }
582 582
583 583 static inline void set_phase_from_parents(char *phases, int parent_1,
584 584 int parent_2, Py_ssize_t i)
585 585 {
586 586 if (parent_1 >= 0 && phases[parent_1] > phases[i])
587 587 phases[i] = phases[parent_1];
588 588 if (parent_2 >= 0 && phases[parent_2] > phases[i])
589 589 phases[i] = phases[parent_2];
590 590 }
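/*
 * Worked example (illustrative only, not part of the original change):
 * phase propagation takes the maximum of the parents' phases, so a child
 * of a draft (1) and a secret (2) parent ends up secret. A sketch:
 */
#if 0 /* sketch, not compiled: statements cannot live at file scope */
char phases[3] = {1, 2, 0}; /* rev 0 draft, rev 1 secret, rev 2 unset */
set_phase_from_parents(phases, 0, 1, 2);
/* phases[2] is now 2 (secret): the higher parent phase wins */
#endif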
591 591
592 592 static PyObject *reachableroots2(indexObject *self, PyObject *args)
593 593 {
594 594
595 595 /* Input */
596 596 long minroot;
597 597 PyObject *includepatharg = NULL;
598 598 int includepath = 0;
599 599 /* heads and roots are lists */
600 600 PyObject *heads = NULL;
601 601 PyObject *roots = NULL;
602 602 PyObject *reachable = NULL;
603 603
604 604 PyObject *val;
605 605 Py_ssize_t len = index_length(self);
606 606 long revnum;
607 607 Py_ssize_t k;
608 608 Py_ssize_t i;
609 609 Py_ssize_t l;
610 610 int r;
611 611 int parents[2];
612 612
613 613 /* Internal data structure:
614 614 	 * tovisit: array of length len+1 (all revs + nullrev), filled up to
615 615 * lentovisit
616 616 *
617 617 * revstates: array of length len+1 (all revs + nullrev) */
618 618 int *tovisit = NULL;
619 619 long lentovisit = 0;
620 620 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
621 621 char *revstates = NULL;
622 622
623 623 /* Get arguments */
624 624 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
625 625 &PyList_Type, &roots, &PyBool_Type,
626 626 &includepatharg))
627 627 goto bail;
628 628
629 629 if (includepatharg == Py_True)
630 630 includepath = 1;
631 631
632 632 /* Initialize return set */
633 633 reachable = PyList_New(0);
634 634 if (reachable == NULL)
635 635 goto bail;
636 636
637 637 	/* Initialize internal data structures */
638 638 tovisit = (int *)malloc((len + 1) * sizeof(int));
639 639 if (tovisit == NULL) {
640 640 PyErr_NoMemory();
641 641 goto bail;
642 642 }
643 643
644 644 revstates = (char *)calloc(len + 1, 1);
645 645 if (revstates == NULL) {
646 646 PyErr_NoMemory();
647 647 goto bail;
648 648 }
649 649
650 650 l = PyList_GET_SIZE(roots);
651 651 for (i = 0; i < l; i++) {
652 652 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
653 653 if (revnum == -1 && PyErr_Occurred())
654 654 goto bail;
655 655 /* If root is out of range, e.g. wdir(), it must be unreachable
656 656 * from heads. So we can just ignore it. */
657 657 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
658 658 continue;
659 659 revstates[revnum + 1] |= RS_ROOT;
660 660 }
661 661
662 662 /* Populate tovisit with all the heads */
663 663 l = PyList_GET_SIZE(heads);
664 664 for (i = 0; i < l; i++) {
665 665 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
666 666 if (revnum == -1 && PyErr_Occurred())
667 667 goto bail;
668 668 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
669 669 PyErr_SetString(PyExc_IndexError, "head out of range");
670 670 goto bail;
671 671 }
672 672 if (!(revstates[revnum + 1] & RS_SEEN)) {
673 673 tovisit[lentovisit++] = (int)revnum;
674 674 revstates[revnum + 1] |= RS_SEEN;
675 675 }
676 676 }
677 677
678 678 /* Visit the tovisit list and find the reachable roots */
679 679 k = 0;
680 680 while (k < lentovisit) {
681 681 /* Add the node to reachable if it is a root*/
682 682 revnum = tovisit[k++];
683 683 if (revstates[revnum + 1] & RS_ROOT) {
684 684 revstates[revnum + 1] |= RS_REACHABLE;
685 685 val = PyInt_FromLong(revnum);
686 686 if (val == NULL)
687 687 goto bail;
688 688 r = PyList_Append(reachable, val);
689 689 Py_DECREF(val);
690 690 if (r < 0)
691 691 goto bail;
692 692 if (includepath == 0)
693 693 continue;
694 694 }
695 695
696 696 /* Add its parents to the list of nodes to visit */
697 697 if (revnum == nullrev)
698 698 continue;
699 699 r = index_get_parents(self, revnum, parents, (int)len - 1);
700 700 if (r < 0)
701 701 goto bail;
702 702 for (i = 0; i < 2; i++) {
703 703 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
704 704 parents[i] >= minroot) {
705 705 tovisit[lentovisit++] = parents[i];
706 706 revstates[parents[i] + 1] |= RS_SEEN;
707 707 }
708 708 }
709 709 }
710 710
711 711 /* Find all the nodes in between the roots we found and the heads
712 712 * and add them to the reachable set */
713 713 if (includepath == 1) {
714 714 long minidx = minroot;
715 715 if (minidx < 0)
716 716 minidx = 0;
717 717 for (i = minidx; i < len; i++) {
718 718 if (!(revstates[i + 1] & RS_SEEN))
719 719 continue;
720 720 r = index_get_parents(self, i, parents, (int)len - 1);
721 721 /* Corrupted index file, error is set from
722 722 * index_get_parents */
723 723 if (r < 0)
724 724 goto bail;
725 725 if (((revstates[parents[0] + 1] |
726 726 revstates[parents[1] + 1]) &
727 727 RS_REACHABLE) &&
728 728 !(revstates[i + 1] & RS_REACHABLE)) {
729 729 revstates[i + 1] |= RS_REACHABLE;
730 730 val = PyInt_FromSsize_t(i);
731 731 if (val == NULL)
732 732 goto bail;
733 733 r = PyList_Append(reachable, val);
734 734 Py_DECREF(val);
735 735 if (r < 0)
736 736 goto bail;
737 737 }
738 738 }
739 739 }
740 740
741 741 free(revstates);
742 742 free(tovisit);
743 743 return reachable;
744 744 bail:
745 745 Py_XDECREF(reachable);
746 746 free(revstates);
747 747 free(tovisit);
748 748 return NULL;
749 749 }
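/*
 * Illustrative sketch (editorial, not part of the original change):
 * revstates above is indexed by rev + 1 so that nullrev (-1) maps to slot 0
 * and real revisions fit in slots 1..len. The flag bits combine as follows.
 */
#if 0 /* sketch, not compiled: the enum is local to reachableroots2() */
char state = 0;
state |= RS_ROOT;      /* rev was passed in as a root */
state |= RS_SEEN;      /* rev has been queued in tovisit */
state |= RS_REACHABLE; /* rev is known to reach a head */
/* (RS_SEEN | RS_ROOT | RS_REACHABLE) == 7, i.e. all three bits set */
#endif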
750 750
751 751 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
752 752 char phase)
753 753 {
754 754 Py_ssize_t len = index_length(self);
755 755 PyObject *item;
756 756 PyObject *iterator;
757 757 int rev, minrev = -1;
758 758 char *node;
759 759
760 760 if (!PySet_Check(roots)) {
761 761 PyErr_SetString(PyExc_TypeError,
762 762 "roots must be a set of nodes");
763 763 return -2;
764 764 }
765 765 iterator = PyObject_GetIter(roots);
766 766 if (iterator == NULL)
767 767 return -2;
768 768 while ((item = PyIter_Next(iterator))) {
769 769 if (node_check(item, &node) == -1)
770 770 goto failed;
771 771 rev = index_find_node(self, node, 20);
772 772 /* null is implicitly public, so negative is invalid */
773 773 if (rev < 0 || rev >= len)
774 774 goto failed;
775 775 phases[rev] = phase;
776 776 if (minrev == -1 || minrev > rev)
777 777 minrev = rev;
778 778 Py_DECREF(item);
779 779 }
780 780 Py_DECREF(iterator);
781 781 return minrev;
782 782 failed:
783 783 Py_DECREF(iterator);
784 784 Py_DECREF(item);
785 785 return -2;
786 786 }
787 787
788 788 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
789 789 {
790 790 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
791 791 96: internal */
792 792 static const char trackedphases[] = {1, 2, 32, 96};
793 793 PyObject *ret = NULL;
794 794 PyObject *roots = Py_None;
795 795 PyObject *pyphase = NULL;
796 796 PyObject *pyrev = NULL;
797 797 PyObject *phaseroots = NULL;
798 798 PyObject *phasessize = NULL;
799 799 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
800 800 Py_ssize_t len = index_length(self);
801 801 char *phases = NULL;
802 802 int minphaserev = -1, rev, i;
803 803 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
804 804
805 805 if (!PyArg_ParseTuple(args, "O", &roots))
806 806 return NULL;
807 807 if (roots == NULL || !PyDict_Check(roots)) {
808 808 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
809 809 return NULL;
810 810 }
811 811
812 812 phases = calloc(len, 1);
813 813 if (phases == NULL) {
814 814 PyErr_NoMemory();
815 815 return NULL;
816 816 }
817 817
818 818 for (i = 0; i < numphases; ++i) {
819 819 pyphase = PyInt_FromLong(trackedphases[i]);
820 820 if (pyphase == NULL)
821 821 goto release;
822 822 phaseroots = PyDict_GetItem(roots, pyphase);
823 823 Py_DECREF(pyphase);
824 824 if (phaseroots == NULL)
825 825 continue;
826 826 rev = add_roots_get_min(self, phaseroots, phases,
827 827 trackedphases[i]);
828 828 phaseroots = NULL;
829 829 if (rev == -2)
830 830 goto release;
831 831 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
832 832 minphaserev = rev;
833 833 }
834 834
835 835 for (i = 0; i < numphases; ++i) {
836 836 phasesets[i] = PySet_New(NULL);
837 837 if (phasesets[i] == NULL)
838 838 goto release;
839 839 }
840 840
841 841 if (minphaserev == -1)
842 842 minphaserev = len;
843 843 for (rev = minphaserev; rev < len; ++rev) {
844 844 int parents[2];
845 845 /*
846 846 * The parent lookup could be skipped for phaseroots, but
847 847 * phase --force would historically not recompute them
848 848 		 * correctly, leaving descendants with a lower phase around.
849 849 * As such, unconditionally recompute the phase.
850 850 */
851 851 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
852 852 goto release;
853 853 set_phase_from_parents(phases, parents[0], parents[1], rev);
854 854 switch (phases[rev]) {
855 855 case 0:
856 856 continue;
857 857 case 1:
858 858 pyphase = phasesets[0];
859 859 break;
860 860 case 2:
861 861 pyphase = phasesets[1];
862 862 break;
863 863 case 32:
864 864 pyphase = phasesets[2];
865 865 break;
866 866 case 96:
867 867 pyphase = phasesets[3];
868 868 break;
869 869 default:
870 /* this should never happen since the phase number is
871 * specified by this function. */
872 PyErr_SetString(PyExc_SystemError,
873 "bad phase number in internal list");
870 874 goto release;
871 875 }
872 876 pyrev = PyInt_FromLong(rev);
873 877 if (pyrev == NULL)
874 878 goto release;
875 879 if (PySet_Add(pyphase, pyrev) == -1) {
876 880 Py_DECREF(pyrev);
877 881 goto release;
878 882 }
879 883 Py_DECREF(pyrev);
880 884 }
881 885 phaseroots = _dict_new_presized(numphases);
882 886 if (phaseroots == NULL)
883 887 goto release;
884 888 for (i = 0; i < numphases; ++i) {
885 889 pyphase = PyInt_FromLong(trackedphases[i]);
886 890 if (pyphase == NULL)
887 891 goto release;
888 892 if (PyDict_SetItem(phaseroots, pyphase, phasesets[i]) == -1) {
889 893 Py_DECREF(pyphase);
890 894 goto release;
891 895 }
892 896 Py_DECREF(phasesets[i]);
893 897 phasesets[i] = NULL;
894 898 }
895 899 phasessize = PyInt_FromSsize_t(len);
896 900 if (phasessize == NULL)
897 901 goto release;
898 902
899 903 ret = PyTuple_Pack(2, phasessize, phaseroots);
900 904 Py_DECREF(phasessize);
901 905 Py_DECREF(phaseroots);
902 906 return ret;
903 907
904 908 release:
905 909 for (i = 0; i < numphases; ++i)
906 910 Py_XDECREF(phasesets[i]);
907 911 Py_XDECREF(phaseroots);
908 912
909 913 free(phases);
910 914 return NULL;
911 915 }
912 916
913 917 static PyObject *index_headrevs(indexObject *self, PyObject *args)
914 918 {
915 919 Py_ssize_t i, j, len;
916 920 char *nothead = NULL;
917 921 PyObject *heads = NULL;
918 922 PyObject *filter = NULL;
919 923 PyObject *filteredrevs = Py_None;
920 924
921 925 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
922 926 return NULL;
923 927 }
924 928
925 929 if (self->headrevs && filteredrevs == self->filteredrevs)
926 930 return list_copy(self->headrevs);
927 931
928 932 Py_DECREF(self->filteredrevs);
929 933 self->filteredrevs = filteredrevs;
930 934 Py_INCREF(filteredrevs);
931 935
932 936 if (filteredrevs != Py_None) {
933 937 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
934 938 if (!filter) {
935 939 PyErr_SetString(
936 940 PyExc_TypeError,
937 941 "filteredrevs has no attribute __contains__");
938 942 goto bail;
939 943 }
940 944 }
941 945
942 946 len = index_length(self);
943 947 heads = PyList_New(0);
944 948 if (heads == NULL)
945 949 goto bail;
946 950 if (len == 0) {
947 951 PyObject *nullid = PyInt_FromLong(-1);
948 952 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
949 953 Py_XDECREF(nullid);
950 954 goto bail;
951 955 }
952 956 goto done;
953 957 }
954 958
955 959 nothead = calloc(len, 1);
956 960 if (nothead == NULL) {
957 961 PyErr_NoMemory();
958 962 goto bail;
959 963 }
960 964
961 965 for (i = len - 1; i >= 0; i--) {
962 966 int isfiltered;
963 967 int parents[2];
964 968
965 969 /* If nothead[i] == 1, it means we've seen an unfiltered child
966 970 * of this node already, and therefore this node is not
967 971 * filtered. So we can skip the expensive check_filter step.
968 972 */
969 973 if (nothead[i] != 1) {
970 974 isfiltered = check_filter(filter, i);
971 975 if (isfiltered == -1) {
972 976 PyErr_SetString(PyExc_TypeError,
973 977 "unable to check filter");
974 978 goto bail;
975 979 }
976 980
977 981 if (isfiltered) {
978 982 nothead[i] = 1;
979 983 continue;
980 984 }
981 985 }
982 986
983 987 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
984 988 goto bail;
985 989 for (j = 0; j < 2; j++) {
986 990 if (parents[j] >= 0)
987 991 nothead[parents[j]] = 1;
988 992 }
989 993 }
990 994
991 995 for (i = 0; i < len; i++) {
992 996 PyObject *head;
993 997
994 998 if (nothead[i])
995 999 continue;
996 1000 head = PyInt_FromSsize_t(i);
997 1001 if (head == NULL || PyList_Append(heads, head) == -1) {
998 1002 Py_XDECREF(head);
999 1003 goto bail;
1000 1004 }
1001 1005 }
1002 1006
1003 1007 done:
1004 1008 self->headrevs = heads;
1005 1009 Py_XDECREF(filter);
1006 1010 free(nothead);
1007 1011 return list_copy(self->headrevs);
1008 1012 bail:
1009 1013 Py_XDECREF(filter);
1010 1014 Py_XDECREF(heads);
1011 1015 free(nothead);
1012 1016 return NULL;
1013 1017 }
1014 1018
1015 1019 /**
1016 1020 * Obtain the base revision index entry.
1017 1021 *
1018 1022 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1019 1023 */
1020 1024 static inline int index_baserev(indexObject *self, int rev)
1021 1025 {
1022 1026 const char *data;
1023 1027 int result;
1024 1028
1025 1029 if (rev >= self->length) {
1026 1030 PyObject *tuple =
1027 1031 PyList_GET_ITEM(self->added, rev - self->length);
1028 1032 long ret;
1029 1033 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
1030 1034 return -2;
1031 1035 }
1032 1036 result = (int)ret;
1033 1037 } else {
1034 1038 data = index_deref(self, rev);
1035 1039 if (data == NULL) {
1036 1040 return -2;
1037 1041 }
1038 1042
1039 1043 result = getbe32(data + 16);
1040 1044 }
1041 1045 if (result > rev) {
1042 1046 PyErr_Format(
1043 1047 PyExc_ValueError,
1044 1048 "corrupted revlog, revision base above revision: %d, %d",
1045 1049 rev, result);
1046 1050 return -2;
1047 1051 }
1048 1052 if (result < -1) {
1049 1053 PyErr_Format(
1050 1054 PyExc_ValueError,
1051 1055 "corrupted revlog, revision base out of range: %d, %d", rev,
1052 1056 result);
1053 1057 return -2;
1054 1058 }
1055 1059 return result;
1056 1060 }
1057 1061
1058 1062 /**
1059 1063 * Find if a revision is a snapshot or not
1060 1064 *
1061 1065 * Only relevant for sparse-revlog case.
1062 1066 * Callers must ensure that rev is in a valid range.
1063 1067 */
1064 1068 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1065 1069 {
1066 1070 int ps[2];
1067 1071 Py_ssize_t base;
1068 1072 while (rev >= 0) {
1069 1073 base = (Py_ssize_t)index_baserev(self, rev);
1070 1074 if (base == rev) {
1071 1075 base = -1;
1072 1076 }
1073 1077 if (base == -2) {
1074 1078 assert(PyErr_Occurred());
1075 1079 return -1;
1076 1080 }
1077 1081 if (base == -1) {
1078 1082 return 1;
1079 1083 }
1080 1084 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1081 1085 assert(PyErr_Occurred());
1082 1086 return -1;
1083 1087 };
1084 1088 if (base == ps[0] || base == ps[1]) {
1085 1089 return 0;
1086 1090 }
1087 1091 rev = base;
1088 1092 }
1089 1093 return rev == -1;
1090 1094 }
1091 1095
1092 1096 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1093 1097 {
1094 1098 long rev;
1095 1099 int issnap;
1096 1100 Py_ssize_t length = index_length(self);
1097 1101
1098 1102 if (!pylong_to_long(value, &rev)) {
1099 1103 return NULL;
1100 1104 }
1101 1105 if (rev < -1 || rev >= length) {
1102 1106 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1103 1107 rev);
1104 1108 return NULL;
1105 1109 };
1106 1110 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1107 1111 if (issnap < 0) {
1108 1112 return NULL;
1109 1113 };
1110 1114 return PyBool_FromLong((long)issnap);
1111 1115 }
1112 1116
1113 1117 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1114 1118 {
1115 1119 Py_ssize_t start_rev;
1116 1120 PyObject *cache;
1117 1121 Py_ssize_t base;
1118 1122 Py_ssize_t rev;
1119 1123 PyObject *key = NULL;
1120 1124 PyObject *value = NULL;
1121 1125 const Py_ssize_t length = index_length(self);
1122 1126 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1123 1127 return NULL;
1124 1128 }
1125 1129 for (rev = start_rev; rev < length; rev++) {
1126 1130 int issnap;
1127 1131 PyObject *allvalues = NULL;
1128 1132 issnap = index_issnapshotrev(self, rev);
1129 1133 if (issnap < 0) {
1130 1134 goto bail;
1131 1135 }
1132 1136 if (issnap == 0) {
1133 1137 continue;
1134 1138 }
1135 1139 base = (Py_ssize_t)index_baserev(self, rev);
1136 1140 if (base == rev) {
1137 1141 base = -1;
1138 1142 }
1139 1143 if (base == -2) {
1140 1144 assert(PyErr_Occurred());
1141 1145 goto bail;
1142 1146 }
1143 1147 key = PyInt_FromSsize_t(base);
1144 1148 allvalues = PyDict_GetItem(cache, key);
1145 1149 if (allvalues == NULL && PyErr_Occurred()) {
1146 1150 goto bail;
1147 1151 }
1148 1152 if (allvalues == NULL) {
1149 1153 int r;
1150 1154 allvalues = PyList_New(0);
1151 1155 if (!allvalues) {
1152 1156 goto bail;
1153 1157 }
1154 1158 r = PyDict_SetItem(cache, key, allvalues);
1155 1159 Py_DECREF(allvalues);
1156 1160 if (r < 0) {
1157 1161 goto bail;
1158 1162 }
1159 1163 }
1160 1164 value = PyInt_FromSsize_t(rev);
1161 1165 if (PyList_Append(allvalues, value)) {
1162 1166 goto bail;
1163 1167 }
1164 1168 Py_CLEAR(key);
1165 1169 Py_CLEAR(value);
1166 1170 }
1167 1171 Py_RETURN_NONE;
1168 1172 bail:
1169 1173 Py_XDECREF(key);
1170 1174 Py_XDECREF(value);
1171 1175 return NULL;
1172 1176 }
1173 1177
1174 1178 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1175 1179 {
1176 1180 int rev, generaldelta;
1177 1181 PyObject *stoparg;
1178 1182 int stoprev, iterrev, baserev = -1;
1179 1183 int stopped;
1180 1184 PyObject *chain = NULL, *result = NULL;
1181 1185 const Py_ssize_t length = index_length(self);
1182 1186
1183 1187 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1184 1188 return NULL;
1185 1189 }
1186 1190
1187 1191 if (PyInt_Check(stoparg)) {
1188 1192 stoprev = (int)PyInt_AsLong(stoparg);
1189 1193 if (stoprev == -1 && PyErr_Occurred()) {
1190 1194 return NULL;
1191 1195 }
1192 1196 } else if (stoparg == Py_None) {
1193 1197 stoprev = -2;
1194 1198 } else {
1195 1199 PyErr_SetString(PyExc_ValueError,
1196 1200 "stoprev must be integer or None");
1197 1201 return NULL;
1198 1202 }
1199 1203
1200 1204 if (rev < 0 || rev >= length) {
1201 1205 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1202 1206 return NULL;
1203 1207 }
1204 1208
1205 1209 chain = PyList_New(0);
1206 1210 if (chain == NULL) {
1207 1211 return NULL;
1208 1212 }
1209 1213
1210 1214 baserev = index_baserev(self, rev);
1211 1215
1212 1216 /* This should never happen. */
1213 1217 if (baserev <= -2) {
1214 1218 /* Error should be set by index_deref() */
1215 1219 assert(PyErr_Occurred());
1216 1220 goto bail;
1217 1221 }
1218 1222
1219 1223 iterrev = rev;
1220 1224
1221 1225 while (iterrev != baserev && iterrev != stoprev) {
1222 1226 PyObject *value = PyInt_FromLong(iterrev);
1223 1227 if (value == NULL) {
1224 1228 goto bail;
1225 1229 }
1226 1230 if (PyList_Append(chain, value)) {
1227 1231 Py_DECREF(value);
1228 1232 goto bail;
1229 1233 }
1230 1234 Py_DECREF(value);
1231 1235
1232 1236 if (generaldelta) {
1233 1237 iterrev = baserev;
1234 1238 } else {
1235 1239 iterrev--;
1236 1240 }
1237 1241
1238 1242 if (iterrev < 0) {
1239 1243 break;
1240 1244 }
1241 1245
1242 1246 if (iterrev >= length) {
1243 1247 PyErr_SetString(PyExc_IndexError,
1244 1248 "revision outside index");
1245 1249 			goto bail;
1246 1250 }
1247 1251
1248 1252 baserev = index_baserev(self, iterrev);
1249 1253
1250 1254 /* This should never happen. */
1251 1255 if (baserev <= -2) {
1252 1256 /* Error should be set by index_deref() */
1253 1257 assert(PyErr_Occurred());
1254 1258 goto bail;
1255 1259 }
1256 1260 }
1257 1261
1258 1262 if (iterrev == stoprev) {
1259 1263 stopped = 1;
1260 1264 } else {
1261 1265 PyObject *value = PyInt_FromLong(iterrev);
1262 1266 if (value == NULL) {
1263 1267 goto bail;
1264 1268 }
1265 1269 if (PyList_Append(chain, value)) {
1266 1270 Py_DECREF(value);
1267 1271 goto bail;
1268 1272 }
1269 1273 Py_DECREF(value);
1270 1274
1271 1275 stopped = 0;
1272 1276 }
1273 1277
1274 1278 if (PyList_Reverse(chain)) {
1275 1279 goto bail;
1276 1280 }
1277 1281
1278 1282 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1279 1283 Py_DECREF(chain);
1280 1284 return result;
1281 1285
1282 1286 bail:
1283 1287 Py_DECREF(chain);
1284 1288 return NULL;
1285 1289 }
1286 1290
1287 1291 static inline int64_t
1288 1292 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1289 1293 {
1290 1294 int64_t start_offset;
1291 1295 int64_t end_offset;
1292 1296 int end_size;
1293 1297 start_offset = index_get_start(self, start_rev);
1294 1298 if (start_offset < 0) {
1295 1299 return -1;
1296 1300 }
1297 1301 end_offset = index_get_start(self, end_rev);
1298 1302 if (end_offset < 0) {
1299 1303 return -1;
1300 1304 }
1301 1305 end_size = index_get_length(self, end_rev);
1302 1306 if (end_size < 0) {
1303 1307 return -1;
1304 1308 }
1305 1309 if (end_offset < start_offset) {
1306 1310 PyErr_Format(PyExc_ValueError,
1307 1311 "corrupted revlog index: inconsistent offset "
1308 1312 "between revisions (%zd) and (%zd)",
1309 1313 start_rev, end_rev);
1310 1314 return -1;
1311 1315 }
1312 1316 return (end_offset - start_offset) + (int64_t)end_size;
1313 1317 }
1314 1318
1315 1319 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1316 1320 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1317 1321 Py_ssize_t startidx, Py_ssize_t endidx)
1318 1322 {
1319 1323 int length;
1320 1324 while (endidx > 1 && endidx > startidx) {
1321 1325 length = index_get_length(self, revs[endidx - 1]);
1322 1326 if (length < 0) {
1323 1327 return -1;
1324 1328 }
1325 1329 if (length != 0) {
1326 1330 break;
1327 1331 }
1328 1332 endidx -= 1;
1329 1333 }
1330 1334 return endidx;
1331 1335 }
1332 1336
1333 1337 struct Gap {
1334 1338 int64_t size;
1335 1339 Py_ssize_t idx;
1336 1340 };
1337 1341
1338 1342 static int gap_compare(const void *left, const void *right)
1339 1343 {
1340 1344 const struct Gap *l_left = ((const struct Gap *)left);
1341 1345 const struct Gap *l_right = ((const struct Gap *)right);
1342 1346 if (l_left->size < l_right->size) {
1343 1347 return -1;
1344 1348 } else if (l_left->size > l_right->size) {
1345 1349 return 1;
1346 1350 }
1347 1351 return 0;
1348 1352 }
1349 1353 static int Py_ssize_t_compare(const void *left, const void *right)
1350 1354 {
1351 1355 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1352 1356 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1353 1357 if (l_left < l_right) {
1354 1358 return -1;
1355 1359 } else if (l_left > l_right) {
1356 1360 return 1;
1357 1361 }
1358 1362 return 0;
1359 1363 }
1360 1364
1361 1365 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1362 1366 {
1363 1367 /* method arguments */
1364 1368 PyObject *list_revs = NULL; /* revisions in the chain */
1365 1369 double targetdensity = 0; /* min density to achieve */
1366 1370 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1367 1371
1368 1372 /* other core variables */
1369 1373 Py_ssize_t idxlen = index_length(self);
1370 1374 Py_ssize_t i; /* used for various iteration */
1371 1375 PyObject *result = NULL; /* the final return of the function */
1372 1376
1373 1377 	/* generic information about the delta chain being sliced */
1374 1378 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1375 1379 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1376 1380 int64_t chainpayload = 0; /* sum of all delta in the chain */
1377 1381 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1378 1382
1379 1383 /* variable used for slicing the delta chain */
1380 1384 int64_t readdata = 0; /* amount of data currently planned to be read */
1381 1385 	double density = 0; /* ratio of payload data to data to be read */
1382 1386 int64_t previous_end;
1383 1387 	struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1384 1388 Py_ssize_t num_gaps =
1385 1389 	    0; /* total number of notable gaps recorded so far */
1386 1390 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1387 1391 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1388 1392 PyObject *chunk = NULL; /* individual slice */
1389 1393 PyObject *allchunks = NULL; /* all slices */
1390 1394 Py_ssize_t previdx;
1391 1395
1392 1396 /* parsing argument */
1393 1397 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1394 1398 &targetdensity, &mingapsize)) {
1395 1399 goto bail;
1396 1400 }
1397 1401
1398 1402 /* If the delta chain contains a single element, we do not need slicing
1399 1403 */
1400 1404 num_revs = PyList_GET_SIZE(list_revs);
1401 1405 if (num_revs <= 1) {
1402 1406 result = PyTuple_Pack(1, list_revs);
1403 1407 goto done;
1404 1408 }
1405 1409
1406 1410 /* Turn the python list into a native integer array (for efficiency) */
1407 1411 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1408 1412 if (revs == NULL) {
1409 1413 PyErr_NoMemory();
1410 1414 goto bail;
1411 1415 }
1412 1416 for (i = 0; i < num_revs; i++) {
1413 1417 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1414 1418 if (revnum == -1 && PyErr_Occurred()) {
1415 1419 goto bail;
1416 1420 }
1417 1421 if (revnum < nullrev || revnum >= idxlen) {
1418 1422 PyErr_Format(PyExc_IndexError,
1419 1423 "index out of range: %zd", revnum);
1420 1424 goto bail;
1421 1425 }
1422 1426 revs[i] = revnum;
1423 1427 }
1424 1428
1425 1429 	/* Compute and check various properties of the unsliced delta chain */
1426 1430 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1427 1431 if (deltachainspan < 0) {
1428 1432 goto bail;
1429 1433 }
1430 1434
1431 1435 if (deltachainspan <= mingapsize) {
1432 1436 result = PyTuple_Pack(1, list_revs);
1433 1437 goto done;
1434 1438 }
1435 1439 chainpayload = 0;
1436 1440 for (i = 0; i < num_revs; i++) {
1437 1441 int tmp = index_get_length(self, revs[i]);
1438 1442 if (tmp < 0) {
1439 1443 goto bail;
1440 1444 }
1441 1445 chainpayload += tmp;
1442 1446 }
1443 1447
1444 1448 readdata = deltachainspan;
1445 1449 density = 1.0;
1446 1450
1447 1451 if (0 < deltachainspan) {
1448 1452 density = (double)chainpayload / (double)deltachainspan;
1449 1453 }
1450 1454
1451 1455 if (density >= targetdensity) {
1452 1456 result = PyTuple_Pack(1, list_revs);
1453 1457 goto done;
1454 1458 }
1455 1459
1456 1460 /* if chain is too sparse, look for relevant gaps */
1457 1461 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1458 1462 if (gaps == NULL) {
1459 1463 PyErr_NoMemory();
1460 1464 goto bail;
1461 1465 }
1462 1466
1463 1467 previous_end = -1;
1464 1468 for (i = 0; i < num_revs; i++) {
1465 1469 int64_t revstart;
1466 1470 int revsize;
1467 1471 revstart = index_get_start(self, revs[i]);
1468 1472 if (revstart < 0) {
1469 1473 goto bail;
1470 1474 };
1471 1475 revsize = index_get_length(self, revs[i]);
1472 1476 if (revsize < 0) {
1473 1477 goto bail;
1474 1478 };
1475 1479 if (revsize == 0) {
1476 1480 continue;
1477 1481 }
1478 1482 if (previous_end >= 0) {
1479 1483 int64_t gapsize = revstart - previous_end;
1480 1484 if (gapsize > mingapsize) {
1481 1485 gaps[num_gaps].size = gapsize;
1482 1486 gaps[num_gaps].idx = i;
1483 1487 num_gaps += 1;
1484 1488 }
1485 1489 }
1486 1490 previous_end = revstart + revsize;
1487 1491 }
1488 1492 if (num_gaps == 0) {
1489 1493 result = PyTuple_Pack(1, list_revs);
1490 1494 goto done;
1491 1495 }
1492 1496 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1493 1497
1494 1498 	/* Slice the largest gaps first; they improve the density the most */
1495 1499 selected_indices =
1496 1500 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1497 1501 if (selected_indices == NULL) {
1498 1502 PyErr_NoMemory();
1499 1503 goto bail;
1500 1504 }
1501 1505
1502 1506 for (i = num_gaps - 1; i >= 0; i--) {
1503 1507 selected_indices[num_selected] = gaps[i].idx;
1504 1508 readdata -= gaps[i].size;
1505 1509 num_selected += 1;
1506 1510 if (readdata <= 0) {
1507 1511 density = 1.0;
1508 1512 } else {
1509 1513 density = (double)chainpayload / (double)readdata;
1510 1514 }
1511 1515 if (density >= targetdensity) {
1512 1516 break;
1513 1517 }
1514 1518 }
1515 1519 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1516 1520 &Py_ssize_t_compare);
1517 1521
1518 1522 /* create the resulting slice */
1519 1523 allchunks = PyList_New(0);
1520 1524 if (allchunks == NULL) {
1521 1525 goto bail;
1522 1526 }
1523 1527 previdx = 0;
1524 1528 selected_indices[num_selected] = num_revs;
1525 1529 for (i = 0; i <= num_selected; i++) {
1526 1530 Py_ssize_t idx = selected_indices[i];
1527 1531 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1528 1532 if (endidx < 0) {
1529 1533 goto bail;
1530 1534 }
1531 1535 if (previdx < endidx) {
1532 1536 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1533 1537 if (chunk == NULL) {
1534 1538 goto bail;
1535 1539 }
1536 1540 if (PyList_Append(allchunks, chunk) == -1) {
1537 1541 goto bail;
1538 1542 }
1539 1543 Py_DECREF(chunk);
1540 1544 chunk = NULL;
1541 1545 }
1542 1546 previdx = idx;
1543 1547 }
1544 1548 result = allchunks;
1545 1549 goto done;
1546 1550
1547 1551 bail:
1548 1552 Py_XDECREF(allchunks);
1549 1553 Py_XDECREF(chunk);
1550 1554 done:
1551 1555 free(revs);
1552 1556 free(gaps);
1553 1557 free(selected_indices);
1554 1558 return result;
1555 1559 }
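/*
 * Worked example (illustrative numbers only, not part of the original
 * change): a chain whose deltas total 300 bytes spread over a 1000-byte
 * span has density 300/1000 = 0.3. Skipping a single 600-byte gap drops
 * readdata to 400 and raises the density to 300/400 = 0.75, which may now
 * satisfy targetdensity, so the chain is cut at that gap.
 */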
1556 1560
1557 1561 static inline int nt_level(const char *node, Py_ssize_t level)
1558 1562 {
1559 1563 int v = node[level >> 1];
1560 1564 if (!(level & 1))
1561 1565 v >>= 4;
1562 1566 return v & 0xf;
1563 1567 }
1564 1568
1565 1569 /*
1566 1570 * Return values:
1567 1571 *
1568 1572 * -4: match is ambiguous (multiple candidates)
1569 1573 * -2: not found
1570 1574 * rest: valid rev
1571 1575 */
1572 1576 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1573 1577 int hex)
1574 1578 {
1575 1579 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1576 1580 int level, maxlevel, off;
1577 1581
1578 1582 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1579 1583 return -1;
1580 1584
1581 1585 if (hex)
1582 1586 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1583 1587 else
1584 1588 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1585 1589
1586 1590 for (level = off = 0; level < maxlevel; level++) {
1587 1591 int k = getnybble(node, level);
1588 1592 nodetreenode *n = &self->nodes[off];
1589 1593 int v = n->children[k];
1590 1594
1591 1595 if (v < 0) {
1592 1596 const char *n;
1593 1597 Py_ssize_t i;
1594 1598
1595 1599 v = -(v + 2);
1596 1600 n = index_node(self->index, v);
1597 1601 if (n == NULL)
1598 1602 return -2;
1599 1603 for (i = level; i < maxlevel; i++)
1600 1604 if (getnybble(node, i) != nt_level(n, i))
1601 1605 return -2;
1602 1606 return v;
1603 1607 }
1604 1608 if (v == 0)
1605 1609 return -2;
1606 1610 off = v;
1607 1611 }
1608 1612 /* multiple matches against an ambiguous prefix */
1609 1613 return -4;
1610 1614 }
1611 1615
1612 1616 static int nt_new(nodetree *self)
1613 1617 {
1614 1618 if (self->length == self->capacity) {
1615 1619 unsigned newcapacity;
1616 1620 nodetreenode *newnodes;
1617 1621 newcapacity = self->capacity * 2;
1618 1622 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1619 1623 PyErr_SetString(PyExc_MemoryError,
1620 1624 "overflow in nt_new");
1621 1625 return -1;
1622 1626 }
1623 1627 newnodes =
1624 1628 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1625 1629 if (newnodes == NULL) {
1626 1630 PyErr_SetString(PyExc_MemoryError, "out of memory");
1627 1631 return -1;
1628 1632 }
1629 1633 self->capacity = newcapacity;
1630 1634 self->nodes = newnodes;
1631 1635 memset(&self->nodes[self->length], 0,
1632 1636 sizeof(nodetreenode) * (self->capacity - self->length));
1633 1637 }
1634 1638 return self->length++;
1635 1639 }
1636 1640
1637 1641 static int nt_insert(nodetree *self, const char *node, int rev)
1638 1642 {
1639 1643 int level = 0;
1640 1644 int off = 0;
1641 1645
1642 1646 while (level < 40) {
1643 1647 int k = nt_level(node, level);
1644 1648 nodetreenode *n;
1645 1649 int v;
1646 1650
1647 1651 n = &self->nodes[off];
1648 1652 v = n->children[k];
1649 1653
1650 1654 if (v == 0) {
1651 1655 n->children[k] = -rev - 2;
1652 1656 return 0;
1653 1657 }
1654 1658 if (v < 0) {
1655 1659 const char *oldnode =
1656 1660 index_node_existing(self->index, -(v + 2));
1657 1661 int noff;
1658 1662
1659 1663 if (oldnode == NULL)
1660 1664 return -1;
1661 1665 if (!memcmp(oldnode, node, 20)) {
1662 1666 n->children[k] = -rev - 2;
1663 1667 return 0;
1664 1668 }
1665 1669 noff = nt_new(self);
1666 1670 if (noff == -1)
1667 1671 return -1;
1668 1672 /* self->nodes may have been changed by realloc */
1669 1673 self->nodes[off].children[k] = noff;
1670 1674 off = noff;
1671 1675 n = &self->nodes[off];
1672 1676 n->children[nt_level(oldnode, ++level)] = v;
1673 1677 if (level > self->depth)
1674 1678 self->depth = level;
1675 1679 self->splits += 1;
1676 1680 } else {
1677 1681 level += 1;
1678 1682 off = v;
1679 1683 }
1680 1684 }
1681 1685
1682 1686 return -1;
1683 1687 }
1684 1688
1685 1689 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1686 1690 {
1687 1691 Py_ssize_t rev;
1688 1692 const char *node;
1689 1693 Py_ssize_t length;
1690 1694 if (!PyArg_ParseTuple(args, "n", &rev))
1691 1695 return NULL;
1692 1696 length = index_length(self->nt.index);
1693 1697 if (rev < 0 || rev >= length) {
1694 1698 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1695 1699 return NULL;
1696 1700 }
1697 1701 node = index_node_existing(self->nt.index, rev);
1698 1702 if (nt_insert(&self->nt, node, (int)rev) == -1)
1699 1703 return NULL;
1700 1704 Py_RETURN_NONE;
1701 1705 }
1702 1706
1703 1707 static int nt_delete_node(nodetree *self, const char *node)
1704 1708 {
1705 1709 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1706 1710 */
1707 1711 return nt_insert(self, node, -2);
1708 1712 }
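/*
 * Illustrative note (editorial, not part of the original change): with
 * rev == -2 the leaf encoding used by nt_insert(), -rev - 2, evaluates to
 * 0, and 0 is exactly the "empty slot" value that nt_find() and
 * nt_shortest() treat as "no entry", so re-inserting with -2 deletes the
 * node from the trie.
 */
#if 0 /* sketch, not compiled */
assert(-(-2) - 2 == 0);
#endif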
1709 1713
1710 1714 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1711 1715 {
1712 1716 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1713 1717 self->nodes = NULL;
1714 1718
1715 1719 self->index = index;
1716 1720 /* The input capacity is in terms of revisions, while the field is in
1717 1721 * terms of nodetree nodes. */
1718 1722 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1719 1723 self->depth = 0;
1720 1724 self->splits = 0;
1721 1725 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1722 1726 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1723 1727 return -1;
1724 1728 }
1725 1729 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1726 1730 if (self->nodes == NULL) {
1727 1731 PyErr_NoMemory();
1728 1732 return -1;
1729 1733 }
1730 1734 self->length = 1;
1731 1735 return 0;
1732 1736 }
1733 1737
1734 1738 static int ntobj_init(nodetreeObject *self, PyObject *args)
1735 1739 {
1736 1740 PyObject *index;
1737 1741 unsigned capacity;
1738 1742 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1739 1743 &capacity))
1740 1744 return -1;
1741 1745 Py_INCREF(index);
1742 1746 return nt_init(&self->nt, (indexObject *)index, capacity);
1743 1747 }
1744 1748
1745 1749 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1746 1750 {
1747 1751 return nt_find(self, node, nodelen, 1);
1748 1752 }
1749 1753
1750 1754 /*
1751 1755 * Find the length of the shortest unique prefix of node.
1752 1756 *
1753 1757 * Return values:
1754 1758 *
1755 1759 * -3: error (exception set)
1756 1760 * -2: not found (no exception set)
1757 1761 * rest: length of shortest prefix
1758 1762 */
1759 1763 static int nt_shortest(nodetree *self, const char *node)
1760 1764 {
1761 1765 int level, off;
1762 1766
1763 1767 for (level = off = 0; level < 40; level++) {
1764 1768 int k, v;
1765 1769 nodetreenode *n = &self->nodes[off];
1766 1770 k = nt_level(node, level);
1767 1771 v = n->children[k];
1768 1772 if (v < 0) {
1769 1773 const char *n;
1770 1774 v = -(v + 2);
1771 1775 n = index_node_existing(self->index, v);
1772 1776 if (n == NULL)
1773 1777 return -3;
1774 1778 if (memcmp(node, n, 20) != 0)
1775 1779 /*
1776 1780 * Found a unique prefix, but it wasn't for the
1777 1781 				 * requested node (i.e. the requested node does
1778 1782 * not exist).
1779 1783 */
1780 1784 return -2;
1781 1785 return level + 1;
1782 1786 }
1783 1787 if (v == 0)
1784 1788 return -2;
1785 1789 off = v;
1786 1790 }
1787 1791 /*
1788 1792 * The node was still not unique after 40 hex digits, so this won't
1789 1793 * happen. Also, if we get here, then there's a programming error in
1790 1794 * this file that made us insert a node longer than 40 hex digits.
1791 1795 */
1792 1796 PyErr_SetString(PyExc_Exception, "broken node tree");
1793 1797 return -3;
1794 1798 }
1795 1799
1796 1800 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1797 1801 {
1798 1802 PyObject *val;
1799 1803 char *node;
1800 1804 int length;
1801 1805
1802 1806 if (!PyArg_ParseTuple(args, "O", &val))
1803 1807 return NULL;
1804 1808 if (node_check(val, &node) == -1)
1805 1809 return NULL;
1806 1810
1807 1811 length = nt_shortest(&self->nt, node);
1808 1812 if (length == -3)
1809 1813 return NULL;
1810 1814 if (length == -2) {
1811 1815 raise_revlog_error();
1812 1816 return NULL;
1813 1817 }
1814 1818 return PyInt_FromLong(length);
1815 1819 }
1816 1820
1817 1821 static void nt_dealloc(nodetree *self)
1818 1822 {
1819 1823 free(self->nodes);
1820 1824 self->nodes = NULL;
1821 1825 }
1822 1826
1823 1827 static void ntobj_dealloc(nodetreeObject *self)
1824 1828 {
1825 1829 Py_XDECREF(self->nt.index);
1826 1830 nt_dealloc(&self->nt);
1827 1831 PyObject_Del(self);
1828 1832 }
1829 1833
1830 1834 static PyMethodDef ntobj_methods[] = {
1831 1835 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1832 1836 "insert an index entry"},
1833 1837 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1834 1838 "find length of shortest hex nodeid of a binary ID"},
1835 1839 {NULL} /* Sentinel */
1836 1840 };
1837 1841
1838 1842 static PyTypeObject nodetreeType = {
1839 1843 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1840 1844 "parsers.nodetree", /* tp_name */
1841 1845 sizeof(nodetreeObject), /* tp_basicsize */
1842 1846 0, /* tp_itemsize */
1843 1847 (destructor)ntobj_dealloc, /* tp_dealloc */
1844 1848 0, /* tp_print */
1845 1849 0, /* tp_getattr */
1846 1850 0, /* tp_setattr */
1847 1851 0, /* tp_compare */
1848 1852 0, /* tp_repr */
1849 1853 0, /* tp_as_number */
1850 1854 0, /* tp_as_sequence */
1851 1855 0, /* tp_as_mapping */
1852 1856 0, /* tp_hash */
1853 1857 0, /* tp_call */
1854 1858 0, /* tp_str */
1855 1859 0, /* tp_getattro */
1856 1860 0, /* tp_setattro */
1857 1861 0, /* tp_as_buffer */
1858 1862 Py_TPFLAGS_DEFAULT, /* tp_flags */
1859 1863 "nodetree", /* tp_doc */
1860 1864 0, /* tp_traverse */
1861 1865 0, /* tp_clear */
1862 1866 0, /* tp_richcompare */
1863 1867 0, /* tp_weaklistoffset */
1864 1868 0, /* tp_iter */
1865 1869 0, /* tp_iternext */
1866 1870 ntobj_methods, /* tp_methods */
1867 1871 0, /* tp_members */
1868 1872 0, /* tp_getset */
1869 1873 0, /* tp_base */
1870 1874 0, /* tp_dict */
1871 1875 0, /* tp_descr_get */
1872 1876 0, /* tp_descr_set */
1873 1877 0, /* tp_dictoffset */
1874 1878 (initproc)ntobj_init, /* tp_init */
1875 1879 0, /* tp_alloc */
1876 1880 };
1877 1881
1878 1882 static int index_init_nt(indexObject *self)
1879 1883 {
1880 1884 if (!self->ntinitialized) {
1881 1885 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1882 1886 nt_dealloc(&self->nt);
1883 1887 return -1;
1884 1888 }
1885 1889 if (nt_insert(&self->nt, nullid, -1) == -1) {
1886 1890 nt_dealloc(&self->nt);
1887 1891 return -1;
1888 1892 }
1889 1893 self->ntinitialized = 1;
1890 1894 self->ntrev = (int)index_length(self);
1891 1895 self->ntlookups = 1;
1892 1896 self->ntmisses = 0;
1893 1897 }
1894 1898 return 0;
1895 1899 }
1896 1900
1897 1901 /*
1898 1902 * Return values:
1899 1903 *
1900 1904 * -3: error (exception set)
1901 1905 * -2: not found (no exception set)
1902 1906 * rest: valid rev
1903 1907 */
1904 1908 static int index_find_node(indexObject *self, const char *node,
1905 1909 Py_ssize_t nodelen)
1906 1910 {
1907 1911 int rev;
1908 1912
1909 1913 if (index_init_nt(self) == -1)
1910 1914 return -3;
1911 1915
1912 1916 self->ntlookups++;
1913 1917 rev = nt_find(&self->nt, node, nodelen, 0);
1914 1918 if (rev >= -1)
1915 1919 return rev;
1916 1920
1917 1921 /*
1918 1922 * For the first handful of lookups, we scan the entire index,
1919 1923 * and cache only the matching nodes. This optimizes for cases
1920 1924 * like "hg tip", where only a few nodes are accessed.
1921 1925 *
1922 1926 * After that, we cache every node we visit, using a single
1923 1927 * scan amortized over multiple lookups. This gives the best
1924 1928 * bulk performance, e.g. for "hg log".
1925 1929 */
1926 1930 if (self->ntmisses++ < 4) {
1927 1931 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1928 1932 const char *n = index_node_existing(self, rev);
1929 1933 if (n == NULL)
1930 1934 return -3;
1931 1935 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1932 1936 if (nt_insert(&self->nt, n, rev) == -1)
1933 1937 return -3;
1934 1938 break;
1935 1939 }
1936 1940 }
1937 1941 } else {
1938 1942 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1939 1943 const char *n = index_node_existing(self, rev);
1940 1944 if (n == NULL)
1941 1945 return -3;
1942 1946 if (nt_insert(&self->nt, n, rev) == -1) {
1943 1947 self->ntrev = rev + 1;
1944 1948 return -3;
1945 1949 }
1946 1950 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1947 1951 break;
1948 1952 }
1949 1953 }
1950 1954 self->ntrev = rev;
1951 1955 }
1952 1956
1953 1957 if (rev >= 0)
1954 1958 return rev;
1955 1959 return -2;
1956 1960 }
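
/*
 * Illustrative caller for the contract above, matching how index_m_rev()
 * and index_contains() below use it (`self` and `node` assumed valid):
 *
 *	int rev = index_find_node(self, node, 20);
 *	if (rev == -3)
 *		return NULL;            (propagate the pending exception)
 *	if (rev == -2)
 *		raise_revlog_error();   (unknown node)
 *	and rev >= -1 is a valid revision, -1 being the null revision.
 */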
1957 1961
1958 1962 static PyObject *index_getitem(indexObject *self, PyObject *value)
1959 1963 {
1960 1964 char *node;
1961 1965 int rev;
1962 1966
1963 1967 if (PyInt_Check(value)) {
1964 1968 long idx;
1965 1969 if (!pylong_to_long(value, &idx)) {
1966 1970 return NULL;
1967 1971 }
1968 1972 return index_get(self, idx);
1969 1973 }
1970 1974
1971 1975 if (node_check(value, &node) == -1)
1972 1976 return NULL;
1973 1977 rev = index_find_node(self, node, 20);
1974 1978 if (rev >= -1)
1975 1979 return PyInt_FromLong(rev);
1976 1980 if (rev == -2)
1977 1981 raise_revlog_error();
1978 1982 return NULL;
1979 1983 }
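
/*
 * This subscript getter implements both access styles described in the
 * class comment: an integer key returns the decoded index entry tuple via
 * index_get(), while a 20-byte node key resolves to its revision number
 * through the trie.  Unknown nodes raise the revlog error; lookups that
 * fail with an exception already pending simply propagate it.
 */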
1980 1984
1981 1985 /*
1982 1986 * Fully populate the radix tree.
1983 1987 */
1984 1988 static int index_populate_nt(indexObject *self)
1985 1989 {
1986 1990 int rev;
1987 1991 if (self->ntrev > 0) {
1988 1992 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1989 1993 const char *n = index_node_existing(self, rev);
1990 1994 if (n == NULL)
1991 1995 return -1;
1992 1996 if (nt_insert(&self->nt, n, rev) == -1)
1993 1997 return -1;
1994 1998 }
1995 1999 self->ntrev = -1;
1996 2000 }
1997 2001 return 0;
1998 2002 }
1999 2003
2000 2004 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2001 2005 {
2002 2006 const char *fullnode;
2003 2007 Py_ssize_t nodelen;
2004 2008 char *node;
2005 2009 int rev, i;
2006 2010
2007 2011 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2008 2012 return NULL;
2009 2013
2010 2014 if (nodelen < 1) {
2011 2015 PyErr_SetString(PyExc_ValueError, "key too short");
2012 2016 return NULL;
2013 2017 }
2014 2018
2015 2019 if (nodelen > 40) {
2016 2020 PyErr_SetString(PyExc_ValueError, "key too long");
2017 2021 return NULL;
2018 2022 }
2019 2023
2020 2024 for (i = 0; i < nodelen; i++)
2021 2025 hexdigit(node, i);
2022 2026 if (PyErr_Occurred()) {
2023 2027 /* input contains non-hex characters */
2024 2028 PyErr_Clear();
2025 2029 Py_RETURN_NONE;
2026 2030 }
2027 2031
2028 2032 if (index_init_nt(self) == -1)
2029 2033 return NULL;
2030 2034 if (index_populate_nt(self) == -1)
2031 2035 return NULL;
2032 2036 rev = nt_partialmatch(&self->nt, node, nodelen);
2033 2037
2034 2038 switch (rev) {
2035 2039 case -4:
2036 2040 raise_revlog_error();
2037 2041 return NULL;
2038 2042 case -2:
2039 2043 Py_RETURN_NONE;
2040 2044 case -1:
2041 2045 return PyBytes_FromStringAndSize(nullid, 20);
2042 2046 }
2043 2047
2044 2048 fullnode = index_node_existing(self, rev);
2045 2049 if (fullnode == NULL) {
2046 2050 return NULL;
2047 2051 }
2048 2052 return PyBytes_FromStringAndSize(fullnode, 20);
2049 2053 }
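
/*
 * Result codes handled in the switch above: -4 means the prefix is
 * ambiguous (several nodes share it; reported by nt_find(), which is not
 * shown in this hunk) and a revlog error is raised, -2 means no match and
 * maps to None, and -1 is the null node, returned as the literal 20-byte
 * nullid.  Any other value is a revision whose full node is read back from
 * the index.
 */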
2050 2054
2051 2055 static PyObject *index_shortest(indexObject *self, PyObject *args)
2052 2056 {
2053 2057 PyObject *val;
2054 2058 char *node;
2055 2059 int length;
2056 2060
2057 2061 if (!PyArg_ParseTuple(args, "O", &val))
2058 2062 return NULL;
2059 2063 if (node_check(val, &node) == -1)
2060 2064 return NULL;
2061 2065
2062 2066 self->ntlookups++;
2063 2067 if (index_init_nt(self) == -1)
2064 2068 return NULL;
2065 2069 if (index_populate_nt(self) == -1)
2066 2070 return NULL;
2067 2071 length = nt_shortest(&self->nt, node);
2068 2072 if (length == -3)
2069 2073 return NULL;
2070 2074 if (length == -2) {
2071 2075 raise_revlog_error();
2072 2076 return NULL;
2073 2077 }
2074 2078 return PyInt_FromLong(length);
2075 2079 }
2076 2080
2077 2081 static PyObject *index_m_get(indexObject *self, PyObject *args)
2078 2082 {
2079 2083 PyObject *val;
2080 2084 char *node;
2081 2085 int rev;
2082 2086
2083 2087 if (!PyArg_ParseTuple(args, "O", &val))
2084 2088 return NULL;
2085 2089 if (node_check(val, &node) == -1)
2086 2090 return NULL;
2087 2091 rev = index_find_node(self, node, 20);
2088 2092 if (rev == -3)
2089 2093 return NULL;
2090 2094 if (rev == -2)
2091 2095 Py_RETURN_NONE;
2092 2096 return PyInt_FromLong(rev);
2093 2097 }
2094 2098
2095 2099 static int index_contains(indexObject *self, PyObject *value)
2096 2100 {
2097 2101 char *node;
2098 2102
2099 2103 if (PyInt_Check(value)) {
2100 2104 long rev;
2101 2105 if (!pylong_to_long(value, &rev)) {
2102 2106 return -1;
2103 2107 }
2104 2108 return rev >= -1 && rev < index_length(self);
2105 2109 }
2106 2110
2107 2111 if (node_check(value, &node) == -1)
2108 2112 return -1;
2109 2113
2110 2114 switch (index_find_node(self, node, 20)) {
2111 2115 case -3:
2112 2116 return -1;
2113 2117 case -2:
2114 2118 return 0;
2115 2119 default:
2116 2120 return 1;
2117 2121 }
2118 2122 }
2119 2123
2120 2124 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2121 2125 {
2122 2126 int ret = index_contains(self, args);
2123 2127 if (ret < 0)
2124 2128 return NULL;
2125 2129 return PyBool_FromLong((long)ret);
2126 2130 }
2127 2131
2128 2132 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2129 2133 {
2130 2134 char *node;
2131 2135 int rev;
2132 2136
2133 2137 if (node_check(val, &node) == -1)
2134 2138 return NULL;
2135 2139 rev = index_find_node(self, node, 20);
2136 2140 if (rev >= -1)
2137 2141 return PyInt_FromLong(rev);
2138 2142 if (rev == -2)
2139 2143 raise_revlog_error();
2140 2144 return NULL;
2141 2145 }
2142 2146
2143 2147 typedef uint64_t bitmask;
2144 2148
2145 2149 /*
2146 2150 * Given a disjoint set of revs, return all candidates for the
2147 2151 * greatest common ancestor. In revset notation, this is the set
2148 2152 * "heads(::a and ::b and ...)"
2149 2153 */
2150 2154 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2151 2155 int revcount)
2152 2156 {
2153 2157 const bitmask allseen = (1ull << revcount) - 1;
2154 2158 const bitmask poison = 1ull << revcount;
2155 2159 PyObject *gca = PyList_New(0);
2156 2160 int i, v, interesting;
2157 2161 int maxrev = -1;
2158 2162 bitmask sp;
2159 2163 bitmask *seen;
2160 2164
2161 2165 if (gca == NULL)
2162 2166 return PyErr_NoMemory();
2163 2167
2164 2168 for (i = 0; i < revcount; i++) {
2165 2169 if (revs[i] > maxrev)
2166 2170 maxrev = revs[i];
2167 2171 }
2168 2172
2169 2173 seen = calloc(sizeof(*seen), maxrev + 1);
2170 2174 if (seen == NULL) {
2171 2175 Py_DECREF(gca);
2172 2176 return PyErr_NoMemory();
2173 2177 }
2174 2178
2175 2179 for (i = 0; i < revcount; i++)
2176 2180 seen[revs[i]] = 1ull << i;
2177 2181
2178 2182 interesting = revcount;
2179 2183
2180 2184 for (v = maxrev; v >= 0 && interesting; v--) {
2181 2185 bitmask sv = seen[v];
2182 2186 int parents[2];
2183 2187
2184 2188 if (!sv)
2185 2189 continue;
2186 2190
2187 2191 if (sv < poison) {
2188 2192 interesting -= 1;
2189 2193 if (sv == allseen) {
2190 2194 PyObject *obj = PyInt_FromLong(v);
2191 2195 if (obj == NULL)
2192 2196 goto bail;
2193 2197 if (PyList_Append(gca, obj) == -1) {
2194 2198 Py_DECREF(obj);
2195 2199 goto bail;
2196 2200 }
2197 2201 sv |= poison;
2198 2202 for (i = 0; i < revcount; i++) {
2199 2203 if (revs[i] == v)
2200 2204 goto done;
2201 2205 }
2202 2206 }
2203 2207 }
2204 2208 if (index_get_parents(self, v, parents, maxrev) < 0)
2205 2209 goto bail;
2206 2210
2207 2211 for (i = 0; i < 2; i++) {
2208 2212 int p = parents[i];
2209 2213 if (p == -1)
2210 2214 continue;
2211 2215 sp = seen[p];
2212 2216 if (sv < poison) {
2213 2217 if (sp == 0) {
2214 2218 seen[p] = sv;
2215 2219 interesting++;
2216 2220 } else if (sp != sv)
2217 2221 seen[p] |= sv;
2218 2222 } else {
2219 2223 if (sp && sp < poison)
2220 2224 interesting--;
2221 2225 seen[p] = sv;
2222 2226 }
2223 2227 }
2224 2228 }
2225 2229
2226 2230 done:
2227 2231 free(seen);
2228 2232 return gca;
2229 2233 bail:
2230 2234 free(seen);
2231 2235 Py_XDECREF(gca);
2232 2236 return NULL;
2233 2237 }
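
/*
 * Worked example of the bitmask sweep above (illustrative): with two input
 * revs, revcount == 2, allseen == 0x3 and poison == 0x4.  The inputs start
 * with seen[] bits 0x1 and 0x2; walking revisions from maxrev down to 0,
 * each revision propagates its bits to its parents, so seen[v] records
 * which inputs v is an ancestor of.  Whenever a revision's bits equal 0x3
 * it is a common-ancestor candidate and is appended to gca, then poisoned
 * (0x4 set) so that its own ancestors are never reported as additional
 * candidates.
 */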
2234 2238
2235 2239 /*
2236 2240 * Given a disjoint set of revs, return the subset with the longest
2237 2241 * path to the root.
2238 2242 */
2239 2243 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2240 2244 {
2241 2245 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2242 2246 static const Py_ssize_t capacity = 24;
2243 2247 int *depth, *interesting = NULL;
2244 2248 int i, j, v, ninteresting;
2245 2249 PyObject *dict = NULL, *keys = NULL;
2246 2250 long *seen = NULL;
2247 2251 int maxrev = -1;
2248 2252 long final;
2249 2253
2250 2254 if (revcount > capacity) {
2251 2255 PyErr_Format(PyExc_OverflowError,
2252 2256 "bitset size (%ld) > capacity (%ld)",
2253 2257 (long)revcount, (long)capacity);
2254 2258 return NULL;
2255 2259 }
2256 2260
2257 2261 for (i = 0; i < revcount; i++) {
2258 2262 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2259 2263 if (n > maxrev)
2260 2264 maxrev = n;
2261 2265 }
2262 2266
2263 2267 depth = calloc(sizeof(*depth), maxrev + 1);
2264 2268 if (depth == NULL)
2265 2269 return PyErr_NoMemory();
2266 2270
2267 2271 seen = calloc(sizeof(*seen), maxrev + 1);
2268 2272 if (seen == NULL) {
2269 2273 PyErr_NoMemory();
2270 2274 goto bail;
2271 2275 }
2272 2276
2273 2277 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2274 2278 if (interesting == NULL) {
2275 2279 PyErr_NoMemory();
2276 2280 goto bail;
2277 2281 }
2278 2282
2279 2283 if (PyList_Sort(revs) == -1)
2280 2284 goto bail;
2281 2285
2282 2286 for (i = 0; i < revcount; i++) {
2283 2287 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2284 2288 long b = 1l << i;
2285 2289 depth[n] = 1;
2286 2290 seen[n] = b;
2287 2291 interesting[b] = 1;
2288 2292 }
2289 2293
2290 2294 /* invariant: ninteresting is the number of non-zero entries in
2291 2295 * interesting. */
2292 2296 ninteresting = (int)revcount;
2293 2297
2294 2298 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2295 2299 int dv = depth[v];
2296 2300 int parents[2];
2297 2301 long sv;
2298 2302
2299 2303 if (dv == 0)
2300 2304 continue;
2301 2305
2302 2306 sv = seen[v];
2303 2307 if (index_get_parents(self, v, parents, maxrev) < 0)
2304 2308 goto bail;
2305 2309
2306 2310 for (i = 0; i < 2; i++) {
2307 2311 int p = parents[i];
2308 2312 long sp;
2309 2313 int dp;
2310 2314
2311 2315 if (p == -1)
2312 2316 continue;
2313 2317
2314 2318 dp = depth[p];
2315 2319 sp = seen[p];
2316 2320 if (dp <= dv) {
2317 2321 depth[p] = dv + 1;
2318 2322 if (sp != sv) {
2319 2323 interesting[sv] += 1;
2320 2324 seen[p] = sv;
2321 2325 if (sp) {
2322 2326 interesting[sp] -= 1;
2323 2327 if (interesting[sp] == 0)
2324 2328 ninteresting -= 1;
2325 2329 }
2326 2330 }
2327 2331 } else if (dv == dp - 1) {
2328 2332 long nsp = sp | sv;
2329 2333 if (nsp == sp)
2330 2334 continue;
2331 2335 seen[p] = nsp;
2332 2336 interesting[sp] -= 1;
2333 2337 if (interesting[sp] == 0)
2334 2338 ninteresting -= 1;
2335 2339 if (interesting[nsp] == 0)
2336 2340 ninteresting += 1;
2337 2341 interesting[nsp] += 1;
2338 2342 }
2339 2343 }
2340 2344 interesting[sv] -= 1;
2341 2345 if (interesting[sv] == 0)
2342 2346 ninteresting -= 1;
2343 2347 }
2344 2348
2345 2349 final = 0;
2346 2350 j = ninteresting;
2347 2351 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2348 2352 if (interesting[i] == 0)
2349 2353 continue;
2350 2354 final |= i;
2351 2355 j -= 1;
2352 2356 }
2353 2357 if (final == 0) {
2354 2358 keys = PyList_New(0);
2355 2359 goto bail;
2356 2360 }
2357 2361
2358 2362 dict = PyDict_New();
2359 2363 if (dict == NULL)
2360 2364 goto bail;
2361 2365
2362 2366 for (i = 0; i < revcount; i++) {
2363 2367 PyObject *key;
2364 2368
2365 2369 if ((final & (1 << i)) == 0)
2366 2370 continue;
2367 2371
2368 2372 key = PyList_GET_ITEM(revs, i);
2369 2373 Py_INCREF(key);
2370 2374 Py_INCREF(Py_None);
2371 2375 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2372 2376 Py_DECREF(key);
2373 2377 Py_DECREF(Py_None);
2374 2378 goto bail;
2375 2379 }
2376 2380 }
2377 2381
2378 2382 keys = PyDict_Keys(dict);
2379 2383
2380 2384 bail:
2381 2385 free(depth);
2382 2386 free(seen);
2383 2387 free(interesting);
2384 2388 Py_XDECREF(dict);
2385 2389
2386 2390 return keys;
2387 2391 }
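
/*
 * Rough intuition for the bookkeeping above (illustrative, simplified):
 * depth[v] tracks the longest distance found so far from an input rev down
 * to v, seen[v] records which inputs that longest path starts from, and
 * interesting[s] counts how many revisions currently carry exactly the
 * bitmask s.  The sweep stops once at most one distinct bitmask remains
 * (ninteresting <= 1); the inputs whose bits appear in `final` are the
 * ones with the longest path to the root.
 */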
2388 2392
2389 2393 /*
2390 2394 * Given a (possibly overlapping) set of revs, return all the
2391 2395  * common ancestor heads: heads(::args[0] and ::args[1] and ...)
2392 2396 */
2393 2397 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2394 2398 {
2395 2399 PyObject *ret = NULL;
2396 2400 Py_ssize_t argcount, i, len;
2397 2401 bitmask repeat = 0;
2398 2402 int revcount = 0;
2399 2403 int *revs;
2400 2404
2401 2405 argcount = PySequence_Length(args);
2402 2406 revs = PyMem_Malloc(argcount * sizeof(*revs));
2403 2407 if (argcount > 0 && revs == NULL)
2404 2408 return PyErr_NoMemory();
2405 2409 len = index_length(self);
2406 2410
2407 2411 for (i = 0; i < argcount; i++) {
2408 2412 static const int capacity = 24;
2409 2413 PyObject *obj = PySequence_GetItem(args, i);
2410 2414 bitmask x;
2411 2415 long val;
2412 2416
2413 2417 if (!PyInt_Check(obj)) {
2414 2418 PyErr_SetString(PyExc_TypeError,
2415 2419 "arguments must all be ints");
2416 2420 Py_DECREF(obj);
2417 2421 goto bail;
2418 2422 }
2419 2423 val = PyInt_AsLong(obj);
2420 2424 Py_DECREF(obj);
2421 2425 if (val == -1) {
2422 2426 ret = PyList_New(0);
2423 2427 goto done;
2424 2428 }
2425 2429 if (val < 0 || val >= len) {
2426 2430 PyErr_SetString(PyExc_IndexError, "index out of range");
2427 2431 goto bail;
2428 2432 }
2429 2433 /* this cheesy bloom filter lets us avoid some more
2430 2434 * expensive duplicate checks in the common set-is-disjoint
2431 2435 * case */
2432 2436 x = 1ull << (val & 0x3f);
2433 2437 if (repeat & x) {
2434 2438 int k;
2435 2439 for (k = 0; k < revcount; k++) {
2436 2440 if (val == revs[k])
2437 2441 goto duplicate;
2438 2442 }
2439 2443 } else
2440 2444 repeat |= x;
2441 2445 if (revcount >= capacity) {
2442 2446 PyErr_Format(PyExc_OverflowError,
2443 2447 "bitset size (%d) > capacity (%d)",
2444 2448 revcount, capacity);
2445 2449 goto bail;
2446 2450 }
2447 2451 revs[revcount++] = (int)val;
2448 2452 duplicate:;
2449 2453 }
2450 2454
2451 2455 if (revcount == 0) {
2452 2456 ret = PyList_New(0);
2453 2457 goto done;
2454 2458 }
2455 2459 if (revcount == 1) {
2456 2460 PyObject *obj;
2457 2461 ret = PyList_New(1);
2458 2462 if (ret == NULL)
2459 2463 goto bail;
2460 2464 obj = PyInt_FromLong(revs[0]);
2461 2465 if (obj == NULL)
2462 2466 goto bail;
2463 2467 PyList_SET_ITEM(ret, 0, obj);
2464 2468 goto done;
2465 2469 }
2466 2470
2467 2471 ret = find_gca_candidates(self, revs, revcount);
2468 2472 if (ret == NULL)
2469 2473 goto bail;
2470 2474
2471 2475 done:
2472 2476 PyMem_Free(revs);
2473 2477 return ret;
2474 2478
2475 2479 bail:
2476 2480 PyMem_Free(revs);
2477 2481 Py_XDECREF(ret);
2478 2482 return NULL;
2479 2483 }
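
/*
 * Example of the "cheesy bloom filter" above (illustrative): each accepted
 * rev sets bit (val & 0x3f) in `repeat`, one of 64 buckets keyed by the low
 * six bits of the revision number.  The O(revcount) duplicate scan is only
 * performed when a new rev hashes into a bucket that is already set, so the
 * common disjoint-set case stays linear in the number of arguments.
 */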
2480 2484
2481 2485 /*
2482 2486 * Given a (possibly overlapping) set of revs, return the greatest
2483 2487 * common ancestors: those with the longest path to the root.
2484 2488 */
2485 2489 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2486 2490 {
2487 2491 PyObject *ret;
2488 2492 PyObject *gca = index_commonancestorsheads(self, args);
2489 2493 if (gca == NULL)
2490 2494 return NULL;
2491 2495
2492 2496 if (PyList_GET_SIZE(gca) <= 1) {
2493 2497 return gca;
2494 2498 }
2495 2499
2496 2500 ret = find_deepest(self, gca);
2497 2501 Py_DECREF(gca);
2498 2502 return ret;
2499 2503 }
2500 2504
2501 2505 /*
2502 2506 * Invalidate any trie entries introduced by added revs.
2503 2507 */
2504 2508 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2505 2509 {
2506 2510 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2507 2511
2508 2512 for (i = start; i < len; i++) {
2509 2513 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2510 2514 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2511 2515
2512 2516 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2513 2517 }
2514 2518
2515 2519 if (start == 0)
2516 2520 Py_CLEAR(self->added);
2517 2521 }
2518 2522
2519 2523 /*
2520 2524  * Delete a numeric range of revs, which must be at the end of the
2521 2525  * index.
2522 2526 */
2523 2527 static int index_slice_del(indexObject *self, PyObject *item)
2524 2528 {
2525 2529 Py_ssize_t start, stop, step, slicelength;
2526 2530 Py_ssize_t length = index_length(self) + 1;
2527 2531 int ret = 0;
2528 2532
2529 2533 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2530 2534 #ifdef IS_PY3K
2531 2535 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2532 2536 &slicelength) < 0)
2533 2537 #else
2534 2538 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2535 2539 &step, &slicelength) < 0)
2536 2540 #endif
2537 2541 return -1;
2538 2542
2539 2543 if (slicelength <= 0)
2540 2544 return 0;
2541 2545
2542 2546 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2543 2547 stop = start;
2544 2548
2545 2549 if (step < 0) {
2546 2550 stop = start + 1;
2547 2551 start = stop + step * (slicelength - 1) - 1;
2548 2552 step = -step;
2549 2553 }
2550 2554
2551 2555 if (step != 1) {
2552 2556 PyErr_SetString(PyExc_ValueError,
2553 2557 "revlog index delete requires step size of 1");
2554 2558 return -1;
2555 2559 }
2556 2560
2557 2561 if (stop != length - 1) {
2558 2562 PyErr_SetString(PyExc_IndexError,
2559 2563 "revlog index deletion indices are invalid");
2560 2564 return -1;
2561 2565 }
2562 2566
2563 2567 if (start < self->length) {
2564 2568 if (self->ntinitialized) {
2565 2569 Py_ssize_t i;
2566 2570
2567 2571 for (i = start; i < self->length; i++) {
2568 2572 const char *node = index_node_existing(self, i);
2569 2573 if (node == NULL)
2570 2574 return -1;
2571 2575
2572 2576 nt_delete_node(&self->nt, node);
2573 2577 }
2574 2578 if (self->added)
2575 2579 index_invalidate_added(self, 0);
2576 2580 if (self->ntrev > start)
2577 2581 self->ntrev = (int)start;
2578 2582 } else if (self->added) {
2579 2583 Py_CLEAR(self->added);
2580 2584 }
2581 2585
2582 2586 self->length = start;
2583 2587 if (start < self->raw_length) {
2584 2588 if (self->cache) {
2585 2589 Py_ssize_t i;
2586 2590 for (i = start; i < self->raw_length; i++)
2587 2591 Py_CLEAR(self->cache[i]);
2588 2592 }
2589 2593 self->raw_length = start;
2590 2594 }
2591 2595 goto done;
2592 2596 }
2593 2597
2594 2598 if (self->ntinitialized) {
2595 2599 index_invalidate_added(self, start - self->length);
2596 2600 if (self->ntrev > start)
2597 2601 self->ntrev = (int)start;
2598 2602 }
2599 2603 if (self->added)
2600 2604 ret = PyList_SetSlice(self->added, start - self->length,
2601 2605 PyList_GET_SIZE(self->added), NULL);
2602 2606 done:
2603 2607 Py_CLEAR(self->headrevs);
2604 2608 return ret;
2605 2609 }
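
/*
 * Only a step-1 deletion of the tail of the index is accepted; any other
 * slice raises ValueError or IndexError above.  Note that the slice is
 * normalized against index_length() + 1, so callers address the range as
 * if one trailing entry beyond the last revision were still present.
 * Removing the tail also drops the matching trie entries, pending added
 * entries, and the cached headrevs.
 */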
2606 2610
2607 2611 /*
2608 2612 * Supported ops:
2609 2613 *
2610 2614 * slice deletion
2611 2615 * string assignment (extend node->rev mapping)
2612 2616 * string deletion (shrink node->rev mapping)
2613 2617 */
2614 2618 static int index_assign_subscript(indexObject *self, PyObject *item,
2615 2619 PyObject *value)
2616 2620 {
2617 2621 char *node;
2618 2622 long rev;
2619 2623
2620 2624 if (PySlice_Check(item) && value == NULL)
2621 2625 return index_slice_del(self, item);
2622 2626
2623 2627 if (node_check(item, &node) == -1)
2624 2628 return -1;
2625 2629
2626 2630 if (value == NULL)
2627 2631 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2628 2632 : 0;
2629 2633 rev = PyInt_AsLong(value);
2630 2634 if (rev > INT_MAX || rev < 0) {
2631 2635 if (!PyErr_Occurred())
2632 2636 PyErr_SetString(PyExc_ValueError, "rev out of range");
2633 2637 return -1;
2634 2638 }
2635 2639
2636 2640 if (index_init_nt(self) == -1)
2637 2641 return -1;
2638 2642 return nt_insert(&self->nt, node, (int)rev);
2639 2643 }
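
/*
 * How the supported operations listed above map onto this function
 * (illustrative): a slice key with a NULL value is tail deletion; a node
 * key with a NULL value removes that node from the trie; a node key with
 * an integer value inserts the node->rev pair.  Only the lookup structure
 * is touched here; the index entries themselves are never rewritten.
 */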
2640 2644
2641 2645 /*
2642 2646 * Find all RevlogNG entries in an index that has inline data. Update
2643 2647 * the optional "offsets" table with those entries.
2644 2648 */
2645 2649 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2646 2650 {
2647 2651 const char *data = (const char *)self->buf.buf;
2648 2652 Py_ssize_t pos = 0;
2649 2653 Py_ssize_t end = self->buf.len;
2650 2654 long incr = v1_hdrsize;
2651 2655 Py_ssize_t len = 0;
2652 2656
2653 2657 while (pos + v1_hdrsize <= end && pos >= 0) {
2654 2658 uint32_t comp_len;
2655 2659 /* 3rd element of header is length of compressed inline data */
2656 2660 comp_len = getbe32(data + pos + 8);
2657 2661 incr = v1_hdrsize + comp_len;
2658 2662 if (offsets)
2659 2663 offsets[len] = data + pos;
2660 2664 len++;
2661 2665 pos += incr;
2662 2666 }
2663 2667
2664 2668 if (pos != end) {
2665 2669 if (!PyErr_Occurred())
2666 2670 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2667 2671 return -1;
2668 2672 }
2669 2673
2670 2674 return len;
2671 2675 }
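
/*
 * The scan relies on the v1 record layout: each entry is a v1_hdrsize
 * header whose 32-bit big-endian field at offset +8 (the header's third
 * element, per the comment above) gives the length of the compressed
 * revision data stored inline right after it, so the next header starts
 * at pos + v1_hdrsize + comp_len.
 */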
2672 2676
2673 2677 static int index_init(indexObject *self, PyObject *args)
2674 2678 {
2675 2679 PyObject *data_obj, *inlined_obj;
2676 2680 Py_ssize_t size;
2677 2681
2678 2682 /* Initialize before argument-checking to avoid index_dealloc() crash.
2679 2683 */
2680 2684 self->raw_length = 0;
2681 2685 self->added = NULL;
2682 2686 self->cache = NULL;
2683 2687 self->data = NULL;
2684 2688 memset(&self->buf, 0, sizeof(self->buf));
2685 2689 self->headrevs = NULL;
2686 2690 self->filteredrevs = Py_None;
2687 2691 Py_INCREF(Py_None);
2688 2692 self->ntinitialized = 0;
2689 2693 self->offsets = NULL;
2690 2694
2691 2695 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2692 2696 return -1;
2693 2697 if (!PyObject_CheckBuffer(data_obj)) {
2694 2698 PyErr_SetString(PyExc_TypeError,
2695 2699 "data does not support buffer interface");
2696 2700 return -1;
2697 2701 }
2698 2702
2699 2703 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2700 2704 return -1;
2701 2705 size = self->buf.len;
2702 2706
2703 2707 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2704 2708 self->data = data_obj;
2705 2709
2706 2710 self->ntlookups = self->ntmisses = 0;
2707 2711 self->ntrev = -1;
2708 2712 Py_INCREF(self->data);
2709 2713
2710 2714 if (self->inlined) {
2711 2715 Py_ssize_t len = inline_scan(self, NULL);
2712 2716 if (len == -1)
2713 2717 goto bail;
2714 2718 self->raw_length = len;
2715 2719 self->length = len;
2716 2720 } else {
2717 2721 if (size % v1_hdrsize) {
2718 2722 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2719 2723 goto bail;
2720 2724 }
2721 2725 self->raw_length = size / v1_hdrsize;
2722 2726 self->length = self->raw_length;
2723 2727 }
2724 2728
2725 2729 return 0;
2726 2730 bail:
2727 2731 return -1;
2728 2732 }
2729 2733
2730 2734 static PyObject *index_nodemap(indexObject *self)
2731 2735 {
2732 2736 Py_INCREF(self);
2733 2737 return (PyObject *)self;
2734 2738 }
2735 2739
2736 2740 static void _index_clearcaches(indexObject *self)
2737 2741 {
2738 2742 if (self->cache) {
2739 2743 Py_ssize_t i;
2740 2744
2741 2745 for (i = 0; i < self->raw_length; i++)
2742 2746 Py_CLEAR(self->cache[i]);
2743 2747 free(self->cache);
2744 2748 self->cache = NULL;
2745 2749 }
2746 2750 if (self->offsets) {
2747 2751 PyMem_Free((void *)self->offsets);
2748 2752 self->offsets = NULL;
2749 2753 }
2750 2754 if (self->ntinitialized) {
2751 2755 nt_dealloc(&self->nt);
2752 2756 }
2753 2757 self->ntinitialized = 0;
2754 2758 Py_CLEAR(self->headrevs);
2755 2759 }
2756 2760
2757 2761 static PyObject *index_clearcaches(indexObject *self)
2758 2762 {
2759 2763 _index_clearcaches(self);
2760 2764 self->ntrev = -1;
2761 2765 self->ntlookups = self->ntmisses = 0;
2762 2766 Py_RETURN_NONE;
2763 2767 }
2764 2768
2765 2769 static void index_dealloc(indexObject *self)
2766 2770 {
2767 2771 _index_clearcaches(self);
2768 2772 Py_XDECREF(self->filteredrevs);
2769 2773 if (self->buf.buf) {
2770 2774 PyBuffer_Release(&self->buf);
2771 2775 memset(&self->buf, 0, sizeof(self->buf));
2772 2776 }
2773 2777 Py_XDECREF(self->data);
2774 2778 Py_XDECREF(self->added);
2775 2779 PyObject_Del(self);
2776 2780 }
2777 2781
2778 2782 static PySequenceMethods index_sequence_methods = {
2779 2783 (lenfunc)index_length, /* sq_length */
2780 2784 0, /* sq_concat */
2781 2785 0, /* sq_repeat */
2782 2786 (ssizeargfunc)index_get, /* sq_item */
2783 2787 0, /* sq_slice */
2784 2788 0, /* sq_ass_item */
2785 2789 0, /* sq_ass_slice */
2786 2790 (objobjproc)index_contains, /* sq_contains */
2787 2791 };
2788 2792
2789 2793 static PyMappingMethods index_mapping_methods = {
2790 2794 (lenfunc)index_length, /* mp_length */
2791 2795 (binaryfunc)index_getitem, /* mp_subscript */
2792 2796 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2793 2797 };
2794 2798
2795 2799 static PyMethodDef index_methods[] = {
2796 2800 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2797 2801 "return the gca set of the given revs"},
2798 2802 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2799 2803 METH_VARARGS,
2800 2804 "return the heads of the common ancestors of the given revs"},
2801 2805 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2802 2806 "clear the index caches"},
2803 2807 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2804 2808 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2805 2809 "return `rev` associated with a node or None"},
2806 2810 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2807 2811 	 "return True if the node exists in the index"},
2808 2812 {"rev", (PyCFunction)index_m_rev, METH_O,
2809 2813 "return `rev` associated with a node or raise RevlogError"},
2810 2814 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2811 2815 "compute phases"},
2812 2816 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2813 2817 "reachableroots"},
2814 2818 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2815 2819 "get head revisions"}, /* Can do filtering since 3.2 */
2816 2820 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2817 2821 "get filtered head revisions"}, /* Can always do filtering */
2818 2822 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2819 2823 "True if the object is a snapshot"},
2820 2824 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2821 2825 "Gather snapshot data in a cache dict"},
2822 2826 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2823 2827 "determine revisions with deltas to reconstruct fulltext"},
2824 2828 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2825 2829 	 METH_VARARGS, "slice a chunk of revisions to keep read density above a target"},
2826 2830 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2827 2831 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2828 2832 "match a potentially ambiguous node ID"},
2829 2833 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2830 2834 "find length of shortest hex nodeid of a binary ID"},
2831 2835 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2832 2836 {NULL} /* Sentinel */
2833 2837 };
2834 2838
2835 2839 static PyGetSetDef index_getset[] = {
2836 2840 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2837 2841 {NULL} /* Sentinel */
2838 2842 };
2839 2843
2840 2844 PyTypeObject HgRevlogIndex_Type = {
2841 2845 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2842 2846 "parsers.index", /* tp_name */
2843 2847 sizeof(indexObject), /* tp_basicsize */
2844 2848 0, /* tp_itemsize */
2845 2849 (destructor)index_dealloc, /* tp_dealloc */
2846 2850 0, /* tp_print */
2847 2851 0, /* tp_getattr */
2848 2852 0, /* tp_setattr */
2849 2853 0, /* tp_compare */
2850 2854 0, /* tp_repr */
2851 2855 0, /* tp_as_number */
2852 2856 &index_sequence_methods, /* tp_as_sequence */
2853 2857 &index_mapping_methods, /* tp_as_mapping */
2854 2858 0, /* tp_hash */
2855 2859 0, /* tp_call */
2856 2860 0, /* tp_str */
2857 2861 0, /* tp_getattro */
2858 2862 0, /* tp_setattro */
2859 2863 0, /* tp_as_buffer */
2860 2864 Py_TPFLAGS_DEFAULT, /* tp_flags */
2861 2865 "revlog index", /* tp_doc */
2862 2866 0, /* tp_traverse */
2863 2867 0, /* tp_clear */
2864 2868 0, /* tp_richcompare */
2865 2869 0, /* tp_weaklistoffset */
2866 2870 0, /* tp_iter */
2867 2871 0, /* tp_iternext */
2868 2872 index_methods, /* tp_methods */
2869 2873 0, /* tp_members */
2870 2874 index_getset, /* tp_getset */
2871 2875 0, /* tp_base */
2872 2876 0, /* tp_dict */
2873 2877 0, /* tp_descr_get */
2874 2878 0, /* tp_descr_set */
2875 2879 0, /* tp_dictoffset */
2876 2880 (initproc)index_init, /* tp_init */
2877 2881 0, /* tp_alloc */
2878 2882 };
2879 2883
2880 2884 /*
2881 2885  * returns a tuple of the form (index, cache) with elements as
2882 2886 * follows:
2883 2887 *
2884 2888 * index: an index object that lazily parses RevlogNG records
2885 2889 * cache: if data is inlined, a tuple (0, index_file_content), else None
2886 2890 * index_file_content could be a string, or a buffer
2887 2891 *
2888 2892 * added complications are for backwards compatibility
2889 2893 */
2890 2894 PyObject *parse_index2(PyObject *self, PyObject *args)
2891 2895 {
2892 2896 PyObject *cache = NULL;
2893 2897 indexObject *idx;
2894 2898 int ret;
2895 2899
2896 2900 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2897 2901 if (idx == NULL)
2898 2902 goto bail;
2899 2903
2900 2904 ret = index_init(idx, args);
2901 2905 if (ret == -1)
2902 2906 goto bail;
2903 2907
2904 2908 if (idx->inlined) {
2905 2909 cache = Py_BuildValue("iO", 0, idx->data);
2906 2910 if (cache == NULL)
2907 2911 goto bail;
2908 2912 } else {
2909 2913 cache = Py_None;
2910 2914 Py_INCREF(cache);
2911 2915 }
2912 2916
2913 2917 return Py_BuildValue("NN", idx, cache);
2914 2918
2915 2919 bail:
2916 2920 Py_XDECREF(idx);
2917 2921 Py_XDECREF(cache);
2918 2922 return NULL;
2919 2923 }
2920 2924
2921 2925 static Revlog_CAPI CAPI = {
2922 2926 /* increment the abi_version field upon each change in the Revlog_CAPI
2923 2927 struct or in the ABI of the listed functions */
2924 2928 2,
2925 2929 index_length,
2926 2930 index_node,
2927 2931 HgRevlogIndex_GetParents,
2928 2932 };
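
/*
 * A consumer would fetch this table through the capsule registered in
 * revlog_module_init() below, roughly as follows (illustrative sketch;
 * `idx` is an indexObject obtained elsewhere and is assumed here):
 *
 *	Revlog_CAPI *capi = (Revlog_CAPI *)PyCapsule_Import(
 *	    "mercurial.cext.parsers.revlog_CAPI", 0);
 *	if (capi == NULL || capi->abi_version != 2)
 *		refuse the fast path and fall back;
 *	Py_ssize_t n = capi->index_length(idx);
 */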
2929 2933
2930 2934 void revlog_module_init(PyObject *mod)
2931 2935 {
2932 2936 PyObject *caps = NULL;
2933 2937 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
2934 2938 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
2935 2939 return;
2936 2940 Py_INCREF(&HgRevlogIndex_Type);
2937 2941 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
2938 2942
2939 2943 nodetreeType.tp_new = PyType_GenericNew;
2940 2944 if (PyType_Ready(&nodetreeType) < 0)
2941 2945 return;
2942 2946 Py_INCREF(&nodetreeType);
2943 2947 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
2944 2948
2945 2949 if (!nullentry) {
2946 2950 nullentry =
2947 2951 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2948 2952 -1, -1, -1, nullid, (Py_ssize_t)20);
2949 2953 }
2950 2954 if (nullentry)
2951 2955 PyObject_GC_UnTrack(nullentry);
2952 2956
2953 2957 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
2954 2958 if (caps != NULL)
2955 2959 PyModule_AddObject(mod, "revlog_CAPI", caps);
2956 2960 }