merge with stable
Yuya Nishihara
r45078:090a1a78 merge default
@@ -1,2917 +1,2918 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18
19 19 #include "bitmanipulation.h"
20 20 #include "charencode.h"
21 21 #include "revlog.h"
22 22 #include "util.h"
23 23
24 24 #ifdef IS_PY3K
25 25 /* The mapping of Python types is meant to be temporary to get Python
26 26 * 3 to compile. We should remove this once Python 3 is fully
27 27 * supported and proper types are used in the extensions themselves. */
28 28 #define PyInt_Check PyLong_Check
29 29 #define PyInt_FromLong PyLong_FromLong
30 30 #define PyInt_FromSsize_t PyLong_FromSsize_t
31 31 #define PyInt_AsLong PyLong_AsLong
32 32 #endif
33 33
34 34 typedef struct indexObjectStruct indexObject;
35 35
36 36 typedef struct {
37 37 int children[16];
38 38 } nodetreenode;
39 39
40 40 typedef struct {
41 41 int abi_version;
42 42 Py_ssize_t (*index_length)(const indexObject *);
43 43 const char *(*index_node)(indexObject *, Py_ssize_t);
44 44 int (*index_parents)(PyObject *, int, int *);
45 45 } Revlog_CAPI;
46 46
47 47 /*
48 48 * A base-16 trie for fast node->rev mapping.
49 49 *
50 50 * Positive value is index of the next node in the trie
51 51 * Negative value is a leaf: -(rev + 2)
52 52 * Zero is empty
53 53 */
54 54 typedef struct {
55 55 indexObject *index;
56 56 nodetreenode *nodes;
57 57 unsigned length; /* # nodes in use */
58 58 unsigned capacity; /* # nodes allocated */
59 59 int depth; /* maximum depth of tree */
60 60 int splits; /* # splits performed */
61 61 } nodetree;
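As a quick illustration of the child encoding described above (a positive value is the index of the next trie node, a negative value is a leaf storing -(rev + 2), zero is empty), here is a minimal standalone sketch. It is not part of parsers.c and only mirrors the arithmetic, including the detail exploited by nt_delete_node() later in this file: rev == -2 encodes back to 0, i.e. "not set".

#include <stdio.h>

/* Decode one child slot of a nodetreenode, following the scheme above. */
static void describe_child(int v)
{
        if (v == 0)
                printf("empty slot\n");
        else if (v > 0)
                printf("internal: next trie node is nodes[%d]\n", v);
        else
                printf("leaf: rev %d\n", -(v + 2)); /* -(rev + 2) round-trips to rev */
}

int main(void)
{
        describe_child(0);         /* empty */
        describe_child(3);         /* internal node */
        describe_child(-(5 + 2));  /* leaf for rev 5 */
        describe_child(-(-1 + 2)); /* leaf for nullrev (-1), stored as -1 */
        return 0;
}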
62 62
63 63 typedef struct {
64 64 PyObject_HEAD /* ; */
65 65 nodetree nt;
66 66 } nodetreeObject;
67 67
68 68 /*
69 69 * This class has two behaviors.
70 70 *
71 71 * When used in a list-like way (with integer keys), we decode an
72 72 * entry in a RevlogNG index file on demand. We have limited support for
73 73 * integer-keyed insert and delete, only at elements right before the
74 74 * end.
75 75 *
76 76 * With string keys, we lazily perform a reverse mapping from node to
77 77 * rev, using a base-16 trie.
78 78 */
79 79 struct indexObjectStruct {
80 80 PyObject_HEAD
81 81 /* Type-specific fields go here. */
82 82 PyObject *data; /* raw bytes of index */
83 83 Py_buffer buf; /* buffer of data */
84 84 PyObject **cache; /* cached tuples */
85 85 const char **offsets; /* populated on demand */
86 86 Py_ssize_t raw_length; /* original number of elements */
87 87 Py_ssize_t length; /* current number of elements */
88 88 PyObject *added; /* populated on demand */
89 89 PyObject *headrevs; /* cache, invalidated on changes */
90 90 PyObject *filteredrevs; /* filtered revs set */
91 91 nodetree nt; /* base-16 trie */
92 92 int ntinitialized; /* 0 or 1 */
93 93 int ntrev; /* last rev scanned */
94 94 int ntlookups; /* # lookups */
95 95 int ntmisses; /* # lookups that miss the cache */
96 96 int inlined;
97 97 };
98 98
99 99 static Py_ssize_t index_length(const indexObject *self)
100 100 {
101 101 if (self->added == NULL)
102 102 return self->length;
103 103 return self->length + PyList_GET_SIZE(self->added);
104 104 }
105 105
106 106 static PyObject *nullentry = NULL;
107 107 static const char nullid[20] = {0};
108 108 static const Py_ssize_t nullrev = -1;
109 109
110 110 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
111 111
112 112 #if LONG_MAX == 0x7fffffffL
113 113 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
114 114 #else
115 115 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
116 116 #endif
117 117
118 118 /* A RevlogNG v1 index entry is 64 bytes long. */
119 119 static const long v1_hdrsize = 64;
120 120
121 121 static void raise_revlog_error(void)
122 122 {
123 123 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
124 124
125 125 mod = PyImport_ImportModule("mercurial.error");
126 126 if (mod == NULL) {
127 127 goto cleanup;
128 128 }
129 129
130 130 dict = PyModule_GetDict(mod);
131 131 if (dict == NULL) {
132 132 goto cleanup;
133 133 }
134 134 Py_INCREF(dict);
135 135
136 136 errclass = PyDict_GetItemString(dict, "RevlogError");
137 137 if (errclass == NULL) {
138 138 PyErr_SetString(PyExc_SystemError,
139 139 "could not find RevlogError");
140 140 goto cleanup;
141 141 }
142 142
143 143 /* value of exception is ignored by callers */
144 144 PyErr_SetString(errclass, "RevlogError");
145 145
146 146 cleanup:
147 147 Py_XDECREF(dict);
148 148 Py_XDECREF(mod);
149 149 }
150 150
151 151 /*
152 152 * Return a pointer to the beginning of a RevlogNG record.
153 153 */
154 154 static const char *index_deref(indexObject *self, Py_ssize_t pos)
155 155 {
156 156 if (self->inlined && pos > 0) {
157 157 if (self->offsets == NULL) {
158 Py_ssize_t ret;
158 159 self->offsets = PyMem_Malloc(self->raw_length *
159 160 sizeof(*self->offsets));
160 161 if (self->offsets == NULL)
161 162 return (const char *)PyErr_NoMemory();
162 Py_ssize_t ret = inline_scan(self, self->offsets);
163 ret = inline_scan(self, self->offsets);
163 164 if (ret == -1) {
164 165 return NULL;
165 166 };
166 167 }
167 168 return self->offsets[pos];
168 169 }
169 170
170 171 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
171 172 }
172 173
173 174 /*
174 175 * Get parents of the given rev.
175 176 *
176 177 * The specified rev must be valid and must not be nullrev. A returned
177 178 * parent revision may be nullrev, but is guaranteed to be in valid range.
178 179 */
179 180 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
180 181 int maxrev)
181 182 {
182 183 if (rev >= self->length) {
183 184 long tmp;
184 185 PyObject *tuple =
185 186 PyList_GET_ITEM(self->added, rev - self->length);
186 187 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
187 188 return -1;
188 189 }
189 190 ps[0] = (int)tmp;
190 191 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
191 192 return -1;
192 193 }
193 194 ps[1] = (int)tmp;
194 195 } else {
195 196 const char *data = index_deref(self, rev);
196 197 ps[0] = getbe32(data + 24);
197 198 ps[1] = getbe32(data + 28);
198 199 }
199 200 /* If the index file is corrupted, ps[] may point to invalid revisions, so
200 201 * trusting them unconditionally risks a buffer overflow. */
201 202 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
202 203 PyErr_SetString(PyExc_ValueError, "parent out of range");
203 204 return -1;
204 205 }
205 206 return 0;
206 207 }
207 208
208 209 /*
209 210 * Get parents of the given rev.
210 211 *
211 212 * If the specified rev is out of range, IndexError will be raised. If the
212 213 * revlog entry is corrupted, ValueError may be raised.
213 214 *
214 215 * Returns 0 on success or -1 on failure.
215 216 */
216 217 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
217 218 {
218 219 int tiprev;
219 220 if (!op || !HgRevlogIndex_Check(op) || !ps) {
220 221 PyErr_BadInternalCall();
221 222 return -1;
222 223 }
223 224 tiprev = (int)index_length((indexObject *)op) - 1;
224 225 if (rev < -1 || rev > tiprev) {
225 226 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
226 227 return -1;
227 228 } else if (rev == -1) {
228 229 ps[0] = ps[1] = -1;
229 230 return 0;
230 231 } else {
231 232 return index_get_parents((indexObject *)op, rev, ps, tiprev);
232 233 }
233 234 }
234 235
235 236 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
236 237 {
237 238 uint64_t offset;
238 239 if (rev == nullrev) {
239 240 return 0;
240 241 }
241 242 if (rev >= self->length) {
242 243 PyObject *tuple;
243 244 PyObject *pylong;
244 245 PY_LONG_LONG tmp;
245 246 tuple = PyList_GET_ITEM(self->added, rev - self->length);
246 247 pylong = PyTuple_GET_ITEM(tuple, 0);
247 248 tmp = PyLong_AsLongLong(pylong);
248 249 if (tmp == -1 && PyErr_Occurred()) {
249 250 return -1;
250 251 }
251 252 if (tmp < 0) {
252 253 PyErr_Format(PyExc_OverflowError,
253 254 "revlog entry size out of bound (%lld)",
254 255 (long long)tmp);
255 256 return -1;
256 257 }
257 258 offset = (uint64_t)tmp;
258 259 } else {
259 260 const char *data = index_deref(self, rev);
260 261 offset = getbe32(data + 4);
261 262 if (rev == 0) {
262 263 /* mask out version number for the first entry */
263 264 offset &= 0xFFFF;
264 265 } else {
265 266 uint32_t offset_high = getbe32(data);
266 267 offset |= ((uint64_t)offset_high) << 32;
267 268 }
268 269 }
269 270 return (int64_t)(offset >> 16);
270 271 }
271 272
272 273 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
273 274 {
274 275 if (rev == nullrev) {
275 276 return 0;
276 277 }
277 278 if (rev >= self->length) {
278 279 PyObject *tuple;
279 280 PyObject *pylong;
280 281 long ret;
281 282 tuple = PyList_GET_ITEM(self->added, rev - self->length);
282 283 pylong = PyTuple_GET_ITEM(tuple, 1);
283 284 ret = PyInt_AsLong(pylong);
284 285 if (ret == -1 && PyErr_Occurred()) {
285 286 return -1;
286 287 }
287 288 if (ret < 0 || ret > (long)INT_MAX) {
288 289 PyErr_Format(PyExc_OverflowError,
289 290 "revlog entry size out of bound (%ld)",
290 291 ret);
291 292 return -1;
292 293 }
293 294 return (int)ret;
294 295 } else {
295 296 const char *data = index_deref(self, rev);
296 297 int tmp = (int)getbe32(data + 8);
297 298 if (tmp < 0) {
298 299 PyErr_Format(PyExc_OverflowError,
299 300 "revlog entry size out of bound (%d)",
300 301 tmp);
301 302 return -1;
302 303 }
303 304 return tmp;
304 305 }
305 306 }
306 307
307 308 /*
308 309 * RevlogNG format (all in big endian, data may be inlined):
309 310 * 6 bytes: offset
310 311 * 2 bytes: flags
311 312 * 4 bytes: compressed length
312 313 * 4 bytes: uncompressed length
313 314 * 4 bytes: base revision
314 315 * 4 bytes: link revision
315 316 * 4 bytes: parent 1 revision
316 317 * 4 bytes: parent 2 revision
317 318 * 32 bytes: nodeid (only 20 bytes used)
318 319 */
319 320 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
320 321 {
321 322 uint64_t offset_flags;
322 323 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
323 324 const char *c_node_id;
324 325 const char *data;
325 326 Py_ssize_t length = index_length(self);
326 327 PyObject *entry;
327 328
328 329 if (pos == nullrev) {
329 330 Py_INCREF(nullentry);
330 331 return nullentry;
331 332 }
332 333
333 334 if (pos < 0 || pos >= length) {
334 335 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
335 336 return NULL;
336 337 }
337 338
338 339 if (pos >= self->length) {
339 340 PyObject *obj;
340 341 obj = PyList_GET_ITEM(self->added, pos - self->length);
341 342 Py_INCREF(obj);
342 343 return obj;
343 344 }
344 345
345 346 if (self->cache) {
346 347 if (self->cache[pos]) {
347 348 Py_INCREF(self->cache[pos]);
348 349 return self->cache[pos];
349 350 }
350 351 } else {
351 352 self->cache = calloc(self->raw_length, sizeof(PyObject *));
352 353 if (self->cache == NULL)
353 354 return PyErr_NoMemory();
354 355 }
355 356
356 357 data = index_deref(self, pos);
357 358 if (data == NULL)
358 359 return NULL;
359 360
360 361 offset_flags = getbe32(data + 4);
361 362 if (pos == 0) /* mask out version number for the first entry */
362 363 offset_flags &= 0xFFFF;
363 364 else {
364 365 uint32_t offset_high = getbe32(data);
365 366 offset_flags |= ((uint64_t)offset_high) << 32;
366 367 }
367 368
368 369 comp_len = getbe32(data + 8);
369 370 uncomp_len = getbe32(data + 12);
370 371 base_rev = getbe32(data + 16);
371 372 link_rev = getbe32(data + 20);
372 373 parent_1 = getbe32(data + 24);
373 374 parent_2 = getbe32(data + 28);
374 375 c_node_id = data + 32;
375 376
376 377 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
377 378 base_rev, link_rev, parent_1, parent_2, c_node_id,
378 379 (Py_ssize_t)20);
379 380
380 381 if (entry) {
381 382 PyObject_GC_UnTrack(entry);
382 383 Py_INCREF(entry);
383 384 }
384 385
385 386 self->cache[pos] = entry;
386 387
387 388 return entry;
388 389 }
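To make the byte layout above concrete, here is a small standalone sketch, not taken from parsers.c, that fills a fake 64-byte v1 entry and unpacks it the same way index_get() and index_get_start() do. be32() is a local stand-in for getbe32() from bitmanipulation.h and every field value is invented; note that for rev 0 the first 4 bytes hold the revlog version, which is why the real code masks offset_flags with 0xFFFF for that entry.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* local stand-in for getbe32(): read 4 big-endian bytes */
static uint32_t be32(const unsigned char *p)
{
        return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

int main(void)
{
        /* fake entry (not rev 0): offset 0x123456, flags 0x0001,
         * compressed length 100, uncompressed length 200, parents 7 and -1 */
        unsigned char entry[64] = {0};
        uint64_t offset_flags;

        entry[3] = 0x12; entry[4] = 0x34; entry[5] = 0x56; /* 6-byte offset */
        entry[7] = 0x01;                                   /* 2-byte flags */
        entry[11] = 100;             /* compressed length, bytes 8-11 */
        entry[15] = 200;             /* uncompressed length, bytes 12-15 */
        entry[27] = 7;               /* parent 1, bytes 24-27 */
        memset(entry + 28, 0xff, 4); /* parent 2 = nullrev (-1), bytes 28-31 */

        /* same packing as index_get(): high 32 bits above the low 32 bits */
        offset_flags = ((uint64_t)be32(entry) << 32) | be32(entry + 4);
        printf("offset = 0x%llx\n",
               (unsigned long long)(offset_flags >> 16)); /* 0x123456, as in index_get_start() */
        printf("flags = 0x%llx\n",
               (unsigned long long)(offset_flags & 0xffff)); /* 0x1 */
        printf("comp/uncomp = %u/%u\n", (unsigned)be32(entry + 8),
               (unsigned)be32(entry + 12)); /* 100/200 */
        printf("parents = %d, %d\n", (int)be32(entry + 24),
               (int)be32(entry + 28)); /* 7, -1 */
        return 0;
}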
389 390
390 391 /*
391 392 * Return the 20-byte SHA of the node corresponding to the given rev.
392 393 */
393 394 static const char *index_node(indexObject *self, Py_ssize_t pos)
394 395 {
395 396 Py_ssize_t length = index_length(self);
396 397 const char *data;
397 398
398 399 if (pos == nullrev)
399 400 return nullid;
400 401
401 402 if (pos >= length)
402 403 return NULL;
403 404
404 405 if (pos >= self->length) {
405 406 PyObject *tuple, *str;
406 407 tuple = PyList_GET_ITEM(self->added, pos - self->length);
407 408 str = PyTuple_GetItem(tuple, 7);
408 409 return str ? PyBytes_AS_STRING(str) : NULL;
409 410 }
410 411
411 412 data = index_deref(self, pos);
412 413 return data ? data + 32 : NULL;
413 414 }
414 415
415 416 /*
416 417 * Return the 20-byte SHA of the node corresponding to the given rev. The
417 418 * rev is assumed to exist. If not, an exception is set.
418 419 */
419 420 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
420 421 {
421 422 const char *node = index_node(self, pos);
422 423 if (node == NULL) {
423 424 PyErr_Format(PyExc_IndexError, "could not access rev %d",
424 425 (int)pos);
425 426 }
426 427 return node;
427 428 }
428 429
429 430 static int nt_insert(nodetree *self, const char *node, int rev);
430 431
431 432 static int node_check(PyObject *obj, char **node)
432 433 {
433 434 Py_ssize_t nodelen;
434 435 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
435 436 return -1;
436 437 if (nodelen == 20)
437 438 return 0;
438 439 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
439 440 return -1;
440 441 }
441 442
442 443 static PyObject *index_append(indexObject *self, PyObject *obj)
443 444 {
444 445 char *node;
445 446 Py_ssize_t len;
446 447
447 448 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
448 449 PyErr_SetString(PyExc_TypeError, "8-tuple required");
449 450 return NULL;
450 451 }
451 452
452 453 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
453 454 return NULL;
454 455
455 456 len = index_length(self);
456 457
457 458 if (self->added == NULL) {
458 459 self->added = PyList_New(0);
459 460 if (self->added == NULL)
460 461 return NULL;
461 462 }
462 463
463 464 if (PyList_Append(self->added, obj) == -1)
464 465 return NULL;
465 466
466 467 if (self->ntinitialized)
467 468 nt_insert(&self->nt, node, (int)len);
468 469
469 470 Py_CLEAR(self->headrevs);
470 471 Py_RETURN_NONE;
471 472 }
472 473
473 474 static PyObject *index_stats(indexObject *self)
474 475 {
475 476 PyObject *obj = PyDict_New();
476 477 PyObject *s = NULL;
477 478 PyObject *t = NULL;
478 479
479 480 if (obj == NULL)
480 481 return NULL;
481 482
482 483 #define istat(__n, __d) \
483 484 do { \
484 485 s = PyBytes_FromString(__d); \
485 486 t = PyInt_FromSsize_t(self->__n); \
486 487 if (!s || !t) \
487 488 goto bail; \
488 489 if (PyDict_SetItem(obj, s, t) == -1) \
489 490 goto bail; \
490 491 Py_CLEAR(s); \
491 492 Py_CLEAR(t); \
492 493 } while (0)
493 494
494 495 if (self->added) {
495 496 Py_ssize_t len = PyList_GET_SIZE(self->added);
496 497 s = PyBytes_FromString("index entries added");
497 498 t = PyInt_FromSsize_t(len);
498 499 if (!s || !t)
499 500 goto bail;
500 501 if (PyDict_SetItem(obj, s, t) == -1)
501 502 goto bail;
502 503 Py_CLEAR(s);
503 504 Py_CLEAR(t);
504 505 }
505 506
506 507 if (self->raw_length != self->length)
507 508 istat(raw_length, "revs on disk");
508 509 istat(length, "revs in memory");
509 510 istat(ntlookups, "node trie lookups");
510 511 istat(ntmisses, "node trie misses");
511 512 istat(ntrev, "node trie last rev scanned");
512 513 if (self->ntinitialized) {
513 514 istat(nt.capacity, "node trie capacity");
514 515 istat(nt.depth, "node trie depth");
515 516 istat(nt.length, "node trie count");
516 517 istat(nt.splits, "node trie splits");
517 518 }
518 519
519 520 #undef istat
520 521
521 522 return obj;
522 523
523 524 bail:
524 525 Py_XDECREF(obj);
525 526 Py_XDECREF(s);
526 527 Py_XDECREF(t);
527 528 return NULL;
528 529 }
529 530
530 531 /*
531 532 * When we cache a list, we want to be sure the caller can't mutate
532 533 * the cached copy.
533 534 */
534 535 static PyObject *list_copy(PyObject *list)
535 536 {
536 537 Py_ssize_t len = PyList_GET_SIZE(list);
537 538 PyObject *newlist = PyList_New(len);
538 539 Py_ssize_t i;
539 540
540 541 if (newlist == NULL)
541 542 return NULL;
542 543
543 544 for (i = 0; i < len; i++) {
544 545 PyObject *obj = PyList_GET_ITEM(list, i);
545 546 Py_INCREF(obj);
546 547 PyList_SET_ITEM(newlist, i, obj);
547 548 }
548 549
549 550 return newlist;
550 551 }
551 552
552 553 static int check_filter(PyObject *filter, Py_ssize_t arg)
553 554 {
554 555 if (filter) {
555 556 PyObject *arglist, *result;
556 557 int isfiltered;
557 558
558 559 arglist = Py_BuildValue("(n)", arg);
559 560 if (!arglist) {
560 561 return -1;
561 562 }
562 563
563 564 result = PyEval_CallObject(filter, arglist);
564 565 Py_DECREF(arglist);
565 566 if (!result) {
566 567 return -1;
567 568 }
568 569
569 570 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
570 571 * same as this function, so we can just return it directly.*/
571 572 isfiltered = PyObject_IsTrue(result);
572 573 Py_DECREF(result);
573 574 return isfiltered;
574 575 } else {
575 576 return 0;
576 577 }
577 578 }
578 579
579 580 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
580 581 Py_ssize_t marker, char *phases)
581 582 {
582 583 PyObject *iter = NULL;
583 584 PyObject *iter_item = NULL;
584 585 Py_ssize_t min_idx = index_length(self) + 2;
585 586 long iter_item_long;
586 587
587 588 if (PyList_GET_SIZE(list) != 0) {
588 589 iter = PyObject_GetIter(list);
589 590 if (iter == NULL)
590 591 return -2;
591 592 while ((iter_item = PyIter_Next(iter))) {
592 593 if (!pylong_to_long(iter_item, &iter_item_long)) {
593 594 Py_DECREF(iter_item);
594 595 return -2;
595 596 }
596 597 Py_DECREF(iter_item);
597 598 if (iter_item_long < min_idx)
598 599 min_idx = iter_item_long;
599 600 phases[iter_item_long] = (char)marker;
600 601 }
601 602 Py_DECREF(iter);
602 603 }
603 604
604 605 return min_idx;
605 606 }
606 607
607 608 static inline void set_phase_from_parents(char *phases, int parent_1,
608 609 int parent_2, Py_ssize_t i)
609 610 {
610 611 if (parent_1 >= 0 && phases[parent_1] > phases[i])
611 612 phases[i] = phases[parent_1];
612 613 if (parent_2 >= 0 && phases[parent_2] > phases[i])
613 614 phases[i] = phases[parent_2];
614 615 }
615 616
616 617 static PyObject *reachableroots2(indexObject *self, PyObject *args)
617 618 {
618 619
619 620 /* Input */
620 621 long minroot;
621 622 PyObject *includepatharg = NULL;
622 623 int includepath = 0;
623 624 /* heads and roots are lists */
624 625 PyObject *heads = NULL;
625 626 PyObject *roots = NULL;
626 627 PyObject *reachable = NULL;
627 628
628 629 PyObject *val;
629 630 Py_ssize_t len = index_length(self);
630 631 long revnum;
631 632 Py_ssize_t k;
632 633 Py_ssize_t i;
633 634 Py_ssize_t l;
634 635 int r;
635 636 int parents[2];
636 637
637 638 /* Internal data structure:
638 639 * tovisit: array of length len+1 (all revs + nullrev), filled up to
639 640 * lentovisit
640 641 *
641 642 * revstates: array of length len+1 (all revs + nullrev) */
642 643 int *tovisit = NULL;
643 644 long lentovisit = 0;
644 645 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
645 646 char *revstates = NULL;
646 647
647 648 /* Get arguments */
648 649 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
649 650 &PyList_Type, &roots, &PyBool_Type,
650 651 &includepatharg))
651 652 goto bail;
652 653
653 654 if (includepatharg == Py_True)
654 655 includepath = 1;
655 656
656 657 /* Initialize return set */
657 658 reachable = PyList_New(0);
658 659 if (reachable == NULL)
659 660 goto bail;
660 661
661 662 /* Initialize internal datastructures */
662 663 tovisit = (int *)malloc((len + 1) * sizeof(int));
663 664 if (tovisit == NULL) {
664 665 PyErr_NoMemory();
665 666 goto bail;
666 667 }
667 668
668 669 revstates = (char *)calloc(len + 1, 1);
669 670 if (revstates == NULL) {
670 671 PyErr_NoMemory();
671 672 goto bail;
672 673 }
673 674
674 675 l = PyList_GET_SIZE(roots);
675 676 for (i = 0; i < l; i++) {
676 677 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
677 678 if (revnum == -1 && PyErr_Occurred())
678 679 goto bail;
679 680 /* If root is out of range, e.g. wdir(), it must be unreachable
680 681 * from heads. So we can just ignore it. */
681 682 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
682 683 continue;
683 684 revstates[revnum + 1] |= RS_ROOT;
684 685 }
685 686
686 687 /* Populate tovisit with all the heads */
687 688 l = PyList_GET_SIZE(heads);
688 689 for (i = 0; i < l; i++) {
689 690 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
690 691 if (revnum == -1 && PyErr_Occurred())
691 692 goto bail;
692 693 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
693 694 PyErr_SetString(PyExc_IndexError, "head out of range");
694 695 goto bail;
695 696 }
696 697 if (!(revstates[revnum + 1] & RS_SEEN)) {
697 698 tovisit[lentovisit++] = (int)revnum;
698 699 revstates[revnum + 1] |= RS_SEEN;
699 700 }
700 701 }
701 702
702 703 /* Visit the tovisit list and find the reachable roots */
703 704 k = 0;
704 705 while (k < lentovisit) {
705 706 /* Add the node to reachable if it is a root*/
706 707 revnum = tovisit[k++];
707 708 if (revstates[revnum + 1] & RS_ROOT) {
708 709 revstates[revnum + 1] |= RS_REACHABLE;
709 710 val = PyInt_FromLong(revnum);
710 711 if (val == NULL)
711 712 goto bail;
712 713 r = PyList_Append(reachable, val);
713 714 Py_DECREF(val);
714 715 if (r < 0)
715 716 goto bail;
716 717 if (includepath == 0)
717 718 continue;
718 719 }
719 720
720 721 /* Add its parents to the list of nodes to visit */
721 722 if (revnum == nullrev)
722 723 continue;
723 724 r = index_get_parents(self, revnum, parents, (int)len - 1);
724 725 if (r < 0)
725 726 goto bail;
726 727 for (i = 0; i < 2; i++) {
727 728 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
728 729 parents[i] >= minroot) {
729 730 tovisit[lentovisit++] = parents[i];
730 731 revstates[parents[i] + 1] |= RS_SEEN;
731 732 }
732 733 }
733 734 }
734 735
735 736 /* Find all the nodes in between the roots we found and the heads
736 737 * and add them to the reachable set */
737 738 if (includepath == 1) {
738 739 long minidx = minroot;
739 740 if (minidx < 0)
740 741 minidx = 0;
741 742 for (i = minidx; i < len; i++) {
742 743 if (!(revstates[i + 1] & RS_SEEN))
743 744 continue;
744 745 r = index_get_parents(self, i, parents, (int)len - 1);
745 746 /* Corrupted index file, error is set from
746 747 * index_get_parents */
747 748 if (r < 0)
748 749 goto bail;
749 750 if (((revstates[parents[0] + 1] |
750 751 revstates[parents[1] + 1]) &
751 752 RS_REACHABLE) &&
752 753 !(revstates[i + 1] & RS_REACHABLE)) {
753 754 revstates[i + 1] |= RS_REACHABLE;
754 755 val = PyInt_FromSsize_t(i);
755 756 if (val == NULL)
756 757 goto bail;
757 758 r = PyList_Append(reachable, val);
758 759 Py_DECREF(val);
759 760 if (r < 0)
760 761 goto bail;
761 762 }
762 763 }
763 764 }
764 765
765 766 free(revstates);
766 767 free(tovisit);
767 768 return reachable;
768 769 bail:
769 770 Py_XDECREF(reachable);
770 771 free(revstates);
771 772 free(tovisit);
772 773 return NULL;
773 774 }
774 775
775 776 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
776 777 {
777 778 PyObject *roots = Py_None;
778 779 PyObject *ret = NULL;
779 780 PyObject *phasessize = NULL;
780 781 PyObject *phaseroots = NULL;
781 782 PyObject *phaseset = NULL;
782 783 PyObject *phasessetlist = NULL;
783 784 PyObject *rev = NULL;
784 785 Py_ssize_t len = index_length(self);
785 786 Py_ssize_t numphase = 0;
786 787 Py_ssize_t minrevallphases = 0;
787 788 Py_ssize_t minrevphase = 0;
788 789 Py_ssize_t i = 0;
789 790 char *phases = NULL;
790 791 long phase;
791 792
792 793 if (!PyArg_ParseTuple(args, "O", &roots))
793 794 goto done;
794 795 if (roots == NULL || !PyList_Check(roots)) {
795 796 PyErr_SetString(PyExc_TypeError, "roots must be a list");
796 797 goto done;
797 798 }
798 799
799 800 phases = calloc(
800 801 len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
801 802 if (phases == NULL) {
802 803 PyErr_NoMemory();
803 804 goto done;
804 805 }
805 806 /* Put the phase information of all the roots in phases */
806 807 numphase = PyList_GET_SIZE(roots) + 1;
807 808 minrevallphases = len + 1;
808 809 phasessetlist = PyList_New(numphase);
809 810 if (phasessetlist == NULL)
810 811 goto done;
811 812
812 813 PyList_SET_ITEM(phasessetlist, 0, Py_None);
813 814 Py_INCREF(Py_None);
814 815
815 816 for (i = 0; i < numphase - 1; i++) {
816 817 phaseroots = PyList_GET_ITEM(roots, i);
817 818 phaseset = PySet_New(NULL);
818 819 if (phaseset == NULL)
819 820 goto release;
820 821 PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
821 822 if (!PyList_Check(phaseroots)) {
822 823 PyErr_SetString(PyExc_TypeError,
823 824 "roots item must be a list");
824 825 goto release;
825 826 }
826 827 minrevphase =
827 828 add_roots_get_min(self, phaseroots, i + 1, phases);
828 829 if (minrevphase == -2) /* Error from add_roots_get_min */
829 830 goto release;
830 831 minrevallphases = MIN(minrevallphases, minrevphase);
831 832 }
832 833 /* Propagate the phase information from the roots to the revs */
833 834 if (minrevallphases != -1) {
834 835 int parents[2];
835 836 for (i = minrevallphases; i < len; i++) {
836 837 if (index_get_parents(self, i, parents, (int)len - 1) <
837 838 0)
838 839 goto release;
839 840 set_phase_from_parents(phases, parents[0], parents[1],
840 841 i);
841 842 }
842 843 }
843 844 /* Transform phase list to a python list */
844 845 phasessize = PyInt_FromSsize_t(len);
845 846 if (phasessize == NULL)
846 847 goto release;
847 848 for (i = 0; i < len; i++) {
848 849 phase = phases[i];
849 850 /* We only store the sets of revs for the non-public phases; the
850 851 * public phase is computed as a difference */
851 852 if (phase != 0) {
852 853 phaseset = PyList_GET_ITEM(phasessetlist, phase);
853 854 rev = PyInt_FromSsize_t(i);
854 855 if (rev == NULL)
855 856 goto release;
856 857 PySet_Add(phaseset, rev);
857 858 Py_XDECREF(rev);
858 859 }
859 860 }
860 861 ret = PyTuple_Pack(2, phasessize, phasessetlist);
861 862
862 863 release:
863 864 Py_XDECREF(phasessize);
864 865 Py_XDECREF(phasessetlist);
865 866 done:
866 867 free(phases);
867 868 return ret;
868 869 }
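A minimal standalone sketch, not part of parsers.c, of how the phase computation above behaves on a toy history: the roots get their phase marked first, then each later revision is raised to the highest phase found among its parents, exactly as the loop starting at minrevallphases does with set_phase_from_parents(). The revision numbers and phases are made up; 0 = public, 1 = draft, 2 = secret.

#include <stdio.h>

/* same logic as the static helper set_phase_from_parents() above */
static void raise_phase(char *phases, int p1, int p2, int i)
{
        if (p1 >= 0 && phases[p1] > phases[i])
                phases[i] = phases[p1];
        if (p2 >= 0 && phases[p2] > phases[i])
                phases[i] = phases[p2];
}

int main(void)
{
        /* toy history: 0 <- 1 <- 2 <- 3, plus rev 4 merging 1 and 3 */
        int parents[5][2] = {{-1, -1}, {0, -1}, {1, -1}, {2, -1}, {1, 3}};
        char phases[5] = {0, 0, 0, 0, 0};
        int i;

        phases[2] = 1; /* rev 2 is a draft root */
        phases[3] = 2; /* rev 3 is a secret root */

        /* propagate upward from the lowest root rev */
        for (i = 2; i < 5; i++)
                raise_phase(phases, parents[i][0], parents[i][1], i);

        for (i = 0; i < 5; i++)
                printf("rev %d: phase %d\n", i, phases[i]);
        /* 0 and 1 stay public, 2 is draft, 3 and 4 end up secret */
        return 0;
}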
869 870
870 871 static PyObject *index_headrevs(indexObject *self, PyObject *args)
871 872 {
872 873 Py_ssize_t i, j, len;
873 874 char *nothead = NULL;
874 875 PyObject *heads = NULL;
875 876 PyObject *filter = NULL;
876 877 PyObject *filteredrevs = Py_None;
877 878
878 879 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
879 880 return NULL;
880 881 }
881 882
882 883 if (self->headrevs && filteredrevs == self->filteredrevs)
883 884 return list_copy(self->headrevs);
884 885
885 886 Py_DECREF(self->filteredrevs);
886 887 self->filteredrevs = filteredrevs;
887 888 Py_INCREF(filteredrevs);
888 889
889 890 if (filteredrevs != Py_None) {
890 891 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
891 892 if (!filter) {
892 893 PyErr_SetString(
893 894 PyExc_TypeError,
894 895 "filteredrevs has no attribute __contains__");
895 896 goto bail;
896 897 }
897 898 }
898 899
899 900 len = index_length(self);
900 901 heads = PyList_New(0);
901 902 if (heads == NULL)
902 903 goto bail;
903 904 if (len == 0) {
904 905 PyObject *nullid = PyInt_FromLong(-1);
905 906 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
906 907 Py_XDECREF(nullid);
907 908 goto bail;
908 909 }
909 910 goto done;
910 911 }
911 912
912 913 nothead = calloc(len, 1);
913 914 if (nothead == NULL) {
914 915 PyErr_NoMemory();
915 916 goto bail;
916 917 }
917 918
918 919 for (i = len - 1; i >= 0; i--) {
919 920 int isfiltered;
920 921 int parents[2];
921 922
922 923 /* If nothead[i] == 1, it means we've seen an unfiltered child
923 924 * of this node already, and therefore this node is not
924 925 * filtered. So we can skip the expensive check_filter step.
925 926 */
926 927 if (nothead[i] != 1) {
927 928 isfiltered = check_filter(filter, i);
928 929 if (isfiltered == -1) {
929 930 PyErr_SetString(PyExc_TypeError,
930 931 "unable to check filter");
931 932 goto bail;
932 933 }
933 934
934 935 if (isfiltered) {
935 936 nothead[i] = 1;
936 937 continue;
937 938 }
938 939 }
939 940
940 941 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
941 942 goto bail;
942 943 for (j = 0; j < 2; j++) {
943 944 if (parents[j] >= 0)
944 945 nothead[parents[j]] = 1;
945 946 }
946 947 }
947 948
948 949 for (i = 0; i < len; i++) {
949 950 PyObject *head;
950 951
951 952 if (nothead[i])
952 953 continue;
953 954 head = PyInt_FromSsize_t(i);
954 955 if (head == NULL || PyList_Append(heads, head) == -1) {
955 956 Py_XDECREF(head);
956 957 goto bail;
957 958 }
958 959 }
959 960
960 961 done:
961 962 self->headrevs = heads;
962 963 Py_XDECREF(filter);
963 964 free(nothead);
964 965 return list_copy(self->headrevs);
965 966 bail:
966 967 Py_XDECREF(filter);
967 968 Py_XDECREF(heads);
968 969 free(nothead);
969 970 return NULL;
970 971 }
971 972
972 973 /**
973 974 * Obtain the base revision index entry.
974 975 *
975 976 * Callers must ensure that rev >= 0 or illegal memory access may occur.
976 977 */
977 978 static inline int index_baserev(indexObject *self, int rev)
978 979 {
979 980 const char *data;
980 981 int result;
981 982
982 983 if (rev >= self->length) {
983 984 PyObject *tuple =
984 985 PyList_GET_ITEM(self->added, rev - self->length);
985 986 long ret;
986 987 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
987 988 return -2;
988 989 }
989 990 result = (int)ret;
990 991 } else {
991 992 data = index_deref(self, rev);
992 993 if (data == NULL) {
993 994 return -2;
994 995 }
995 996
996 997 result = getbe32(data + 16);
997 998 }
998 999 if (result > rev) {
999 1000 PyErr_Format(
1000 1001 PyExc_ValueError,
1001 1002 "corrupted revlog, revision base above revision: %d, %d",
1002 1003 rev, result);
1003 1004 return -2;
1004 1005 }
1005 1006 if (result < -1) {
1006 1007 PyErr_Format(
1007 1008 PyExc_ValueError,
1008 1009 "corrupted revlog, revision base out of range: %d, %d", rev,
1009 1010 result);
1010 1011 return -2;
1011 1012 }
1012 1013 return result;
1013 1014 }
1014 1015
1015 1016 /**
1016 1017 * Find if a revision is a snapshot or not
1017 1018 *
1018 1019 * Only relevant for sparse-revlog case.
1019 1020 * Callers must ensure that rev is in a valid range.
1020 1021 */
1021 1022 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1022 1023 {
1023 1024 int ps[2];
1024 1025 Py_ssize_t base;
1025 1026 while (rev >= 0) {
1026 1027 base = (Py_ssize_t)index_baserev(self, rev);
1027 1028 if (base == rev) {
1028 1029 base = -1;
1029 1030 }
1030 1031 if (base == -2) {
1031 1032 assert(PyErr_Occurred());
1032 1033 return -1;
1033 1034 }
1034 1035 if (base == -1) {
1035 1036 return 1;
1036 1037 }
1037 1038 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1038 1039 assert(PyErr_Occurred());
1039 1040 return -1;
1040 1041 };
1041 1042 if (base == ps[0] || base == ps[1]) {
1042 1043 return 0;
1043 1044 }
1044 1045 rev = base;
1045 1046 }
1046 1047 return rev == -1;
1047 1048 }
1048 1049
1049 1050 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1050 1051 {
1051 1052 long rev;
1052 1053 int issnap;
1053 1054 Py_ssize_t length = index_length(self);
1054 1055
1055 1056 if (!pylong_to_long(value, &rev)) {
1056 1057 return NULL;
1057 1058 }
1058 1059 if (rev < -1 || rev >= length) {
1059 1060 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1060 1061 rev);
1061 1062 return NULL;
1062 1063 };
1063 1064 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1064 1065 if (issnap < 0) {
1065 1066 return NULL;
1066 1067 };
1067 1068 return PyBool_FromLong((long)issnap);
1068 1069 }
1069 1070
1070 1071 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1071 1072 {
1072 1073 Py_ssize_t start_rev;
1073 1074 PyObject *cache;
1074 1075 Py_ssize_t base;
1075 1076 Py_ssize_t rev;
1076 1077 PyObject *key = NULL;
1077 1078 PyObject *value = NULL;
1078 1079 const Py_ssize_t length = index_length(self);
1079 1080 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1080 1081 return NULL;
1081 1082 }
1082 1083 for (rev = start_rev; rev < length; rev++) {
1083 1084 int issnap;
1084 1085 PyObject *allvalues = NULL;
1085 1086 issnap = index_issnapshotrev(self, rev);
1086 1087 if (issnap < 0) {
1087 1088 goto bail;
1088 1089 }
1089 1090 if (issnap == 0) {
1090 1091 continue;
1091 1092 }
1092 1093 base = (Py_ssize_t)index_baserev(self, rev);
1093 1094 if (base == rev) {
1094 1095 base = -1;
1095 1096 }
1096 1097 if (base == -2) {
1097 1098 assert(PyErr_Occurred());
1098 1099 goto bail;
1099 1100 }
1100 1101 key = PyInt_FromSsize_t(base);
1101 1102 allvalues = PyDict_GetItem(cache, key);
1102 1103 if (allvalues == NULL && PyErr_Occurred()) {
1103 1104 goto bail;
1104 1105 }
1105 1106 if (allvalues == NULL) {
1106 1107 int r;
1107 1108 allvalues = PyList_New(0);
1108 1109 if (!allvalues) {
1109 1110 goto bail;
1110 1111 }
1111 1112 r = PyDict_SetItem(cache, key, allvalues);
1112 1113 Py_DECREF(allvalues);
1113 1114 if (r < 0) {
1114 1115 goto bail;
1115 1116 }
1116 1117 }
1117 1118 value = PyInt_FromSsize_t(rev);
1118 1119 if (PyList_Append(allvalues, value)) {
1119 1120 goto bail;
1120 1121 }
1121 1122 Py_CLEAR(key);
1122 1123 Py_CLEAR(value);
1123 1124 }
1124 1125 Py_RETURN_NONE;
1125 1126 bail:
1126 1127 Py_XDECREF(key);
1127 1128 Py_XDECREF(value);
1128 1129 return NULL;
1129 1130 }
1130 1131
1131 1132 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1132 1133 {
1133 1134 int rev, generaldelta;
1134 1135 PyObject *stoparg;
1135 1136 int stoprev, iterrev, baserev = -1;
1136 1137 int stopped;
1137 1138 PyObject *chain = NULL, *result = NULL;
1138 1139 const Py_ssize_t length = index_length(self);
1139 1140
1140 1141 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1141 1142 return NULL;
1142 1143 }
1143 1144
1144 1145 if (PyInt_Check(stoparg)) {
1145 1146 stoprev = (int)PyInt_AsLong(stoparg);
1146 1147 if (stoprev == -1 && PyErr_Occurred()) {
1147 1148 return NULL;
1148 1149 }
1149 1150 } else if (stoparg == Py_None) {
1150 1151 stoprev = -2;
1151 1152 } else {
1152 1153 PyErr_SetString(PyExc_ValueError,
1153 1154 "stoprev must be integer or None");
1154 1155 return NULL;
1155 1156 }
1156 1157
1157 1158 if (rev < 0 || rev >= length) {
1158 1159 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1159 1160 return NULL;
1160 1161 }
1161 1162
1162 1163 chain = PyList_New(0);
1163 1164 if (chain == NULL) {
1164 1165 return NULL;
1165 1166 }
1166 1167
1167 1168 baserev = index_baserev(self, rev);
1168 1169
1169 1170 /* This should never happen. */
1170 1171 if (baserev <= -2) {
1171 1172 /* Error should be set by index_deref() */
1172 1173 assert(PyErr_Occurred());
1173 1174 goto bail;
1174 1175 }
1175 1176
1176 1177 iterrev = rev;
1177 1178
1178 1179 while (iterrev != baserev && iterrev != stoprev) {
1179 1180 PyObject *value = PyInt_FromLong(iterrev);
1180 1181 if (value == NULL) {
1181 1182 goto bail;
1182 1183 }
1183 1184 if (PyList_Append(chain, value)) {
1184 1185 Py_DECREF(value);
1185 1186 goto bail;
1186 1187 }
1187 1188 Py_DECREF(value);
1188 1189
1189 1190 if (generaldelta) {
1190 1191 iterrev = baserev;
1191 1192 } else {
1192 1193 iterrev--;
1193 1194 }
1194 1195
1195 1196 if (iterrev < 0) {
1196 1197 break;
1197 1198 }
1198 1199
1199 1200 if (iterrev >= length) {
1200 1201 PyErr_SetString(PyExc_IndexError,
1201 1202 "revision outside index");
1202 1203 return NULL;
1203 1204 }
1204 1205
1205 1206 baserev = index_baserev(self, iterrev);
1206 1207
1207 1208 /* This should never happen. */
1208 1209 if (baserev <= -2) {
1209 1210 /* Error should be set by index_deref() */
1210 1211 assert(PyErr_Occurred());
1211 1212 goto bail;
1212 1213 }
1213 1214 }
1214 1215
1215 1216 if (iterrev == stoprev) {
1216 1217 stopped = 1;
1217 1218 } else {
1218 1219 PyObject *value = PyInt_FromLong(iterrev);
1219 1220 if (value == NULL) {
1220 1221 goto bail;
1221 1222 }
1222 1223 if (PyList_Append(chain, value)) {
1223 1224 Py_DECREF(value);
1224 1225 goto bail;
1225 1226 }
1226 1227 Py_DECREF(value);
1227 1228
1228 1229 stopped = 0;
1229 1230 }
1230 1231
1231 1232 if (PyList_Reverse(chain)) {
1232 1233 goto bail;
1233 1234 }
1234 1235
1235 1236 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1236 1237 Py_DECREF(chain);
1237 1238 return result;
1238 1239
1239 1240 bail:
1240 1241 Py_DECREF(chain);
1241 1242 return NULL;
1242 1243 }
1243 1244
1244 1245 static inline int64_t
1245 1246 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1246 1247 {
1247 1248 int64_t start_offset;
1248 1249 int64_t end_offset;
1249 1250 int end_size;
1250 1251 start_offset = index_get_start(self, start_rev);
1251 1252 if (start_offset < 0) {
1252 1253 return -1;
1253 1254 }
1254 1255 end_offset = index_get_start(self, end_rev);
1255 1256 if (end_offset < 0) {
1256 1257 return -1;
1257 1258 }
1258 1259 end_size = index_get_length(self, end_rev);
1259 1260 if (end_size < 0) {
1260 1261 return -1;
1261 1262 }
1262 1263 if (end_offset < start_offset) {
1263 1264 PyErr_Format(PyExc_ValueError,
1264 1265 "corrupted revlog index: inconsistent offset "
1265 1266 "between revisions (%zd) and (%zd)",
1266 1267 start_rev, end_rev);
1267 1268 return -1;
1268 1269 }
1269 1270 return (end_offset - start_offset) + (int64_t)end_size;
1270 1271 }
1271 1272
1272 1273 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1273 1274 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1274 1275 Py_ssize_t startidx, Py_ssize_t endidx)
1275 1276 {
1276 1277 int length;
1277 1278 while (endidx > 1 && endidx > startidx) {
1278 1279 length = index_get_length(self, revs[endidx - 1]);
1279 1280 if (length < 0) {
1280 1281 return -1;
1281 1282 }
1282 1283 if (length != 0) {
1283 1284 break;
1284 1285 }
1285 1286 endidx -= 1;
1286 1287 }
1287 1288 return endidx;
1288 1289 }
1289 1290
1290 1291 struct Gap {
1291 1292 int64_t size;
1292 1293 Py_ssize_t idx;
1293 1294 };
1294 1295
1295 1296 static int gap_compare(const void *left, const void *right)
1296 1297 {
1297 1298 const struct Gap *l_left = ((const struct Gap *)left);
1298 1299 const struct Gap *l_right = ((const struct Gap *)right);
1299 1300 if (l_left->size < l_right->size) {
1300 1301 return -1;
1301 1302 } else if (l_left->size > l_right->size) {
1302 1303 return 1;
1303 1304 }
1304 1305 return 0;
1305 1306 }
1306 1307 static int Py_ssize_t_compare(const void *left, const void *right)
1307 1308 {
1308 1309 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1309 1310 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1310 1311 if (l_left < l_right) {
1311 1312 return -1;
1312 1313 } else if (l_left > l_right) {
1313 1314 return 1;
1314 1315 }
1315 1316 return 0;
1316 1317 }
1317 1318
1318 1319 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1319 1320 {
1320 1321 /* method arguments */
1321 1322 PyObject *list_revs = NULL; /* revisions in the chain */
1322 1323 double targetdensity = 0; /* min density to achieve */
1323 1324 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1324 1325
1325 1326 /* other core variables */
1326 1327 Py_ssize_t idxlen = index_length(self);
1327 1328 Py_ssize_t i; /* used for various iteration */
1328 1329 PyObject *result = NULL; /* the final return of the function */
1329 1330
1330 1331 /* generic information about the delta chain being sliced */
1331 1332 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1332 1333 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1333 1334 int64_t chainpayload = 0; /* sum of all delta in the chain */
1334 1335 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1335 1336
1336 1337 /* variable used for slicing the delta chain */
1337 1338 int64_t readdata = 0; /* amount of data currently planned to be read */
1338 1339 double density = 0; /* ratio of payload data to data read */
1339 1340 int64_t previous_end;
1340 1341 struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1341 1342 Py_ssize_t num_gaps =
1342 1343 0; /* total number of notable gaps recorded so far */
1343 1344 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1344 1345 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1345 1346 PyObject *chunk = NULL; /* individual slice */
1346 1347 PyObject *allchunks = NULL; /* all slices */
1347 1348 Py_ssize_t previdx;
1348 1349
1349 1350 /* parsing argument */
1350 1351 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1351 1352 &targetdensity, &mingapsize)) {
1352 1353 goto bail;
1353 1354 }
1354 1355
1355 1356 /* If the delta chain contains a single element, we do not need slicing
1356 1357 */
1357 1358 num_revs = PyList_GET_SIZE(list_revs);
1358 1359 if (num_revs <= 1) {
1359 1360 result = PyTuple_Pack(1, list_revs);
1360 1361 goto done;
1361 1362 }
1362 1363
1363 1364 /* Turn the python list into a native integer array (for efficiency) */
1364 1365 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1365 1366 if (revs == NULL) {
1366 1367 PyErr_NoMemory();
1367 1368 goto bail;
1368 1369 }
1369 1370 for (i = 0; i < num_revs; i++) {
1370 1371 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1371 1372 if (revnum == -1 && PyErr_Occurred()) {
1372 1373 goto bail;
1373 1374 }
1374 1375 if (revnum < nullrev || revnum >= idxlen) {
1375 1376 PyErr_Format(PyExc_IndexError,
1376 1377 "index out of range: %zd", revnum);
1377 1378 goto bail;
1378 1379 }
1379 1380 revs[i] = revnum;
1380 1381 }
1381 1382
1382 1383 /* Compute and check various property of the unsliced delta chain */
1383 1384 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1384 1385 if (deltachainspan < 0) {
1385 1386 goto bail;
1386 1387 }
1387 1388
1388 1389 if (deltachainspan <= mingapsize) {
1389 1390 result = PyTuple_Pack(1, list_revs);
1390 1391 goto done;
1391 1392 }
1392 1393 chainpayload = 0;
1393 1394 for (i = 0; i < num_revs; i++) {
1394 1395 int tmp = index_get_length(self, revs[i]);
1395 1396 if (tmp < 0) {
1396 1397 goto bail;
1397 1398 }
1398 1399 chainpayload += tmp;
1399 1400 }
1400 1401
1401 1402 readdata = deltachainspan;
1402 1403 density = 1.0;
1403 1404
1404 1405 if (0 < deltachainspan) {
1405 1406 density = (double)chainpayload / (double)deltachainspan;
1406 1407 }
1407 1408
1408 1409 if (density >= targetdensity) {
1409 1410 result = PyTuple_Pack(1, list_revs);
1410 1411 goto done;
1411 1412 }
1412 1413
1413 1414 /* if chain is too sparse, look for relevant gaps */
1414 1415 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1415 1416 if (gaps == NULL) {
1416 1417 PyErr_NoMemory();
1417 1418 goto bail;
1418 1419 }
1419 1420
1420 1421 previous_end = -1;
1421 1422 for (i = 0; i < num_revs; i++) {
1422 1423 int64_t revstart;
1423 1424 int revsize;
1424 1425 revstart = index_get_start(self, revs[i]);
1425 1426 if (revstart < 0) {
1426 1427 goto bail;
1427 1428 };
1428 1429 revsize = index_get_length(self, revs[i]);
1429 1430 if (revsize < 0) {
1430 1431 goto bail;
1431 1432 };
1432 1433 if (revsize == 0) {
1433 1434 continue;
1434 1435 }
1435 1436 if (previous_end >= 0) {
1436 1437 int64_t gapsize = revstart - previous_end;
1437 1438 if (gapsize > mingapsize) {
1438 1439 gaps[num_gaps].size = gapsize;
1439 1440 gaps[num_gaps].idx = i;
1440 1441 num_gaps += 1;
1441 1442 }
1442 1443 }
1443 1444 previous_end = revstart + revsize;
1444 1445 }
1445 1446 if (num_gaps == 0) {
1446 1447 result = PyTuple_Pack(1, list_revs);
1447 1448 goto done;
1448 1449 }
1449 1450 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1450 1451
1451 1452 /* Slice the largest gaps first; they improve the density the most */
1452 1453 selected_indices =
1453 1454 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1454 1455 if (selected_indices == NULL) {
1455 1456 PyErr_NoMemory();
1456 1457 goto bail;
1457 1458 }
1458 1459
1459 1460 for (i = num_gaps - 1; i >= 0; i--) {
1460 1461 selected_indices[num_selected] = gaps[i].idx;
1461 1462 readdata -= gaps[i].size;
1462 1463 num_selected += 1;
1463 1464 if (readdata <= 0) {
1464 1465 density = 1.0;
1465 1466 } else {
1466 1467 density = (double)chainpayload / (double)readdata;
1467 1468 }
1468 1469 if (density >= targetdensity) {
1469 1470 break;
1470 1471 }
1471 1472 }
1472 1473 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1473 1474 &Py_ssize_t_compare);
1474 1475
1475 1476 /* create the resulting slice */
1476 1477 allchunks = PyList_New(0);
1477 1478 if (allchunks == NULL) {
1478 1479 goto bail;
1479 1480 }
1480 1481 previdx = 0;
1481 1482 selected_indices[num_selected] = num_revs;
1482 1483 for (i = 0; i <= num_selected; i++) {
1483 1484 Py_ssize_t idx = selected_indices[i];
1484 1485 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1485 1486 if (endidx < 0) {
1486 1487 goto bail;
1487 1488 }
1488 1489 if (previdx < endidx) {
1489 1490 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1490 1491 if (chunk == NULL) {
1491 1492 goto bail;
1492 1493 }
1493 1494 if (PyList_Append(allchunks, chunk) == -1) {
1494 1495 goto bail;
1495 1496 }
1496 1497 Py_DECREF(chunk);
1497 1498 chunk = NULL;
1498 1499 }
1499 1500 previdx = idx;
1500 1501 }
1501 1502 result = allchunks;
1502 1503 goto done;
1503 1504
1504 1505 bail:
1505 1506 Py_XDECREF(allchunks);
1506 1507 Py_XDECREF(chunk);
1507 1508 done:
1508 1509 free(revs);
1509 1510 free(gaps);
1510 1511 free(selected_indices);
1511 1512 return result;
1512 1513 }
1513 1514
1514 1515 static inline int nt_level(const char *node, Py_ssize_t level)
1515 1516 {
1516 1517 int v = node[level >> 1];
1517 1518 if (!(level & 1))
1518 1519 v >>= 4;
1519 1520 return v & 0xf;
1520 1521 }
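A quick standalone check, not from parsers.c, of what nt_level() returns: each level selects one hex nybble of the binary node, high nybble first, so a 20-byte node provides the 40 levels walked by nt_find() and nt_insert(). The two byte values below are arbitrary.

#include <stdio.h>

/* same logic as nt_level() above */
static int nybble(const char *node, int level)
{
        int v = node[level >> 1];
        if (!(level & 1))
                v >>= 4;
        return v & 0xf;
}

int main(void)
{
        const char node[2] = {(char)0xab, (char)0xcd};
        int level;
        for (level = 0; level < 4; level++)
                printf("level %d -> %x\n", level, nybble(node, level));
        /* prints a, b, c, d */
        return 0;
}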
1521 1522
1522 1523 /*
1523 1524 * Return values:
1524 1525 *
1525 1526 * -4: match is ambiguous (multiple candidates)
1526 1527 * -2: not found
1527 1528 * rest: valid rev
1528 1529 */
1529 1530 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1530 1531 int hex)
1531 1532 {
1532 1533 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1533 1534 int level, maxlevel, off;
1534 1535
1535 1536 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1536 1537 return -1;
1537 1538
1538 1539 if (hex)
1539 1540 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1540 1541 else
1541 1542 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1542 1543
1543 1544 for (level = off = 0; level < maxlevel; level++) {
1544 1545 int k = getnybble(node, level);
1545 1546 nodetreenode *n = &self->nodes[off];
1546 1547 int v = n->children[k];
1547 1548
1548 1549 if (v < 0) {
1549 1550 const char *n;
1550 1551 Py_ssize_t i;
1551 1552
1552 1553 v = -(v + 2);
1553 1554 n = index_node(self->index, v);
1554 1555 if (n == NULL)
1555 1556 return -2;
1556 1557 for (i = level; i < maxlevel; i++)
1557 1558 if (getnybble(node, i) != nt_level(n, i))
1558 1559 return -2;
1559 1560 return v;
1560 1561 }
1561 1562 if (v == 0)
1562 1563 return -2;
1563 1564 off = v;
1564 1565 }
1565 1566 /* multiple matches against an ambiguous prefix */
1566 1567 return -4;
1567 1568 }
1568 1569
1569 1570 static int nt_new(nodetree *self)
1570 1571 {
1571 1572 if (self->length == self->capacity) {
1572 1573 unsigned newcapacity;
1573 1574 nodetreenode *newnodes;
1574 1575 newcapacity = self->capacity * 2;
1575 1576 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1576 1577 PyErr_SetString(PyExc_MemoryError,
1577 1578 "overflow in nt_new");
1578 1579 return -1;
1579 1580 }
1580 1581 newnodes =
1581 1582 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1582 1583 if (newnodes == NULL) {
1583 1584 PyErr_SetString(PyExc_MemoryError, "out of memory");
1584 1585 return -1;
1585 1586 }
1586 1587 self->capacity = newcapacity;
1587 1588 self->nodes = newnodes;
1588 1589 memset(&self->nodes[self->length], 0,
1589 1590 sizeof(nodetreenode) * (self->capacity - self->length));
1590 1591 }
1591 1592 return self->length++;
1592 1593 }
1593 1594
1594 1595 static int nt_insert(nodetree *self, const char *node, int rev)
1595 1596 {
1596 1597 int level = 0;
1597 1598 int off = 0;
1598 1599
1599 1600 while (level < 40) {
1600 1601 int k = nt_level(node, level);
1601 1602 nodetreenode *n;
1602 1603 int v;
1603 1604
1604 1605 n = &self->nodes[off];
1605 1606 v = n->children[k];
1606 1607
1607 1608 if (v == 0) {
1608 1609 n->children[k] = -rev - 2;
1609 1610 return 0;
1610 1611 }
1611 1612 if (v < 0) {
1612 1613 const char *oldnode =
1613 1614 index_node_existing(self->index, -(v + 2));
1614 1615 int noff;
1615 1616
1616 1617 if (oldnode == NULL)
1617 1618 return -1;
1618 1619 if (!memcmp(oldnode, node, 20)) {
1619 1620 n->children[k] = -rev - 2;
1620 1621 return 0;
1621 1622 }
1622 1623 noff = nt_new(self);
1623 1624 if (noff == -1)
1624 1625 return -1;
1625 1626 /* self->nodes may have been changed by realloc */
1626 1627 self->nodes[off].children[k] = noff;
1627 1628 off = noff;
1628 1629 n = &self->nodes[off];
1629 1630 n->children[nt_level(oldnode, ++level)] = v;
1630 1631 if (level > self->depth)
1631 1632 self->depth = level;
1632 1633 self->splits += 1;
1633 1634 } else {
1634 1635 level += 1;
1635 1636 off = v;
1636 1637 }
1637 1638 }
1638 1639
1639 1640 return -1;
1640 1641 }
1641 1642
1642 1643 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1643 1644 {
1644 1645 Py_ssize_t rev;
1645 1646 const char *node;
1646 1647 Py_ssize_t length;
1647 1648 if (!PyArg_ParseTuple(args, "n", &rev))
1648 1649 return NULL;
1649 1650 length = index_length(self->nt.index);
1650 1651 if (rev < 0 || rev >= length) {
1651 1652 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1652 1653 return NULL;
1653 1654 }
1654 1655 node = index_node_existing(self->nt.index, rev);
1655 1656 if (nt_insert(&self->nt, node, (int)rev) == -1)
1656 1657 return NULL;
1657 1658 Py_RETURN_NONE;
1658 1659 }
1659 1660
1660 1661 static int nt_delete_node(nodetree *self, const char *node)
1661 1662 {
1662 1663 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1663 1664 */
1664 1665 return nt_insert(self, node, -2);
1665 1666 }
1666 1667
1667 1668 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1668 1669 {
1669 1670 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1670 1671 self->nodes = NULL;
1671 1672
1672 1673 self->index = index;
1673 1674 /* The input capacity is in terms of revisions, while the field is in
1674 1675 * terms of nodetree nodes. */
1675 1676 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1676 1677 self->depth = 0;
1677 1678 self->splits = 0;
1678 1679 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1679 1680 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1680 1681 return -1;
1681 1682 }
1682 1683 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1683 1684 if (self->nodes == NULL) {
1684 1685 PyErr_NoMemory();
1685 1686 return -1;
1686 1687 }
1687 1688 self->length = 1;
1688 1689 return 0;
1689 1690 }
1690 1691
1691 1692 static int ntobj_init(nodetreeObject *self, PyObject *args)
1692 1693 {
1693 1694 PyObject *index;
1694 1695 unsigned capacity;
1695 1696 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1696 1697 &capacity))
1697 1698 return -1;
1698 1699 Py_INCREF(index);
1699 1700 return nt_init(&self->nt, (indexObject *)index, capacity);
1700 1701 }
1701 1702
1702 1703 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1703 1704 {
1704 1705 return nt_find(self, node, nodelen, 1);
1705 1706 }
1706 1707
1707 1708 /*
1708 1709 * Find the length of the shortest unique prefix of node.
1709 1710 *
1710 1711 * Return values:
1711 1712 *
1712 1713 * -3: error (exception set)
1713 1714 * -2: not found (no exception set)
1714 1715 * rest: length of shortest prefix
1715 1716 */
1716 1717 static int nt_shortest(nodetree *self, const char *node)
1717 1718 {
1718 1719 int level, off;
1719 1720
1720 1721 for (level = off = 0; level < 40; level++) {
1721 1722 int k, v;
1722 1723 nodetreenode *n = &self->nodes[off];
1723 1724 k = nt_level(node, level);
1724 1725 v = n->children[k];
1725 1726 if (v < 0) {
1726 1727 const char *n;
1727 1728 v = -(v + 2);
1728 1729 n = index_node_existing(self->index, v);
1729 1730 if (n == NULL)
1730 1731 return -3;
1731 1732 if (memcmp(node, n, 20) != 0)
1732 1733 /*
1733 1734 * Found a unique prefix, but it wasn't for the
1734 1735 * requested node (i.e. the requested node does
1735 1736 * not exist).
1736 1737 */
1737 1738 return -2;
1738 1739 return level + 1;
1739 1740 }
1740 1741 if (v == 0)
1741 1742 return -2;
1742 1743 off = v;
1743 1744 }
1744 1745 /*
1745 1746 * The node was still not unique after 40 hex digits, so this won't
1746 1747 * happen. Also, if we get here, then there's a programming error in
1747 1748 * this file that made us insert a node longer than 40 hex digits.
1748 1749 */
1749 1750 PyErr_SetString(PyExc_Exception, "broken node tree");
1750 1751 return -3;
1751 1752 }
1752 1753
1753 1754 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1754 1755 {
1755 1756 PyObject *val;
1756 1757 char *node;
1757 1758 int length;
1758 1759
1759 1760 if (!PyArg_ParseTuple(args, "O", &val))
1760 1761 return NULL;
1761 1762 if (node_check(val, &node) == -1)
1762 1763 return NULL;
1763 1764
1764 1765 length = nt_shortest(&self->nt, node);
1765 1766 if (length == -3)
1766 1767 return NULL;
1767 1768 if (length == -2) {
1768 1769 raise_revlog_error();
1769 1770 return NULL;
1770 1771 }
1771 1772 return PyInt_FromLong(length);
1772 1773 }
1773 1774
1774 1775 static void nt_dealloc(nodetree *self)
1775 1776 {
1776 1777 free(self->nodes);
1777 1778 self->nodes = NULL;
1778 1779 }
1779 1780
1780 1781 static void ntobj_dealloc(nodetreeObject *self)
1781 1782 {
1782 1783 Py_XDECREF(self->nt.index);
1783 1784 nt_dealloc(&self->nt);
1784 1785 PyObject_Del(self);
1785 1786 }
1786 1787
1787 1788 static PyMethodDef ntobj_methods[] = {
1788 1789 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1789 1790 "insert an index entry"},
1790 1791 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1791 1792 "find length of shortest hex nodeid of a binary ID"},
1792 1793 {NULL} /* Sentinel */
1793 1794 };
1794 1795
1795 1796 static PyTypeObject nodetreeType = {
1796 1797 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1797 1798 "parsers.nodetree", /* tp_name */
1798 1799 sizeof(nodetreeObject), /* tp_basicsize */
1799 1800 0, /* tp_itemsize */
1800 1801 (destructor)ntobj_dealloc, /* tp_dealloc */
1801 1802 0, /* tp_print */
1802 1803 0, /* tp_getattr */
1803 1804 0, /* tp_setattr */
1804 1805 0, /* tp_compare */
1805 1806 0, /* tp_repr */
1806 1807 0, /* tp_as_number */
1807 1808 0, /* tp_as_sequence */
1808 1809 0, /* tp_as_mapping */
1809 1810 0, /* tp_hash */
1810 1811 0, /* tp_call */
1811 1812 0, /* tp_str */
1812 1813 0, /* tp_getattro */
1813 1814 0, /* tp_setattro */
1814 1815 0, /* tp_as_buffer */
1815 1816 Py_TPFLAGS_DEFAULT, /* tp_flags */
1816 1817 "nodetree", /* tp_doc */
1817 1818 0, /* tp_traverse */
1818 1819 0, /* tp_clear */
1819 1820 0, /* tp_richcompare */
1820 1821 0, /* tp_weaklistoffset */
1821 1822 0, /* tp_iter */
1822 1823 0, /* tp_iternext */
1823 1824 ntobj_methods, /* tp_methods */
1824 1825 0, /* tp_members */
1825 1826 0, /* tp_getset */
1826 1827 0, /* tp_base */
1827 1828 0, /* tp_dict */
1828 1829 0, /* tp_descr_get */
1829 1830 0, /* tp_descr_set */
1830 1831 0, /* tp_dictoffset */
1831 1832 (initproc)ntobj_init, /* tp_init */
1832 1833 0, /* tp_alloc */
1833 1834 };
1834 1835
1835 1836 static int index_init_nt(indexObject *self)
1836 1837 {
1837 1838 if (!self->ntinitialized) {
1838 1839 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1839 1840 nt_dealloc(&self->nt);
1840 1841 return -1;
1841 1842 }
1842 1843 if (nt_insert(&self->nt, nullid, -1) == -1) {
1843 1844 nt_dealloc(&self->nt);
1844 1845 return -1;
1845 1846 }
1846 1847 self->ntinitialized = 1;
1847 1848 self->ntrev = (int)index_length(self);
1848 1849 self->ntlookups = 1;
1849 1850 self->ntmisses = 0;
1850 1851 }
1851 1852 return 0;
1852 1853 }
1853 1854
1854 1855 /*
1855 1856 * Return values:
1856 1857 *
1857 1858 * -3: error (exception set)
1858 1859 * -2: not found (no exception set)
1859 1860 * rest: valid rev
1860 1861 */
1861 1862 static int index_find_node(indexObject *self, const char *node,
1862 1863 Py_ssize_t nodelen)
1863 1864 {
1864 1865 int rev;
1865 1866
1866 1867 if (index_init_nt(self) == -1)
1867 1868 return -3;
1868 1869
1869 1870 self->ntlookups++;
1870 1871 rev = nt_find(&self->nt, node, nodelen, 0);
1871 1872 if (rev >= -1)
1872 1873 return rev;
1873 1874
1874 1875 /*
1875 1876 * For the first handful of lookups, we scan the entire index,
1876 1877 * and cache only the matching nodes. This optimizes for cases
1877 1878 * like "hg tip", where only a few nodes are accessed.
1878 1879 *
1879 1880 * After that, we cache every node we visit, using a single
1880 1881 * scan amortized over multiple lookups. This gives the best
1881 1882 * bulk performance, e.g. for "hg log".
1882 1883 */
1883 1884 if (self->ntmisses++ < 4) {
1884 1885 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1885 1886 const char *n = index_node_existing(self, rev);
1886 1887 if (n == NULL)
1887 1888 return -3;
1888 1889 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1889 1890 if (nt_insert(&self->nt, n, rev) == -1)
1890 1891 return -3;
1891 1892 break;
1892 1893 }
1893 1894 }
1894 1895 } else {
1895 1896 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1896 1897 const char *n = index_node_existing(self, rev);
1897 1898 if (n == NULL)
1898 1899 return -3;
1899 1900 if (nt_insert(&self->nt, n, rev) == -1) {
1900 1901 self->ntrev = rev + 1;
1901 1902 return -3;
1902 1903 }
1903 1904 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1904 1905 break;
1905 1906 }
1906 1907 }
1907 1908 self->ntrev = rev;
1908 1909 }
1909 1910
1910 1911 if (rev >= 0)
1911 1912 return rev;
1912 1913 return -2;
1913 1914 }
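
/*
 * A minimal sketch of how a caller consumes the return convention of
 * index_find_node() documented above: -3 propagates an exception that
 * is already set, -2 maps to a RevlogError, and anything >= -1 is a
 * valid revision number. The helper name below is hypothetical and
 * only illustrates the pattern used by the real callers in this file.
 */
static PyObject *example_node_to_rev(indexObject *self, const char *node)
{
	int rev = index_find_node(self, node, 20);
	if (rev == -3)
		return NULL; /* exception already set by the lookup */
	if (rev == -2) {
		raise_revlog_error(); /* node not present in the index */
		return NULL;
	}
	return PyInt_FromLong(rev); /* -1 is the null revision */
}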
1914 1915
1915 1916 static PyObject *index_getitem(indexObject *self, PyObject *value)
1916 1917 {
1917 1918 char *node;
1918 1919 int rev;
1919 1920
1920 1921 if (PyInt_Check(value)) {
1921 1922 long idx;
1922 1923 if (!pylong_to_long(value, &idx)) {
1923 1924 return NULL;
1924 1925 }
1925 1926 return index_get(self, idx);
1926 1927 }
1927 1928
1928 1929 if (node_check(value, &node) == -1)
1929 1930 return NULL;
1930 1931 rev = index_find_node(self, node, 20);
1931 1932 if (rev >= -1)
1932 1933 return PyInt_FromLong(rev);
1933 1934 if (rev == -2)
1934 1935 raise_revlog_error();
1935 1936 return NULL;
1936 1937 }
1937 1938
1938 1939 /*
1939 1940 * Fully populate the radix tree.
1940 1941 */
1941 1942 static int index_populate_nt(indexObject *self)
1942 1943 {
1943 1944 int rev;
1944 1945 if (self->ntrev > 0) {
1945 1946 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1946 1947 const char *n = index_node_existing(self, rev);
1947 1948 if (n == NULL)
1948 1949 return -1;
1949 1950 if (nt_insert(&self->nt, n, rev) == -1)
1950 1951 return -1;
1951 1952 }
1952 1953 self->ntrev = -1;
1953 1954 }
1954 1955 return 0;
1955 1956 }
1956 1957
1957 1958 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1958 1959 {
1959 1960 const char *fullnode;
1960 1961 Py_ssize_t nodelen;
1961 1962 char *node;
1962 1963 int rev, i;
1963 1964
1964 1965 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1965 1966 return NULL;
1966 1967
1967 1968 if (nodelen < 1) {
1968 1969 PyErr_SetString(PyExc_ValueError, "key too short");
1969 1970 return NULL;
1970 1971 }
1971 1972
1972 1973 if (nodelen > 40) {
1973 1974 PyErr_SetString(PyExc_ValueError, "key too long");
1974 1975 return NULL;
1975 1976 }
1976 1977
1977 1978 for (i = 0; i < nodelen; i++)
1978 1979 hexdigit(node, i);
1979 1980 if (PyErr_Occurred()) {
1980 1981 /* input contains non-hex characters */
1981 1982 PyErr_Clear();
1982 1983 Py_RETURN_NONE;
1983 1984 }
1984 1985
1985 1986 if (index_init_nt(self) == -1)
1986 1987 return NULL;
1987 1988 if (index_populate_nt(self) == -1)
1988 1989 return NULL;
1989 1990 rev = nt_partialmatch(&self->nt, node, nodelen);
1990 1991
1991 1992 switch (rev) {
1992 1993 case -4:
1993 1994 raise_revlog_error();
1994 1995 return NULL;
1995 1996 case -2:
1996 1997 Py_RETURN_NONE;
1997 1998 case -1:
1998 1999 return PyBytes_FromStringAndSize(nullid, 20);
1999 2000 }
2000 2001
2001 2002 fullnode = index_node_existing(self, rev);
2002 2003 if (fullnode == NULL) {
2003 2004 return NULL;
2004 2005 }
2005 2006 return PyBytes_FromStringAndSize(fullnode, 20);
2006 2007 }
2007 2008
2008 2009 static PyObject *index_shortest(indexObject *self, PyObject *args)
2009 2010 {
2010 2011 PyObject *val;
2011 2012 char *node;
2012 2013 int length;
2013 2014
2014 2015 if (!PyArg_ParseTuple(args, "O", &val))
2015 2016 return NULL;
2016 2017 if (node_check(val, &node) == -1)
2017 2018 return NULL;
2018 2019
2019 2020 self->ntlookups++;
2020 2021 if (index_init_nt(self) == -1)
2021 2022 return NULL;
2022 2023 if (index_populate_nt(self) == -1)
2023 2024 return NULL;
2024 2025 length = nt_shortest(&self->nt, node);
2025 2026 if (length == -3)
2026 2027 return NULL;
2027 2028 if (length == -2) {
2028 2029 raise_revlog_error();
2029 2030 return NULL;
2030 2031 }
2031 2032 return PyInt_FromLong(length);
2032 2033 }
2033 2034
2034 2035 static PyObject *index_m_get(indexObject *self, PyObject *args)
2035 2036 {
2036 2037 PyObject *val;
2037 2038 char *node;
2038 2039 int rev;
2039 2040
2040 2041 if (!PyArg_ParseTuple(args, "O", &val))
2041 2042 return NULL;
2042 2043 if (node_check(val, &node) == -1)
2043 2044 return NULL;
2044 2045 rev = index_find_node(self, node, 20);
2045 2046 if (rev == -3)
2046 2047 return NULL;
2047 2048 if (rev == -2)
2048 2049 Py_RETURN_NONE;
2049 2050 return PyInt_FromLong(rev);
2050 2051 }
2051 2052
2052 2053 static int index_contains(indexObject *self, PyObject *value)
2053 2054 {
2054 2055 char *node;
2055 2056
2056 2057 if (PyInt_Check(value)) {
2057 2058 long rev;
2058 2059 if (!pylong_to_long(value, &rev)) {
2059 2060 return -1;
2060 2061 }
2061 2062 return rev >= -1 && rev < index_length(self);
2062 2063 }
2063 2064
2064 2065 if (node_check(value, &node) == -1)
2065 2066 return -1;
2066 2067
2067 2068 switch (index_find_node(self, node, 20)) {
2068 2069 case -3:
2069 2070 return -1;
2070 2071 case -2:
2071 2072 return 0;
2072 2073 default:
2073 2074 return 1;
2074 2075 }
2075 2076 }
2076 2077
2077 2078 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2078 2079 {
2079 2080 int ret = index_contains(self, args);
2080 2081 if (ret < 0)
2081 2082 return NULL;
2082 2083 return PyBool_FromLong((long)ret);
2083 2084 }
2084 2085
2085 2086 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2086 2087 {
2087 2088 char *node;
2088 2089 int rev;
2089 2090
2090 2091 if (node_check(val, &node) == -1)
2091 2092 return NULL;
2092 2093 rev = index_find_node(self, node, 20);
2093 2094 if (rev >= -1)
2094 2095 return PyInt_FromLong(rev);
2095 2096 if (rev == -2)
2096 2097 raise_revlog_error();
2097 2098 return NULL;
2098 2099 }
2099 2100
2100 2101 typedef uint64_t bitmask;
2101 2102
2102 2103 /*
2103 2104 * Given a disjoint set of revs, return all candidates for the
2104 2105 * greatest common ancestor. In revset notation, this is the set
2105 2106 * "heads(::a and ::b and ...)"
2106 2107 */
2107 2108 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2108 2109 int revcount)
2109 2110 {
2110 2111 const bitmask allseen = (1ull << revcount) - 1;
2111 2112 const bitmask poison = 1ull << revcount;
2112 2113 PyObject *gca = PyList_New(0);
2113 2114 int i, v, interesting;
2114 2115 int maxrev = -1;
2115 2116 bitmask sp;
2116 2117 bitmask *seen;
2117 2118
2118 2119 if (gca == NULL)
2119 2120 return PyErr_NoMemory();
2120 2121
2121 2122 for (i = 0; i < revcount; i++) {
2122 2123 if (revs[i] > maxrev)
2123 2124 maxrev = revs[i];
2124 2125 }
2125 2126
2126 2127 seen = calloc(sizeof(*seen), maxrev + 1);
2127 2128 if (seen == NULL) {
2128 2129 Py_DECREF(gca);
2129 2130 return PyErr_NoMemory();
2130 2131 }
2131 2132
2132 2133 for (i = 0; i < revcount; i++)
2133 2134 seen[revs[i]] = 1ull << i;
2134 2135
2135 2136 interesting = revcount;
2136 2137
2137 2138 for (v = maxrev; v >= 0 && interesting; v--) {
2138 2139 bitmask sv = seen[v];
2139 2140 int parents[2];
2140 2141
2141 2142 if (!sv)
2142 2143 continue;
2143 2144
2144 2145 if (sv < poison) {
2145 2146 interesting -= 1;
2146 2147 if (sv == allseen) {
2147 2148 PyObject *obj = PyInt_FromLong(v);
2148 2149 if (obj == NULL)
2149 2150 goto bail;
2150 2151 if (PyList_Append(gca, obj) == -1) {
2151 2152 Py_DECREF(obj);
2152 2153 goto bail;
2153 2154 }
2154 2155 sv |= poison;
2155 2156 for (i = 0; i < revcount; i++) {
2156 2157 if (revs[i] == v)
2157 2158 goto done;
2158 2159 }
2159 2160 }
2160 2161 }
2161 2162 if (index_get_parents(self, v, parents, maxrev) < 0)
2162 2163 goto bail;
2163 2164
2164 2165 for (i = 0; i < 2; i++) {
2165 2166 int p = parents[i];
2166 2167 if (p == -1)
2167 2168 continue;
2168 2169 sp = seen[p];
2169 2170 if (sv < poison) {
2170 2171 if (sp == 0) {
2171 2172 seen[p] = sv;
2172 2173 interesting++;
2173 2174 } else if (sp != sv)
2174 2175 seen[p] |= sv;
2175 2176 } else {
2176 2177 if (sp && sp < poison)
2177 2178 interesting--;
2178 2179 seen[p] = sv;
2179 2180 }
2180 2181 }
2181 2182 }
2182 2183
2183 2184 done:
2184 2185 free(seen);
2185 2186 return gca;
2186 2187 bail:
2187 2188 free(seen);
2188 2189 Py_XDECREF(gca);
2189 2190 return NULL;
2190 2191 }
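
/*
 * A small illustrative sketch of the bitmask encoding used above,
 * assuming two input revs (revcount == 2); the helper and the values
 * are hypothetical. Each input rev owns one bit, and the extra poison
 * bit marks revs lying below an already-reported candidate so their
 * ancestors are not reported again.
 */
static void example_gca_bitmasks(void)
{
	const bitmask allseen = (1ull << 2) - 1; /* 0x3: seen from both inputs */
	const bitmask poison = 1ull << 2;        /* 0x4: below a reported GCA */
	bitmask sv = 0x1 | 0x2;                  /* a rev reached from both inputs */
	assert(sv == allseen); /* it is a candidate common ancestor */
	sv |= poison;          /* its own ancestors must be skipped */
	assert(sv >= poison);
	(void)sv;
}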
2191 2192
2192 2193 /*
2193 2194 * Given a disjoint set of revs, return the subset with the longest
2194 2195 * path to the root.
2195 2196 */
2196 2197 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2197 2198 {
2198 2199 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2199 2200 static const Py_ssize_t capacity = 24;
2200 2201 int *depth, *interesting = NULL;
2201 2202 int i, j, v, ninteresting;
2202 2203 PyObject *dict = NULL, *keys = NULL;
2203 2204 long *seen = NULL;
2204 2205 int maxrev = -1;
2205 2206 long final;
2206 2207
2207 2208 if (revcount > capacity) {
2208 2209 PyErr_Format(PyExc_OverflowError,
2209 2210 "bitset size (%ld) > capacity (%ld)",
2210 2211 (long)revcount, (long)capacity);
2211 2212 return NULL;
2212 2213 }
2213 2214
2214 2215 for (i = 0; i < revcount; i++) {
2215 2216 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2216 2217 if (n > maxrev)
2217 2218 maxrev = n;
2218 2219 }
2219 2220
2220 2221 depth = calloc(sizeof(*depth), maxrev + 1);
2221 2222 if (depth == NULL)
2222 2223 return PyErr_NoMemory();
2223 2224
2224 2225 seen = calloc(sizeof(*seen), maxrev + 1);
2225 2226 if (seen == NULL) {
2226 2227 PyErr_NoMemory();
2227 2228 goto bail;
2228 2229 }
2229 2230
2230 2231 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2231 2232 if (interesting == NULL) {
2232 2233 PyErr_NoMemory();
2233 2234 goto bail;
2234 2235 }
2235 2236
2236 2237 if (PyList_Sort(revs) == -1)
2237 2238 goto bail;
2238 2239
2239 2240 for (i = 0; i < revcount; i++) {
2240 2241 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2241 2242 long b = 1l << i;
2242 2243 depth[n] = 1;
2243 2244 seen[n] = b;
2244 2245 interesting[b] = 1;
2245 2246 }
2246 2247
2247 2248 /* invariant: ninteresting is the number of non-zero entries in
2248 2249 * interesting. */
2249 2250 ninteresting = (int)revcount;
2250 2251
2251 2252 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2252 2253 int dv = depth[v];
2253 2254 int parents[2];
2254 2255 long sv;
2255 2256
2256 2257 if (dv == 0)
2257 2258 continue;
2258 2259
2259 2260 sv = seen[v];
2260 2261 if (index_get_parents(self, v, parents, maxrev) < 0)
2261 2262 goto bail;
2262 2263
2263 2264 for (i = 0; i < 2; i++) {
2264 2265 int p = parents[i];
2265 2266 long sp;
2266 2267 int dp;
2267 2268
2268 2269 if (p == -1)
2269 2270 continue;
2270 2271
2271 2272 dp = depth[p];
2272 2273 sp = seen[p];
2273 2274 if (dp <= dv) {
2274 2275 depth[p] = dv + 1;
2275 2276 if (sp != sv) {
2276 2277 interesting[sv] += 1;
2277 2278 seen[p] = sv;
2278 2279 if (sp) {
2279 2280 interesting[sp] -= 1;
2280 2281 if (interesting[sp] == 0)
2281 2282 ninteresting -= 1;
2282 2283 }
2283 2284 }
2284 2285 } else if (dv == dp - 1) {
2285 2286 long nsp = sp | sv;
2286 2287 if (nsp == sp)
2287 2288 continue;
2288 2289 seen[p] = nsp;
2289 2290 interesting[sp] -= 1;
2290 2291 if (interesting[sp] == 0)
2291 2292 ninteresting -= 1;
2292 2293 if (interesting[nsp] == 0)
2293 2294 ninteresting += 1;
2294 2295 interesting[nsp] += 1;
2295 2296 }
2296 2297 }
2297 2298 interesting[sv] -= 1;
2298 2299 if (interesting[sv] == 0)
2299 2300 ninteresting -= 1;
2300 2301 }
2301 2302
2302 2303 final = 0;
2303 2304 j = ninteresting;
2304 2305 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2305 2306 if (interesting[i] == 0)
2306 2307 continue;
2307 2308 final |= i;
2308 2309 j -= 1;
2309 2310 }
2310 2311 if (final == 0) {
2311 2312 keys = PyList_New(0);
2312 2313 goto bail;
2313 2314 }
2314 2315
2315 2316 dict = PyDict_New();
2316 2317 if (dict == NULL)
2317 2318 goto bail;
2318 2319
2319 2320 for (i = 0; i < revcount; i++) {
2320 2321 PyObject *key;
2321 2322
2322 2323 if ((final & (1 << i)) == 0)
2323 2324 continue;
2324 2325
2325 2326 key = PyList_GET_ITEM(revs, i);
2326 2327 Py_INCREF(key);
2327 2328 Py_INCREF(Py_None);
2328 2329 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2329 2330 Py_DECREF(key);
2330 2331 Py_DECREF(Py_None);
2331 2332 goto bail;
2332 2333 }
2333 2334 }
2334 2335
2335 2336 keys = PyDict_Keys(dict);
2336 2337
2337 2338 bail:
2338 2339 free(depth);
2339 2340 free(seen);
2340 2341 free(interesting);
2341 2342 Py_XDECREF(dict);
2342 2343
2343 2344 return keys;
2344 2345 }
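
/*
 * Sizing note for the "interesting" table above, derived from the
 * allocation in this function: the table is indexed by a bitset over
 * the input revs, so it holds 1 << revcount entries. The capacity of
 * 24 therefore bounds it at 16,777,216 ints, about 64 MiB with 4-byte
 * ints.
 */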
2345 2346
2346 2347 /*
2347 2348 * Given a (possibly overlapping) set of revs, return all the
2348 2349 * common ancestor heads: heads(::args[0] and ::args[1] and ...)
2349 2350 */
2350 2351 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2351 2352 {
2352 2353 PyObject *ret = NULL;
2353 2354 Py_ssize_t argcount, i, len;
2354 2355 bitmask repeat = 0;
2355 2356 int revcount = 0;
2356 2357 int *revs;
2357 2358
2358 2359 argcount = PySequence_Length(args);
2359 2360 revs = PyMem_Malloc(argcount * sizeof(*revs));
2360 2361 if (argcount > 0 && revs == NULL)
2361 2362 return PyErr_NoMemory();
2362 2363 len = index_length(self);
2363 2364
2364 2365 for (i = 0; i < argcount; i++) {
2365 2366 static const int capacity = 24;
2366 2367 PyObject *obj = PySequence_GetItem(args, i);
2367 2368 bitmask x;
2368 2369 long val;
2369 2370
2370 2371 if (!PyInt_Check(obj)) {
2371 2372 PyErr_SetString(PyExc_TypeError,
2372 2373 "arguments must all be ints");
2373 2374 Py_DECREF(obj);
2374 2375 goto bail;
2375 2376 }
2376 2377 val = PyInt_AsLong(obj);
2377 2378 Py_DECREF(obj);
2378 2379 if (val == -1) {
2379 2380 ret = PyList_New(0);
2380 2381 goto done;
2381 2382 }
2382 2383 if (val < 0 || val >= len) {
2383 2384 PyErr_SetString(PyExc_IndexError, "index out of range");
2384 2385 goto bail;
2385 2386 }
2386 2387 /* this cheesy bloom filter lets us avoid some more
2387 2388 * expensive duplicate checks in the common set-is-disjoint
2388 2389 * case */
2389 2390 x = 1ull << (val & 0x3f);
2390 2391 if (repeat & x) {
2391 2392 int k;
2392 2393 for (k = 0; k < revcount; k++) {
2393 2394 if (val == revs[k])
2394 2395 goto duplicate;
2395 2396 }
2396 2397 } else
2397 2398 repeat |= x;
2398 2399 if (revcount >= capacity) {
2399 2400 PyErr_Format(PyExc_OverflowError,
2400 2401 "bitset size (%d) > capacity (%d)",
2401 2402 revcount, capacity);
2402 2403 goto bail;
2403 2404 }
2404 2405 revs[revcount++] = (int)val;
2405 2406 duplicate:;
2406 2407 }
2407 2408
2408 2409 if (revcount == 0) {
2409 2410 ret = PyList_New(0);
2410 2411 goto done;
2411 2412 }
2412 2413 if (revcount == 1) {
2413 2414 PyObject *obj;
2414 2415 ret = PyList_New(1);
2415 2416 if (ret == NULL)
2416 2417 goto bail;
2417 2418 obj = PyInt_FromLong(revs[0]);
2418 2419 if (obj == NULL)
2419 2420 goto bail;
2420 2421 PyList_SET_ITEM(ret, 0, obj);
2421 2422 goto done;
2422 2423 }
2423 2424
2424 2425 ret = find_gca_candidates(self, revs, revcount);
2425 2426 if (ret == NULL)
2426 2427 goto bail;
2427 2428
2428 2429 done:
2429 2430 PyMem_Free(revs);
2430 2431 return ret;
2431 2432
2432 2433 bail:
2433 2434 PyMem_Free(revs);
2434 2435 Py_XDECREF(ret);
2435 2436 return NULL;
2436 2437 }
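
/*
 * A minimal sketch of the "cheesy bloom filter" used above; the helper
 * name is hypothetical. Each value sets one of 64 bits chosen by its
 * low 6 bits, so the linear duplicate scan over revs[] only runs when
 * two values happen to share those bits.
 */
static int example_maybe_duplicate(bitmask *repeat, long val)
{
	bitmask x = 1ull << (val & 0x3f);
	if (*repeat & x)
		return 1; /* possible duplicate: caller must scan revs[] */
	*repeat |= x;
	return 0; /* definitely not seen before */
}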
2437 2438
2438 2439 /*
2439 2440 * Given a (possibly overlapping) set of revs, return the greatest
2440 2441 * common ancestors: those with the longest path to the root.
2441 2442 */
2442 2443 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2443 2444 {
2444 2445 PyObject *ret;
2445 2446 PyObject *gca = index_commonancestorsheads(self, args);
2446 2447 if (gca == NULL)
2447 2448 return NULL;
2448 2449
2449 2450 if (PyList_GET_SIZE(gca) <= 1) {
2450 2451 return gca;
2451 2452 }
2452 2453
2453 2454 ret = find_deepest(self, gca);
2454 2455 Py_DECREF(gca);
2455 2456 return ret;
2456 2457 }
2457 2458
2458 2459 /*
2459 2460 * Invalidate any trie entries introduced by added revs.
2460 2461 */
2461 2462 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2462 2463 {
2463 2464 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2464 2465
2465 2466 for (i = start; i < len; i++) {
2466 2467 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2467 2468 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2468 2469
2469 2470 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2470 2471 }
2471 2472
2472 2473 if (start == 0)
2473 2474 Py_CLEAR(self->added);
2474 2475 }
2475 2476
2476 2477 /*
2477 2478 * Delete a numeric range of revs, which must be at the end of the
2478 2479 * range.
2479 2480 */
2480 2481 static int index_slice_del(indexObject *self, PyObject *item)
2481 2482 {
2482 2483 Py_ssize_t start, stop, step, slicelength;
2483 2484 Py_ssize_t length = index_length(self) + 1;
2484 2485 int ret = 0;
2485 2486
2486 2487 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2487 2488 #ifdef IS_PY3K
2488 2489 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2489 2490 &slicelength) < 0)
2490 2491 #else
2491 2492 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2492 2493 &step, &slicelength) < 0)
2493 2494 #endif
2494 2495 return -1;
2495 2496
2496 2497 if (slicelength <= 0)
2497 2498 return 0;
2498 2499
2499 2500 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2500 2501 stop = start;
2501 2502
2502 2503 if (step < 0) {
2503 2504 stop = start + 1;
2504 2505 start = stop + step * (slicelength - 1) - 1;
2505 2506 step = -step;
2506 2507 }
2507 2508
2508 2509 if (step != 1) {
2509 2510 PyErr_SetString(PyExc_ValueError,
2510 2511 "revlog index delete requires step size of 1");
2511 2512 return -1;
2512 2513 }
2513 2514
2514 2515 if (stop != length - 1) {
2515 2516 PyErr_SetString(PyExc_IndexError,
2516 2517 "revlog index deletion indices are invalid");
2517 2518 return -1;
2518 2519 }
2519 2520
2520 2521 if (start < self->length) {
2521 2522 if (self->ntinitialized) {
2522 2523 Py_ssize_t i;
2523 2524
2524 2525 for (i = start; i < self->length; i++) {
2525 2526 const char *node = index_node_existing(self, i);
2526 2527 if (node == NULL)
2527 2528 return -1;
2528 2529
2529 2530 nt_delete_node(&self->nt, node);
2530 2531 }
2531 2532 if (self->added)
2532 2533 index_invalidate_added(self, 0);
2533 2534 if (self->ntrev > start)
2534 2535 self->ntrev = (int)start;
2535 2536 } else if (self->added) {
2536 2537 Py_CLEAR(self->added);
2537 2538 }
2538 2539
2539 2540 self->length = start;
2540 2541 if (start < self->raw_length) {
2541 2542 if (self->cache) {
2542 2543 Py_ssize_t i;
2543 2544 for (i = start; i < self->raw_length; i++)
2544 2545 Py_CLEAR(self->cache[i]);
2545 2546 }
2546 2547 self->raw_length = start;
2547 2548 }
2548 2549 goto done;
2549 2550 }
2550 2551
2551 2552 if (self->ntinitialized) {
2552 2553 index_invalidate_added(self, start - self->length);
2553 2554 if (self->ntrev > start)
2554 2555 self->ntrev = (int)start;
2555 2556 }
2556 2557 if (self->added)
2557 2558 ret = PyList_SetSlice(self->added, start - self->length,
2558 2559 PyList_GET_SIZE(self->added), NULL);
2559 2560 done:
2560 2561 Py_CLEAR(self->headrevs);
2561 2562 return ret;
2562 2563 }
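
/*
 * Summary of the checks above, derived from the code rather than the
 * original comments: only a step-1 deletion that extends to the end of
 * the index is accepted, i.e. the Python-level form "del index[start:]".
 * A non-unit step raises ValueError, a slice that does not reach the
 * end of the index raises IndexError, and an empty slice is a no-op.
 */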
2563 2564
2564 2565 /*
2565 2566 * Supported ops:
2566 2567 *
2567 2568 * slice deletion
2568 2569 * string assignment (extend node->rev mapping)
2569 2570 * string deletion (shrink node->rev mapping)
2570 2571 */
2571 2572 static int index_assign_subscript(indexObject *self, PyObject *item,
2572 2573 PyObject *value)
2573 2574 {
2574 2575 char *node;
2575 2576 long rev;
2576 2577
2577 2578 if (PySlice_Check(item) && value == NULL)
2578 2579 return index_slice_del(self, item);
2579 2580
2580 2581 if (node_check(item, &node) == -1)
2581 2582 return -1;
2582 2583
2583 2584 if (value == NULL)
2584 2585 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2585 2586 : 0;
2586 2587 rev = PyInt_AsLong(value);
2587 2588 if (rev > INT_MAX || rev < 0) {
2588 2589 if (!PyErr_Occurred())
2589 2590 PyErr_SetString(PyExc_ValueError, "rev out of range");
2590 2591 return -1;
2591 2592 }
2592 2593
2593 2594 if (index_init_nt(self) == -1)
2594 2595 return -1;
2595 2596 return nt_insert(&self->nt, node, (int)rev);
2596 2597 }
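
/*
 * Python-level operations handled above (an illustrative summary):
 * "del index[start:]" takes the slice-deletion path, "index[node] = rev"
 * extends the node->rev mapping via nt_insert(), and "del index[node]"
 * drops the node from the trie when the trie has been initialized.
 */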
2597 2598
2598 2599 /*
2599 2600 * Find all RevlogNG entries in an index that has inline data. Update
2600 2601 * the optional "offsets" table with those entries.
2601 2602 */
2602 2603 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2603 2604 {
2604 2605 const char *data = (const char *)self->buf.buf;
2605 2606 Py_ssize_t pos = 0;
2606 2607 Py_ssize_t end = self->buf.len;
2607 2608 long incr = v1_hdrsize;
2608 2609 Py_ssize_t len = 0;
2609 2610
2610 2611 while (pos + v1_hdrsize <= end && pos >= 0) {
2611 2612 uint32_t comp_len;
2612 2613 /* 3rd element of header is length of compressed inline data */
2613 2614 comp_len = getbe32(data + pos + 8);
2614 2615 incr = v1_hdrsize + comp_len;
2615 2616 if (offsets)
2616 2617 offsets[len] = data + pos;
2617 2618 len++;
2618 2619 pos += incr;
2619 2620 }
2620 2621
2621 2622 if (pos != end) {
2622 2623 if (!PyErr_Occurred())
2623 2624 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2624 2625 return -1;
2625 2626 }
2626 2627
2627 2628 return len;
2628 2629 }
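
/*
 * Record layout assumed by the scan above: each inline entry begins
 * with a fixed v1_hdrsize-byte header whose bytes 8-11 hold the
 * big-endian length of the compressed revision data that follows, so
 * successive records start at pos, pos + v1_hdrsize + comp_len, and so
 * on until the buffer is exhausted.
 */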
2629 2630
2630 2631 static int index_init(indexObject *self, PyObject *args)
2631 2632 {
2632 2633 PyObject *data_obj, *inlined_obj;
2633 2634 Py_ssize_t size;
2634 2635
2635 2636 /* Initialize before argument-checking to avoid index_dealloc() crash.
2636 2637 */
2637 2638 self->raw_length = 0;
2638 2639 self->added = NULL;
2639 2640 self->cache = NULL;
2640 2641 self->data = NULL;
2641 2642 memset(&self->buf, 0, sizeof(self->buf));
2642 2643 self->headrevs = NULL;
2643 2644 self->filteredrevs = Py_None;
2644 2645 Py_INCREF(Py_None);
2645 2646 self->ntinitialized = 0;
2646 2647 self->offsets = NULL;
2647 2648
2648 2649 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2649 2650 return -1;
2650 2651 if (!PyObject_CheckBuffer(data_obj)) {
2651 2652 PyErr_SetString(PyExc_TypeError,
2652 2653 "data does not support buffer interface");
2653 2654 return -1;
2654 2655 }
2655 2656
2656 2657 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2657 2658 return -1;
2658 2659 size = self->buf.len;
2659 2660
2660 2661 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2661 2662 self->data = data_obj;
2662 2663
2663 2664 self->ntlookups = self->ntmisses = 0;
2664 2665 self->ntrev = -1;
2665 2666 Py_INCREF(self->data);
2666 2667
2667 2668 if (self->inlined) {
2668 2669 Py_ssize_t len = inline_scan(self, NULL);
2669 2670 if (len == -1)
2670 2671 goto bail;
2671 2672 self->raw_length = len;
2672 2673 self->length = len;
2673 2674 } else {
2674 2675 if (size % v1_hdrsize) {
2675 2676 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2676 2677 goto bail;
2677 2678 }
2678 2679 self->raw_length = size / v1_hdrsize;
2679 2680 self->length = self->raw_length;
2680 2681 }
2681 2682
2682 2683 return 0;
2683 2684 bail:
2684 2685 return -1;
2685 2686 }
2686 2687
2687 2688 static PyObject *index_nodemap(indexObject *self)
2688 2689 {
2689 2690 Py_INCREF(self);
2690 2691 return (PyObject *)self;
2691 2692 }
2692 2693
2693 2694 static void _index_clearcaches(indexObject *self)
2694 2695 {
2695 2696 if (self->cache) {
2696 2697 Py_ssize_t i;
2697 2698
2698 2699 for (i = 0; i < self->raw_length; i++)
2699 2700 Py_CLEAR(self->cache[i]);
2700 2701 free(self->cache);
2701 2702 self->cache = NULL;
2702 2703 }
2703 2704 if (self->offsets) {
2704 2705 PyMem_Free((void *)self->offsets);
2705 2706 self->offsets = NULL;
2706 2707 }
2707 2708 if (self->ntinitialized) {
2708 2709 nt_dealloc(&self->nt);
2709 2710 }
2710 2711 self->ntinitialized = 0;
2711 2712 Py_CLEAR(self->headrevs);
2712 2713 }
2713 2714
2714 2715 static PyObject *index_clearcaches(indexObject *self)
2715 2716 {
2716 2717 _index_clearcaches(self);
2717 2718 self->ntrev = -1;
2718 2719 self->ntlookups = self->ntmisses = 0;
2719 2720 Py_RETURN_NONE;
2720 2721 }
2721 2722
2722 2723 static void index_dealloc(indexObject *self)
2723 2724 {
2724 2725 _index_clearcaches(self);
2725 2726 Py_XDECREF(self->filteredrevs);
2726 2727 if (self->buf.buf) {
2727 2728 PyBuffer_Release(&self->buf);
2728 2729 memset(&self->buf, 0, sizeof(self->buf));
2729 2730 }
2730 2731 Py_XDECREF(self->data);
2731 2732 Py_XDECREF(self->added);
2732 2733 PyObject_Del(self);
2733 2734 }
2734 2735
2735 2736 static PySequenceMethods index_sequence_methods = {
2736 2737 (lenfunc)index_length, /* sq_length */
2737 2738 0, /* sq_concat */
2738 2739 0, /* sq_repeat */
2739 2740 (ssizeargfunc)index_get, /* sq_item */
2740 2741 0, /* sq_slice */
2741 2742 0, /* sq_ass_item */
2742 2743 0, /* sq_ass_slice */
2743 2744 (objobjproc)index_contains, /* sq_contains */
2744 2745 };
2745 2746
2746 2747 static PyMappingMethods index_mapping_methods = {
2747 2748 (lenfunc)index_length, /* mp_length */
2748 2749 (binaryfunc)index_getitem, /* mp_subscript */
2749 2750 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2750 2751 };
2751 2752
2752 2753 static PyMethodDef index_methods[] = {
2753 2754 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2754 2755 "return the gca set of the given revs"},
2755 2756 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2756 2757 METH_VARARGS,
2757 2758 "return the heads of the common ancestors of the given revs"},
2758 2759 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2759 2760 "clear the index caches"},
2760 2761 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2761 2762 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2762 2763 "return `rev` associated with a node or None"},
2763 2764 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2764 2765 "return True if the node exist in the index"},
2765 2766 {"rev", (PyCFunction)index_m_rev, METH_O,
2766 2767 "return `rev` associated with a node or raise RevlogError"},
2767 2768 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2768 2769 "compute phases"},
2769 2770 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2770 2771 "reachableroots"},
2771 2772 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2772 2773 "get head revisions"}, /* Can do filtering since 3.2 */
2773 2774 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2774 2775 "get filtered head revisions"}, /* Can always do filtering */
2775 2776 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2776 2777 "True if the object is a snapshot"},
2777 2778 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2778 2779 "Gather snapshot data in a cache dict"},
2779 2780 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2780 2781 "determine revisions with deltas to reconstruct fulltext"},
2781 2782 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2782 2783 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2783 2784 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2784 2785 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2785 2786 "match a potentially ambiguous node ID"},
2786 2787 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2787 2788 "find length of shortest hex nodeid of a binary ID"},
2788 2789 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2789 2790 {NULL} /* Sentinel */
2790 2791 };
2791 2792
2792 2793 static PyGetSetDef index_getset[] = {
2793 2794 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2794 2795 {NULL} /* Sentinel */
2795 2796 };
2796 2797
2797 2798 PyTypeObject HgRevlogIndex_Type = {
2798 2799 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2799 2800 "parsers.index", /* tp_name */
2800 2801 sizeof(indexObject), /* tp_basicsize */
2801 2802 0, /* tp_itemsize */
2802 2803 (destructor)index_dealloc, /* tp_dealloc */
2803 2804 0, /* tp_print */
2804 2805 0, /* tp_getattr */
2805 2806 0, /* tp_setattr */
2806 2807 0, /* tp_compare */
2807 2808 0, /* tp_repr */
2808 2809 0, /* tp_as_number */
2809 2810 &index_sequence_methods, /* tp_as_sequence */
2810 2811 &index_mapping_methods, /* tp_as_mapping */
2811 2812 0, /* tp_hash */
2812 2813 0, /* tp_call */
2813 2814 0, /* tp_str */
2814 2815 0, /* tp_getattro */
2815 2816 0, /* tp_setattro */
2816 2817 0, /* tp_as_buffer */
2817 2818 Py_TPFLAGS_DEFAULT, /* tp_flags */
2818 2819 "revlog index", /* tp_doc */
2819 2820 0, /* tp_traverse */
2820 2821 0, /* tp_clear */
2821 2822 0, /* tp_richcompare */
2822 2823 0, /* tp_weaklistoffset */
2823 2824 0, /* tp_iter */
2824 2825 0, /* tp_iternext */
2825 2826 index_methods, /* tp_methods */
2826 2827 0, /* tp_members */
2827 2828 index_getset, /* tp_getset */
2828 2829 0, /* tp_base */
2829 2830 0, /* tp_dict */
2830 2831 0, /* tp_descr_get */
2831 2832 0, /* tp_descr_set */
2832 2833 0, /* tp_dictoffset */
2833 2834 (initproc)index_init, /* tp_init */
2834 2835 0, /* tp_alloc */
2835 2836 };
2836 2837
2837 2838 /*
2838 2839 * returns a tuple of the form (index, index, cache) with elements as
2839 2840 * follows:
2840 2841 *
2841 2842 * index: an index object that lazily parses RevlogNG records
2842 2843 * cache: if data is inlined, a tuple (0, index_file_content), else None
2843 2844 * index_file_content could be a string, or a buffer
2844 2845 *
2845 2846 * the added complications exist for backwards compatibility
2846 2847 */
2847 2848 PyObject *parse_index2(PyObject *self, PyObject *args)
2848 2849 {
2849 2850 PyObject *tuple = NULL, *cache = NULL;
2850 2851 indexObject *idx;
2851 2852 int ret;
2852 2853
2853 2854 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2854 2855 if (idx == NULL)
2855 2856 goto bail;
2856 2857
2857 2858 ret = index_init(idx, args);
2858 2859 if (ret == -1)
2859 2860 goto bail;
2860 2861
2861 2862 if (idx->inlined) {
2862 2863 cache = Py_BuildValue("iO", 0, idx->data);
2863 2864 if (cache == NULL)
2864 2865 goto bail;
2865 2866 } else {
2866 2867 cache = Py_None;
2867 2868 Py_INCREF(cache);
2868 2869 }
2869 2870
2870 2871 tuple = Py_BuildValue("NN", idx, cache);
2871 2872 if (!tuple)
2872 2873 goto bail;
2873 2874 return tuple;
2874 2875
2875 2876 bail:
2876 2877 Py_XDECREF(idx);
2877 2878 Py_XDECREF(cache);
2878 2879 Py_XDECREF(tuple);
2879 2880 return NULL;
2880 2881 }
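
/*
 * Shapes of the value returned above, as built by the code: for an
 * inline index the result is (index, (0, index_file_content)); for an
 * index with separate data it is (index, None).
 */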
2881 2882
2882 2883 static Revlog_CAPI CAPI = {
2883 2884 /* increment the abi_version field upon each change in the Revlog_CAPI
2884 2885 struct or in the ABI of the listed functions */
2885 2886 2,
2886 2887 index_length,
2887 2888 index_node,
2888 2889 HgRevlogIndex_GetParents,
2889 2890 };
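
/*
 * A minimal sketch of how another C extension might consume the
 * capsule registered in revlog_module_init() below; the helper name
 * and the consuming module are hypothetical.
 */
static Revlog_CAPI *example_load_revlog_capi(void)
{
	return (Revlog_CAPI *)PyCapsule_Import(
	    "mercurial.cext.parsers.revlog_CAPI", 0);
}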
2890 2891
2891 2892 void revlog_module_init(PyObject *mod)
2892 2893 {
2893 2894 PyObject *caps = NULL;
2894 2895 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
2895 2896 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
2896 2897 return;
2897 2898 Py_INCREF(&HgRevlogIndex_Type);
2898 2899 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
2899 2900
2900 2901 nodetreeType.tp_new = PyType_GenericNew;
2901 2902 if (PyType_Ready(&nodetreeType) < 0)
2902 2903 return;
2903 2904 Py_INCREF(&nodetreeType);
2904 2905 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
2905 2906
2906 2907 if (!nullentry) {
2907 2908 nullentry =
2908 2909 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2909 2910 -1, -1, -1, nullid, (Py_ssize_t)20);
2910 2911 }
2911 2912 if (nullentry)
2912 2913 PyObject_GC_UnTrack(nullentry);
2913 2914
2914 2915 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
2915 2916 if (caps != NULL)
2916 2917 PyModule_AddObject(mod, "revlog_CAPI", caps);
2917 2918 }