##// END OF EJS Templates
delta: have a native implementation of _findsnapshot...
Boris Feld -
r41141:38e88450 default
parent child Browse files
Show More
@@ -1,2969 +1,3032
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <assert.h>
12 12 #include <ctype.h>
13 13 #include <limits.h>
14 14 #include <stddef.h>
15 15 #include <stdlib.h>
16 16 #include <string.h>
17 17
18 18 #include "bitmanipulation.h"
19 19 #include "charencode.h"
20 20 #include "revlog.h"
21 21 #include "util.h"
22 22
23 23 #ifdef IS_PY3K
24 24 /* The mapping of Python types is meant to be temporary to get Python
25 25 * 3 to compile. We should remove this once Python 3 support is fully
26 26 * supported and proper types are used in the extensions themselves. */
27 27 #define PyInt_Check PyLong_Check
28 28 #define PyInt_FromLong PyLong_FromLong
29 29 #define PyInt_FromSsize_t PyLong_FromSsize_t
30 30 #define PyInt_AsLong PyLong_AsLong
31 31 #endif
32 32
33 33 typedef struct indexObjectStruct indexObject;
34 34
35 35 typedef struct {
36 36 int children[16];
37 37 } nodetreenode;
38 38
39 39 /*
40 40 * A base-16 trie for fast node->rev mapping.
41 41 *
42 42 * Positive value is index of the next node in the trie
43 43 * Negative value is a leaf: -(rev + 2)
44 44 * Zero is empty
45 45 */
46 46 typedef struct {
47 47 indexObject *index;
48 48 nodetreenode *nodes;
49 49 unsigned length; /* # nodes in use */
50 50 unsigned capacity; /* # nodes allocated */
51 51 int depth; /* maximum depth of tree */
52 52 int splits; /* # splits performed */
53 53 } nodetree;
54 54
55 55 typedef struct {
56 56 PyObject_HEAD /* ; */
57 57 nodetree nt;
58 58 } nodetreeObject;
59 59
60 60 /*
61 61 * This class has two behaviors.
62 62 *
63 63 * When used in a list-like way (with integer keys), we decode an
64 64 * entry in a RevlogNG index file on demand. Our last entry is a
65 65 * sentinel, always a nullid. We have limited support for
66 66 * integer-keyed insert and delete, only at elements right before the
67 67 * sentinel.
68 68 *
69 69 * With string keys, we lazily perform a reverse mapping from node to
70 70 * rev, using a base-16 trie.
71 71 */
72 72 struct indexObjectStruct {
73 73 PyObject_HEAD
74 74 /* Type-specific fields go here. */
75 75 PyObject *data; /* raw bytes of index */
76 76 Py_buffer buf; /* buffer of data */
77 77 PyObject **cache; /* cached tuples */
78 78 const char **offsets; /* populated on demand */
79 79 Py_ssize_t raw_length; /* original number of elements */
80 80 Py_ssize_t length; /* current number of elements */
81 81 PyObject *added; /* populated on demand */
82 82 PyObject *headrevs; /* cache, invalidated on changes */
83 83 PyObject *filteredrevs; /* filtered revs set */
84 84 nodetree nt; /* base-16 trie */
85 85 int ntinitialized; /* 0 or 1 */
86 86 int ntrev; /* last rev scanned */
87 87 int ntlookups; /* # lookups */
88 88 int ntmisses; /* # lookups that miss the cache */
89 89 int inlined;
90 90 };
91 91
92 92 static Py_ssize_t index_length(const indexObject *self)
93 93 {
94 94 if (self->added == NULL)
95 95 return self->length;
96 96 return self->length + PyList_GET_SIZE(self->added);
97 97 }
98 98
99 99 static PyObject *nullentry = NULL;
100 100 static const char nullid[20] = {0};
101 101 static const Py_ssize_t nullrev = -1;
102 102
103 103 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
104 104
105 105 #if LONG_MAX == 0x7fffffffL
106 106 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
107 107 #else
108 108 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
109 109 #endif
110 110
111 111 /* A RevlogNG v1 index entry is 64 bytes long. */
112 112 static const long v1_hdrsize = 64;
113 113
114 114 static void raise_revlog_error(void)
115 115 {
116 116 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
117 117
118 118 mod = PyImport_ImportModule("mercurial.error");
119 119 if (mod == NULL) {
120 120 goto cleanup;
121 121 }
122 122
123 123 dict = PyModule_GetDict(mod);
124 124 if (dict == NULL) {
125 125 goto cleanup;
126 126 }
127 127 Py_INCREF(dict);
128 128
129 129 errclass = PyDict_GetItemString(dict, "RevlogError");
130 130 if (errclass == NULL) {
131 131 PyErr_SetString(PyExc_SystemError,
132 132 "could not find RevlogError");
133 133 goto cleanup;
134 134 }
135 135
136 136 /* value of exception is ignored by callers */
137 137 PyErr_SetString(errclass, "RevlogError");
138 138
139 139 cleanup:
140 140 Py_XDECREF(dict);
141 141 Py_XDECREF(mod);
142 142 }
143 143
144 144 /*
145 145 * Return a pointer to the beginning of a RevlogNG record.
146 146 */
147 147 static const char *index_deref(indexObject *self, Py_ssize_t pos)
148 148 {
149 149 if (self->inlined && pos > 0) {
150 150 if (self->offsets == NULL) {
151 151 self->offsets = PyMem_Malloc(self->raw_length *
152 152 sizeof(*self->offsets));
153 153 if (self->offsets == NULL)
154 154 return (const char *)PyErr_NoMemory();
155 155 inline_scan(self, self->offsets);
156 156 }
157 157 return self->offsets[pos];
158 158 }
159 159
160 160 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
161 161 }
162 162
163 163 /*
164 164 * Get parents of the given rev.
165 165 *
166 166 * The specified rev must be valid and must not be nullrev. A returned
167 167 * parent revision may be nullrev, but is guaranteed to be in valid range.
168 168 */
169 169 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
170 170 int maxrev)
171 171 {
172 172 if (rev >= self->length) {
173 173 long tmp;
174 174 PyObject *tuple =
175 175 PyList_GET_ITEM(self->added, rev - self->length);
176 176 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
177 177 return -1;
178 178 }
179 179 ps[0] = (int)tmp;
180 180 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
181 181 return -1;
182 182 }
183 183 ps[1] = (int)tmp;
184 184 } else {
185 185 const char *data = index_deref(self, rev);
186 186 ps[0] = getbe32(data + 24);
187 187 ps[1] = getbe32(data + 28);
188 188 }
189 189 /* If index file is corrupted, ps[] may point to invalid revisions. So
190 190 * there is a risk of buffer overflow to trust them unconditionally. */
191 191 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
192 192 PyErr_SetString(PyExc_ValueError, "parent out of range");
193 193 return -1;
194 194 }
195 195 return 0;
196 196 }
197 197
198 198 /*
199 199 * Get parents of the given rev.
200 200 *
201 201 * If the specified rev is out of range, IndexError will be raised. If the
202 202 * revlog entry is corrupted, ValueError may be raised.
203 203 *
204 204 * Returns 0 on success or -1 on failure.
205 205 */
206 206 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
207 207 {
208 208 int tiprev;
209 209 if (!op || !HgRevlogIndex_Check(op) || !ps) {
210 210 PyErr_BadInternalCall();
211 211 return -1;
212 212 }
213 213 tiprev = (int)index_length((indexObject *)op) - 1;
214 214 if (rev < -1 || rev > tiprev) {
215 215 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
216 216 return -1;
217 217 } else if (rev == -1) {
218 218 ps[0] = ps[1] = -1;
219 219 return 0;
220 220 } else {
221 221 return index_get_parents((indexObject *)op, rev, ps, tiprev);
222 222 }
223 223 }
224 224
225 225 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
226 226 {
227 227 uint64_t offset;
228 228 if (rev == nullrev) {
229 229 return 0;
230 230 }
231 231 if (rev >= self->length) {
232 232 PyObject *tuple;
233 233 PyObject *pylong;
234 234 PY_LONG_LONG tmp;
235 235 tuple = PyList_GET_ITEM(self->added, rev - self->length);
236 236 pylong = PyTuple_GET_ITEM(tuple, 0);
237 237 tmp = PyLong_AsLongLong(pylong);
238 238 if (tmp == -1 && PyErr_Occurred()) {
239 239 return -1;
240 240 }
241 241 if (tmp < 0) {
242 242 PyErr_Format(PyExc_OverflowError,
243 243 "revlog entry size out of bound (%lld)",
244 244 (long long)tmp);
245 245 return -1;
246 246 }
247 247 offset = (uint64_t)tmp;
248 248 } else {
249 249 const char *data = index_deref(self, rev);
250 250 offset = getbe32(data + 4);
251 251 if (rev == 0) {
252 252 /* mask out version number for the first entry */
253 253 offset &= 0xFFFF;
254 254 } else {
255 255 uint32_t offset_high = getbe32(data);
256 256 offset |= ((uint64_t)offset_high) << 32;
257 257 }
258 258 }
259 259 return (int64_t)(offset >> 16);
260 260 }
261 261
262 262 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
263 263 {
264 264 if (rev == nullrev) {
265 265 return 0;
266 266 }
267 267 if (rev >= self->length) {
268 268 PyObject *tuple;
269 269 PyObject *pylong;
270 270 long ret;
271 271 tuple = PyList_GET_ITEM(self->added, rev - self->length);
272 272 pylong = PyTuple_GET_ITEM(tuple, 1);
273 273 ret = PyInt_AsLong(pylong);
274 274 if (ret == -1 && PyErr_Occurred()) {
275 275 return -1;
276 276 }
277 277 if (ret < 0 || ret > (long)INT_MAX) {
278 278 PyErr_Format(PyExc_OverflowError,
279 279 "revlog entry size out of bound (%ld)",
280 280 ret);
281 281 return -1;
282 282 }
283 283 return (int)ret;
284 284 } else {
285 285 const char *data = index_deref(self, rev);
286 286 int tmp = (int)getbe32(data + 8);
287 287 if (tmp < 0) {
288 288 PyErr_Format(PyExc_OverflowError,
289 289 "revlog entry size out of bound (%d)",
290 290 tmp);
291 291 return -1;
292 292 }
293 293 return tmp;
294 294 }
295 295 }
296 296
297 297 /*
298 298 * RevlogNG format (all in big endian, data may be inlined):
299 299 * 6 bytes: offset
300 300 * 2 bytes: flags
301 301 * 4 bytes: compressed length
302 302 * 4 bytes: uncompressed length
303 303 * 4 bytes: base revision
304 304 * 4 bytes: link revision
305 305 * 4 bytes: parent 1 revision
306 306 * 4 bytes: parent 2 revision
307 307 * 32 bytes: nodeid (only 20 bytes used)
308 308 */
309 309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 310 {
311 311 uint64_t offset_flags;
312 312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 313 const char *c_node_id;
314 314 const char *data;
315 315 Py_ssize_t length = index_length(self);
316 316 PyObject *entry;
317 317
318 318 if (pos == nullrev) {
319 319 Py_INCREF(nullentry);
320 320 return nullentry;
321 321 }
322 322
323 323 if (pos < 0 || pos >= length) {
324 324 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
325 325 return NULL;
326 326 }
327 327
328 328 if (pos >= self->length) {
329 329 PyObject *obj;
330 330 obj = PyList_GET_ITEM(self->added, pos - self->length);
331 331 Py_INCREF(obj);
332 332 return obj;
333 333 }
334 334
335 335 if (self->cache) {
336 336 if (self->cache[pos]) {
337 337 Py_INCREF(self->cache[pos]);
338 338 return self->cache[pos];
339 339 }
340 340 } else {
341 341 self->cache = calloc(self->raw_length, sizeof(PyObject *));
342 342 if (self->cache == NULL)
343 343 return PyErr_NoMemory();
344 344 }
345 345
346 346 data = index_deref(self, pos);
347 347 if (data == NULL)
348 348 return NULL;
349 349
350 350 offset_flags = getbe32(data + 4);
351 351 if (pos == 0) /* mask out version number for the first entry */
352 352 offset_flags &= 0xFFFF;
353 353 else {
354 354 uint32_t offset_high = getbe32(data);
355 355 offset_flags |= ((uint64_t)offset_high) << 32;
356 356 }
357 357
358 358 comp_len = getbe32(data + 8);
359 359 uncomp_len = getbe32(data + 12);
360 360 base_rev = getbe32(data + 16);
361 361 link_rev = getbe32(data + 20);
362 362 parent_1 = getbe32(data + 24);
363 363 parent_2 = getbe32(data + 28);
364 364 c_node_id = data + 32;
365 365
366 366 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
367 367 base_rev, link_rev, parent_1, parent_2, c_node_id,
368 368 20);
369 369
370 370 if (entry) {
371 371 PyObject_GC_UnTrack(entry);
372 372 Py_INCREF(entry);
373 373 }
374 374
375 375 self->cache[pos] = entry;
376 376
377 377 return entry;
378 378 }
379 379
380 380 /*
381 381 * Return the 20-byte SHA of the node corresponding to the given rev.
382 382 */
383 383 static const char *index_node(indexObject *self, Py_ssize_t pos)
384 384 {
385 385 Py_ssize_t length = index_length(self);
386 386 const char *data;
387 387
388 388 if (pos == nullrev)
389 389 return nullid;
390 390
391 391 if (pos >= length)
392 392 return NULL;
393 393
394 394 if (pos >= self->length) {
395 395 PyObject *tuple, *str;
396 396 tuple = PyList_GET_ITEM(self->added, pos - self->length);
397 397 str = PyTuple_GetItem(tuple, 7);
398 398 return str ? PyBytes_AS_STRING(str) : NULL;
399 399 }
400 400
401 401 data = index_deref(self, pos);
402 402 return data ? data + 32 : NULL;
403 403 }
404 404
405 405 /*
406 406 * Return the 20-byte SHA of the node corresponding to the given rev. The
407 407 * rev is assumed to be existing. If not, an exception is set.
408 408 */
409 409 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
410 410 {
411 411 const char *node = index_node(self, pos);
412 412 if (node == NULL) {
413 413 PyErr_Format(PyExc_IndexError, "could not access rev %d",
414 414 (int)pos);
415 415 }
416 416 return node;
417 417 }
418 418
419 419 static int nt_insert(nodetree *self, const char *node, int rev);
420 420
421 421 static int node_check(PyObject *obj, char **node)
422 422 {
423 423 Py_ssize_t nodelen;
424 424 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
425 425 return -1;
426 426 if (nodelen == 20)
427 427 return 0;
428 428 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
429 429 return -1;
430 430 }
431 431
432 432 static PyObject *index_append(indexObject *self, PyObject *obj)
433 433 {
434 434 char *node;
435 435 Py_ssize_t len;
436 436
437 437 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
438 438 PyErr_SetString(PyExc_TypeError, "8-tuple required");
439 439 return NULL;
440 440 }
441 441
442 442 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
443 443 return NULL;
444 444
445 445 len = index_length(self);
446 446
447 447 if (self->added == NULL) {
448 448 self->added = PyList_New(0);
449 449 if (self->added == NULL)
450 450 return NULL;
451 451 }
452 452
453 453 if (PyList_Append(self->added, obj) == -1)
454 454 return NULL;
455 455
456 456 if (self->ntinitialized)
457 457 nt_insert(&self->nt, node, (int)len);
458 458
459 459 Py_CLEAR(self->headrevs);
460 460 Py_RETURN_NONE;
461 461 }
462 462
463 463 static PyObject *index_stats(indexObject *self)
464 464 {
465 465 PyObject *obj = PyDict_New();
466 466 PyObject *s = NULL;
467 467 PyObject *t = NULL;
468 468
469 469 if (obj == NULL)
470 470 return NULL;
471 471
472 472 #define istat(__n, __d) \
473 473 do { \
474 474 s = PyBytes_FromString(__d); \
475 475 t = PyInt_FromSsize_t(self->__n); \
476 476 if (!s || !t) \
477 477 goto bail; \
478 478 if (PyDict_SetItem(obj, s, t) == -1) \
479 479 goto bail; \
480 480 Py_CLEAR(s); \
481 481 Py_CLEAR(t); \
482 482 } while (0)
483 483
484 484 if (self->added) {
485 485 Py_ssize_t len = PyList_GET_SIZE(self->added);
486 486 s = PyBytes_FromString("index entries added");
487 487 t = PyInt_FromSsize_t(len);
488 488 if (!s || !t)
489 489 goto bail;
490 490 if (PyDict_SetItem(obj, s, t) == -1)
491 491 goto bail;
492 492 Py_CLEAR(s);
493 493 Py_CLEAR(t);
494 494 }
495 495
496 496 if (self->raw_length != self->length)
497 497 istat(raw_length, "revs on disk");
498 498 istat(length, "revs in memory");
499 499 istat(ntlookups, "node trie lookups");
500 500 istat(ntmisses, "node trie misses");
501 501 istat(ntrev, "node trie last rev scanned");
502 502 if (self->ntinitialized) {
503 503 istat(nt.capacity, "node trie capacity");
504 504 istat(nt.depth, "node trie depth");
505 505 istat(nt.length, "node trie count");
506 506 istat(nt.splits, "node trie splits");
507 507 }
508 508
509 509 #undef istat
510 510
511 511 return obj;
512 512
513 513 bail:
514 514 Py_XDECREF(obj);
515 515 Py_XDECREF(s);
516 516 Py_XDECREF(t);
517 517 return NULL;
518 518 }
519 519
520 520 /*
521 521 * When we cache a list, we want to be sure the caller can't mutate
522 522 * the cached copy.
523 523 */
524 524 static PyObject *list_copy(PyObject *list)
525 525 {
526 526 Py_ssize_t len = PyList_GET_SIZE(list);
527 527 PyObject *newlist = PyList_New(len);
528 528 Py_ssize_t i;
529 529
530 530 if (newlist == NULL)
531 531 return NULL;
532 532
533 533 for (i = 0; i < len; i++) {
534 534 PyObject *obj = PyList_GET_ITEM(list, i);
535 535 Py_INCREF(obj);
536 536 PyList_SET_ITEM(newlist, i, obj);
537 537 }
538 538
539 539 return newlist;
540 540 }
541 541
542 542 static int check_filter(PyObject *filter, Py_ssize_t arg)
543 543 {
544 544 if (filter) {
545 545 PyObject *arglist, *result;
546 546 int isfiltered;
547 547
548 548 arglist = Py_BuildValue("(n)", arg);
549 549 if (!arglist) {
550 550 return -1;
551 551 }
552 552
553 553 result = PyEval_CallObject(filter, arglist);
554 554 Py_DECREF(arglist);
555 555 if (!result) {
556 556 return -1;
557 557 }
558 558
559 559 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
560 560 * same as this function, so we can just return it directly.*/
561 561 isfiltered = PyObject_IsTrue(result);
562 562 Py_DECREF(result);
563 563 return isfiltered;
564 564 } else {
565 565 return 0;
566 566 }
567 567 }
568 568
569 569 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
570 570 Py_ssize_t marker, char *phases)
571 571 {
572 572 PyObject *iter = NULL;
573 573 PyObject *iter_item = NULL;
574 574 Py_ssize_t min_idx = index_length(self) + 2;
575 575 long iter_item_long;
576 576
577 577 if (PyList_GET_SIZE(list) != 0) {
578 578 iter = PyObject_GetIter(list);
579 579 if (iter == NULL)
580 580 return -2;
581 581 while ((iter_item = PyIter_Next(iter))) {
582 582 if (!pylong_to_long(iter_item, &iter_item_long)) {
583 583 Py_DECREF(iter_item);
584 584 return -2;
585 585 }
586 586 Py_DECREF(iter_item);
587 587 if (iter_item_long < min_idx)
588 588 min_idx = iter_item_long;
589 589 phases[iter_item_long] = (char)marker;
590 590 }
591 591 Py_DECREF(iter);
592 592 }
593 593
594 594 return min_idx;
595 595 }
596 596
597 597 static inline void set_phase_from_parents(char *phases, int parent_1,
598 598 int parent_2, Py_ssize_t i)
599 599 {
600 600 if (parent_1 >= 0 && phases[parent_1] > phases[i])
601 601 phases[i] = phases[parent_1];
602 602 if (parent_2 >= 0 && phases[parent_2] > phases[i])
603 603 phases[i] = phases[parent_2];
604 604 }
605 605
606 606 static PyObject *reachableroots2(indexObject *self, PyObject *args)
607 607 {
608 608
609 609 /* Input */
610 610 long minroot;
611 611 PyObject *includepatharg = NULL;
612 612 int includepath = 0;
613 613 /* heads and roots are lists */
614 614 PyObject *heads = NULL;
615 615 PyObject *roots = NULL;
616 616 PyObject *reachable = NULL;
617 617
618 618 PyObject *val;
619 619 Py_ssize_t len = index_length(self);
620 620 long revnum;
621 621 Py_ssize_t k;
622 622 Py_ssize_t i;
623 623 Py_ssize_t l;
624 624 int r;
625 625 int parents[2];
626 626
627 627 /* Internal data structure:
628 628 * tovisit: array of length len+1 (all revs + nullrev), filled upto
629 629 * lentovisit
630 630 *
631 631 * revstates: array of length len+1 (all revs + nullrev) */
632 632 int *tovisit = NULL;
633 633 long lentovisit = 0;
634 634 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
635 635 char *revstates = NULL;
636 636
637 637 /* Get arguments */
638 638 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
639 639 &PyList_Type, &roots, &PyBool_Type,
640 640 &includepatharg))
641 641 goto bail;
642 642
643 643 if (includepatharg == Py_True)
644 644 includepath = 1;
645 645
646 646 /* Initialize return set */
647 647 reachable = PyList_New(0);
648 648 if (reachable == NULL)
649 649 goto bail;
650 650
651 651 /* Initialize internal datastructures */
652 652 tovisit = (int *)malloc((len + 1) * sizeof(int));
653 653 if (tovisit == NULL) {
654 654 PyErr_NoMemory();
655 655 goto bail;
656 656 }
657 657
658 658 revstates = (char *)calloc(len + 1, 1);
659 659 if (revstates == NULL) {
660 660 PyErr_NoMemory();
661 661 goto bail;
662 662 }
663 663
664 664 l = PyList_GET_SIZE(roots);
665 665 for (i = 0; i < l; i++) {
666 666 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
667 667 if (revnum == -1 && PyErr_Occurred())
668 668 goto bail;
669 669 /* If root is out of range, e.g. wdir(), it must be unreachable
670 670 * from heads. So we can just ignore it. */
671 671 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
672 672 continue;
673 673 revstates[revnum + 1] |= RS_ROOT;
674 674 }
675 675
676 676 /* Populate tovisit with all the heads */
677 677 l = PyList_GET_SIZE(heads);
678 678 for (i = 0; i < l; i++) {
679 679 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
680 680 if (revnum == -1 && PyErr_Occurred())
681 681 goto bail;
682 682 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
683 683 PyErr_SetString(PyExc_IndexError, "head out of range");
684 684 goto bail;
685 685 }
686 686 if (!(revstates[revnum + 1] & RS_SEEN)) {
687 687 tovisit[lentovisit++] = (int)revnum;
688 688 revstates[revnum + 1] |= RS_SEEN;
689 689 }
690 690 }
691 691
692 692 /* Visit the tovisit list and find the reachable roots */
693 693 k = 0;
694 694 while (k < lentovisit) {
695 695 /* Add the node to reachable if it is a root*/
696 696 revnum = tovisit[k++];
697 697 if (revstates[revnum + 1] & RS_ROOT) {
698 698 revstates[revnum + 1] |= RS_REACHABLE;
699 699 val = PyInt_FromLong(revnum);
700 700 if (val == NULL)
701 701 goto bail;
702 702 r = PyList_Append(reachable, val);
703 703 Py_DECREF(val);
704 704 if (r < 0)
705 705 goto bail;
706 706 if (includepath == 0)
707 707 continue;
708 708 }
709 709
710 710 /* Add its parents to the list of nodes to visit */
711 711 if (revnum == nullrev)
712 712 continue;
713 713 r = index_get_parents(self, revnum, parents, (int)len - 1);
714 714 if (r < 0)
715 715 goto bail;
716 716 for (i = 0; i < 2; i++) {
717 717 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
718 718 parents[i] >= minroot) {
719 719 tovisit[lentovisit++] = parents[i];
720 720 revstates[parents[i] + 1] |= RS_SEEN;
721 721 }
722 722 }
723 723 }
724 724
725 725 /* Find all the nodes in between the roots we found and the heads
726 726 * and add them to the reachable set */
727 727 if (includepath == 1) {
728 728 long minidx = minroot;
729 729 if (minidx < 0)
730 730 minidx = 0;
731 731 for (i = minidx; i < len; i++) {
732 732 if (!(revstates[i + 1] & RS_SEEN))
733 733 continue;
734 734 r = index_get_parents(self, i, parents, (int)len - 1);
735 735 /* Corrupted index file, error is set from
736 736 * index_get_parents */
737 737 if (r < 0)
738 738 goto bail;
739 739 if (((revstates[parents[0] + 1] |
740 740 revstates[parents[1] + 1]) &
741 741 RS_REACHABLE) &&
742 742 !(revstates[i + 1] & RS_REACHABLE)) {
743 743 revstates[i + 1] |= RS_REACHABLE;
744 744 val = PyInt_FromSsize_t(i);
745 745 if (val == NULL)
746 746 goto bail;
747 747 r = PyList_Append(reachable, val);
748 748 Py_DECREF(val);
749 749 if (r < 0)
750 750 goto bail;
751 751 }
752 752 }
753 753 }
754 754
755 755 free(revstates);
756 756 free(tovisit);
757 757 return reachable;
758 758 bail:
759 759 Py_XDECREF(reachable);
760 760 free(revstates);
761 761 free(tovisit);
762 762 return NULL;
763 763 }
764 764
765 765 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
766 766 {
767 767 PyObject *roots = Py_None;
768 768 PyObject *ret = NULL;
769 769 PyObject *phasessize = NULL;
770 770 PyObject *phaseroots = NULL;
771 771 PyObject *phaseset = NULL;
772 772 PyObject *phasessetlist = NULL;
773 773 PyObject *rev = NULL;
774 774 Py_ssize_t len = index_length(self);
775 775 Py_ssize_t numphase = 0;
776 776 Py_ssize_t minrevallphases = 0;
777 777 Py_ssize_t minrevphase = 0;
778 778 Py_ssize_t i = 0;
779 779 char *phases = NULL;
780 780 long phase;
781 781
782 782 if (!PyArg_ParseTuple(args, "O", &roots))
783 783 goto done;
784 784 if (roots == NULL || !PyList_Check(roots)) {
785 785 PyErr_SetString(PyExc_TypeError, "roots must be a list");
786 786 goto done;
787 787 }
788 788
789 789 phases = calloc(
790 790 len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
791 791 if (phases == NULL) {
792 792 PyErr_NoMemory();
793 793 goto done;
794 794 }
795 795 /* Put the phase information of all the roots in phases */
796 796 numphase = PyList_GET_SIZE(roots) + 1;
797 797 minrevallphases = len + 1;
798 798 phasessetlist = PyList_New(numphase);
799 799 if (phasessetlist == NULL)
800 800 goto done;
801 801
802 802 PyList_SET_ITEM(phasessetlist, 0, Py_None);
803 803 Py_INCREF(Py_None);
804 804
805 805 for (i = 0; i < numphase - 1; i++) {
806 806 phaseroots = PyList_GET_ITEM(roots, i);
807 807 phaseset = PySet_New(NULL);
808 808 if (phaseset == NULL)
809 809 goto release;
810 810 PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
811 811 if (!PyList_Check(phaseroots)) {
812 812 PyErr_SetString(PyExc_TypeError,
813 813 "roots item must be a list");
814 814 goto release;
815 815 }
816 816 minrevphase =
817 817 add_roots_get_min(self, phaseroots, i + 1, phases);
818 818 if (minrevphase == -2) /* Error from add_roots_get_min */
819 819 goto release;
820 820 minrevallphases = MIN(minrevallphases, minrevphase);
821 821 }
822 822 /* Propagate the phase information from the roots to the revs */
823 823 if (minrevallphases != -1) {
824 824 int parents[2];
825 825 for (i = minrevallphases; i < len; i++) {
826 826 if (index_get_parents(self, i, parents, (int)len - 1) <
827 827 0)
828 828 goto release;
829 829 set_phase_from_parents(phases, parents[0], parents[1],
830 830 i);
831 831 }
832 832 }
833 833 /* Transform phase list to a python list */
834 834 phasessize = PyInt_FromSsize_t(len);
835 835 if (phasessize == NULL)
836 836 goto release;
837 837 for (i = 0; i < len; i++) {
838 838 phase = phases[i];
839 839 /* We only store the sets of phase for non public phase, the
840 840 * public phase is computed as a difference */
841 841 if (phase != 0) {
842 842 phaseset = PyList_GET_ITEM(phasessetlist, phase);
843 843 rev = PyInt_FromSsize_t(i);
844 844 if (rev == NULL)
845 845 goto release;
846 846 PySet_Add(phaseset, rev);
847 847 Py_XDECREF(rev);
848 848 }
849 849 }
850 850 ret = PyTuple_Pack(2, phasessize, phasessetlist);
851 851
852 852 release:
853 853 Py_XDECREF(phasessize);
854 854 Py_XDECREF(phasessetlist);
855 855 done:
856 856 free(phases);
857 857 return ret;
858 858 }
859 859
860 860 static PyObject *index_headrevs(indexObject *self, PyObject *args)
861 861 {
862 862 Py_ssize_t i, j, len;
863 863 char *nothead = NULL;
864 864 PyObject *heads = NULL;
865 865 PyObject *filter = NULL;
866 866 PyObject *filteredrevs = Py_None;
867 867
868 868 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
869 869 return NULL;
870 870 }
871 871
872 872 if (self->headrevs && filteredrevs == self->filteredrevs)
873 873 return list_copy(self->headrevs);
874 874
875 875 Py_DECREF(self->filteredrevs);
876 876 self->filteredrevs = filteredrevs;
877 877 Py_INCREF(filteredrevs);
878 878
879 879 if (filteredrevs != Py_None) {
880 880 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
881 881 if (!filter) {
882 882 PyErr_SetString(
883 883 PyExc_TypeError,
884 884 "filteredrevs has no attribute __contains__");
885 885 goto bail;
886 886 }
887 887 }
888 888
889 889 len = index_length(self);
890 890 heads = PyList_New(0);
891 891 if (heads == NULL)
892 892 goto bail;
893 893 if (len == 0) {
894 894 PyObject *nullid = PyInt_FromLong(-1);
895 895 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
896 896 Py_XDECREF(nullid);
897 897 goto bail;
898 898 }
899 899 goto done;
900 900 }
901 901
902 902 nothead = calloc(len, 1);
903 903 if (nothead == NULL) {
904 904 PyErr_NoMemory();
905 905 goto bail;
906 906 }
907 907
908 908 for (i = len - 1; i >= 0; i--) {
909 909 int isfiltered;
910 910 int parents[2];
911 911
912 912 /* If nothead[i] == 1, it means we've seen an unfiltered child
913 913 * of this node already, and therefore this node is not
914 914 * filtered. So we can skip the expensive check_filter step.
915 915 */
916 916 if (nothead[i] != 1) {
917 917 isfiltered = check_filter(filter, i);
918 918 if (isfiltered == -1) {
919 919 PyErr_SetString(PyExc_TypeError,
920 920 "unable to check filter");
921 921 goto bail;
922 922 }
923 923
924 924 if (isfiltered) {
925 925 nothead[i] = 1;
926 926 continue;
927 927 }
928 928 }
929 929
930 930 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
931 931 goto bail;
932 932 for (j = 0; j < 2; j++) {
933 933 if (parents[j] >= 0)
934 934 nothead[parents[j]] = 1;
935 935 }
936 936 }
937 937
938 938 for (i = 0; i < len; i++) {
939 939 PyObject *head;
940 940
941 941 if (nothead[i])
942 942 continue;
943 943 head = PyInt_FromSsize_t(i);
944 944 if (head == NULL || PyList_Append(heads, head) == -1) {
945 945 Py_XDECREF(head);
946 946 goto bail;
947 947 }
948 948 }
949 949
950 950 done:
951 951 self->headrevs = heads;
952 952 Py_XDECREF(filter);
953 953 free(nothead);
954 954 return list_copy(self->headrevs);
955 955 bail:
956 956 Py_XDECREF(filter);
957 957 Py_XDECREF(heads);
958 958 free(nothead);
959 959 return NULL;
960 960 }
961 961
962 962 /**
963 963 * Obtain the base revision index entry.
964 964 *
965 965 * Callers must ensure that rev >= 0 or illegal memory access may occur.
966 966 */
967 967 static inline int index_baserev(indexObject *self, int rev)
968 968 {
969 969 const char *data;
970 970 int result;
971 971
972 972 if (rev >= self->length) {
973 973 PyObject *tuple =
974 974 PyList_GET_ITEM(self->added, rev - self->length);
975 975 long ret;
976 976 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
977 977 return -2;
978 978 }
979 979 result = (int)ret;
980 980 } else {
981 981 data = index_deref(self, rev);
982 982 if (data == NULL) {
983 983 return -2;
984 984 }
985 985
986 986 result = getbe32(data + 16);
987 987 }
988 988 if (result > rev) {
989 989 PyErr_Format(
990 990 PyExc_ValueError,
991 991 "corrupted revlog, revision base above revision: %d, %d",
992 992 rev, result);
993 993 return -2;
994 994 }
995 995 return result;
996 996 }
997 997
998 998 /**
999 999 * Find if a revision is a snapshot or not
1000 1000 *
1001 1001 * Only relevant for sparse-revlog case.
1002 1002 * Callers must ensure that rev is in a valid range.
1003 1003 */
1004 1004 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1005 1005 {
1006 1006 int ps[2];
1007 1007 Py_ssize_t base;
1008 1008 while (rev >= 0) {
1009 1009 base = (Py_ssize_t)index_baserev(self, rev);
1010 1010 if (base == rev) {
1011 1011 base = -1;
1012 1012 }
1013 1013 if (base == -2) {
1014 1014 assert(PyErr_Occurred());
1015 1015 return -1;
1016 1016 }
1017 1017 if (base == -1) {
1018 1018 return 1;
1019 1019 }
1020 1020 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1021 1021 assert(PyErr_Occurred());
1022 1022 return -1;
1023 1023 };
1024 1024 if (base == ps[0] || base == ps[1]) {
1025 1025 return 0;
1026 1026 }
1027 1027 rev = base;
1028 1028 }
1029 1029 return rev == -1;
1030 1030 }
1031 1031
1032 1032 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1033 1033 {
1034 1034 long rev;
1035 1035 int issnap;
1036 1036 Py_ssize_t length = index_length(self);
1037 1037
1038 1038 if (!pylong_to_long(value, &rev)) {
1039 1039 return NULL;
1040 1040 }
1041 1041 if (rev < -1 || rev >= length) {
1042 1042 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1043 1043 rev);
1044 1044 return NULL;
1045 1045 };
1046 1046 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1047 1047 if (issnap < 0) {
1048 1048 return NULL;
1049 1049 };
1050 1050 return PyBool_FromLong((long)issnap);
1051 1051 }
1052 1052
1053 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1054 {
1055 Py_ssize_t start_rev;
1056 PyObject *cache;
1057 Py_ssize_t base;
1058 Py_ssize_t rev;
1059 PyObject *key = NULL;
1060 PyObject *value = NULL;
1061 const Py_ssize_t length = index_length(self);
1062 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1063 return NULL;
1064 }
1065 for (rev = start_rev; rev < length; rev++) {
1066 int issnap;
1067 PyObject *allvalues = NULL;
1068 issnap = index_issnapshotrev(self, rev);
1069 if (issnap < 0) {
1070 goto bail;
1071 }
1072 if (issnap == 0) {
1073 continue;
1074 }
1075 base = (Py_ssize_t)index_baserev(self, rev);
1076 if (base == rev) {
1077 base = -1;
1078 }
1079 if (base == -2) {
1080 assert(PyErr_Occurred());
1081 goto bail;
1082 }
1083 key = PyInt_FromSsize_t(base);
1084 allvalues = PyDict_GetItem(cache, key);
1085 if (allvalues == NULL && PyErr_Occurred()) {
1086 goto bail;
1087 }
1088 if (allvalues == NULL) {
1089 int r;
1090 allvalues = PyList_New(0);
1091 if (!allvalues) {
1092 goto bail;
1093 }
1094 r = PyDict_SetItem(cache, key, allvalues);
1095 Py_DECREF(allvalues);
1096 if (r < 0) {
1097 goto bail;
1098 }
1099 }
1100 value = PyInt_FromSsize_t(rev);
1101 if (PyList_Append(allvalues, value)) {
1102 goto bail;
1103 }
1104 Py_CLEAR(key);
1105 Py_CLEAR(value);
1106 }
1107 Py_RETURN_NONE;
1108 bail:
1109 Py_XDECREF(key);
1110 Py_XDECREF(value);
1111 return NULL;
1112 }
1113
1053 1114 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1054 1115 {
1055 1116 int rev, generaldelta;
1056 1117 PyObject *stoparg;
1057 1118 int stoprev, iterrev, baserev = -1;
1058 1119 int stopped;
1059 1120 PyObject *chain = NULL, *result = NULL;
1060 1121 const Py_ssize_t length = index_length(self);
1061 1122
1062 1123 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1063 1124 return NULL;
1064 1125 }
1065 1126
1066 1127 if (PyInt_Check(stoparg)) {
1067 1128 stoprev = (int)PyInt_AsLong(stoparg);
1068 1129 if (stoprev == -1 && PyErr_Occurred()) {
1069 1130 return NULL;
1070 1131 }
1071 1132 } else if (stoparg == Py_None) {
1072 1133 stoprev = -2;
1073 1134 } else {
1074 1135 PyErr_SetString(PyExc_ValueError,
1075 1136 "stoprev must be integer or None");
1076 1137 return NULL;
1077 1138 }
1078 1139
1079 1140 if (rev < 0 || rev >= length) {
1080 1141 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1081 1142 return NULL;
1082 1143 }
1083 1144
1084 1145 chain = PyList_New(0);
1085 1146 if (chain == NULL) {
1086 1147 return NULL;
1087 1148 }
1088 1149
1089 1150 baserev = index_baserev(self, rev);
1090 1151
1091 1152 /* This should never happen. */
1092 1153 if (baserev <= -2) {
1093 1154 /* Error should be set by index_deref() */
1094 1155 assert(PyErr_Occurred());
1095 1156 goto bail;
1096 1157 }
1097 1158
1098 1159 iterrev = rev;
1099 1160
1100 1161 while (iterrev != baserev && iterrev != stoprev) {
1101 1162 PyObject *value = PyInt_FromLong(iterrev);
1102 1163 if (value == NULL) {
1103 1164 goto bail;
1104 1165 }
1105 1166 if (PyList_Append(chain, value)) {
1106 1167 Py_DECREF(value);
1107 1168 goto bail;
1108 1169 }
1109 1170 Py_DECREF(value);
1110 1171
1111 1172 if (generaldelta) {
1112 1173 iterrev = baserev;
1113 1174 } else {
1114 1175 iterrev--;
1115 1176 }
1116 1177
1117 1178 if (iterrev < 0) {
1118 1179 break;
1119 1180 }
1120 1181
1121 1182 if (iterrev >= length) {
1122 1183 PyErr_SetString(PyExc_IndexError,
1123 1184 "revision outside index");
1124 1185 return NULL;
1125 1186 }
1126 1187
1127 1188 baserev = index_baserev(self, iterrev);
1128 1189
1129 1190 /* This should never happen. */
1130 1191 if (baserev <= -2) {
1131 1192 /* Error should be set by index_deref() */
1132 1193 assert(PyErr_Occurred());
1133 1194 goto bail;
1134 1195 }
1135 1196 }
1136 1197
1137 1198 if (iterrev == stoprev) {
1138 1199 stopped = 1;
1139 1200 } else {
1140 1201 PyObject *value = PyInt_FromLong(iterrev);
1141 1202 if (value == NULL) {
1142 1203 goto bail;
1143 1204 }
1144 1205 if (PyList_Append(chain, value)) {
1145 1206 Py_DECREF(value);
1146 1207 goto bail;
1147 1208 }
1148 1209 Py_DECREF(value);
1149 1210
1150 1211 stopped = 0;
1151 1212 }
1152 1213
1153 1214 if (PyList_Reverse(chain)) {
1154 1215 goto bail;
1155 1216 }
1156 1217
1157 1218 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1158 1219 Py_DECREF(chain);
1159 1220 return result;
1160 1221
1161 1222 bail:
1162 1223 Py_DECREF(chain);
1163 1224 return NULL;
1164 1225 }
1165 1226
1166 1227 static inline int64_t
1167 1228 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1168 1229 {
1169 1230 int64_t start_offset;
1170 1231 int64_t end_offset;
1171 1232 int end_size;
1172 1233 start_offset = index_get_start(self, start_rev);
1173 1234 if (start_offset < 0) {
1174 1235 return -1;
1175 1236 }
1176 1237 end_offset = index_get_start(self, end_rev);
1177 1238 if (end_offset < 0) {
1178 1239 return -1;
1179 1240 }
1180 1241 end_size = index_get_length(self, end_rev);
1181 1242 if (end_size < 0) {
1182 1243 return -1;
1183 1244 }
1184 1245 if (end_offset < start_offset) {
1185 1246 PyErr_Format(PyExc_ValueError,
1186 1247 "corrupted revlog index: inconsistent offset "
1187 1248 "between revisions (%zd) and (%zd)",
1188 1249 start_rev, end_rev);
1189 1250 return -1;
1190 1251 }
1191 1252 return (end_offset - start_offset) + (int64_t)end_size;
1192 1253 }
1193 1254
1194 1255 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1195 1256 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1196 1257 Py_ssize_t startidx, Py_ssize_t endidx)
1197 1258 {
1198 1259 int length;
1199 1260 while (endidx > 1 && endidx > startidx) {
1200 1261 length = index_get_length(self, revs[endidx - 1]);
1201 1262 if (length < 0) {
1202 1263 return -1;
1203 1264 }
1204 1265 if (length != 0) {
1205 1266 break;
1206 1267 }
1207 1268 endidx -= 1;
1208 1269 }
1209 1270 return endidx;
1210 1271 }
1211 1272
1212 1273 struct Gap {
1213 1274 int64_t size;
1214 1275 Py_ssize_t idx;
1215 1276 };
1216 1277
1217 1278 static int gap_compare(const void *left, const void *right)
1218 1279 {
1219 1280 const struct Gap *l_left = ((const struct Gap *)left);
1220 1281 const struct Gap *l_right = ((const struct Gap *)right);
1221 1282 if (l_left->size < l_right->size) {
1222 1283 return -1;
1223 1284 } else if (l_left->size > l_right->size) {
1224 1285 return 1;
1225 1286 }
1226 1287 return 0;
1227 1288 }
1228 1289 static int Py_ssize_t_compare(const void *left, const void *right)
1229 1290 {
1230 1291 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1231 1292 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1232 1293 if (l_left < l_right) {
1233 1294 return -1;
1234 1295 } else if (l_left > l_right) {
1235 1296 return 1;
1236 1297 }
1237 1298 return 0;
1238 1299 }
1239 1300
1240 1301 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1241 1302 {
1242 1303 /* method arguments */
1243 1304 PyObject *list_revs = NULL; /* revisions in the chain */
1244 1305 double targetdensity = 0; /* min density to achieve */
1245 1306 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1246 1307
1247 1308 /* other core variables */
1248 1309 Py_ssize_t idxlen = index_length(self);
1249 1310 Py_ssize_t i; /* used for various iteration */
1250 1311 PyObject *result = NULL; /* the final return of the function */
1251 1312
1252 1313 /* generic information about the delta chain being slice */
1253 1314 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1254 1315 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1255 1316 int64_t chainpayload = 0; /* sum of all delta in the chain */
1256 1317 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1257 1318
1258 1319 /* variable used for slicing the delta chain */
1259 1320 int64_t readdata = 0; /* amount of data currently planned to be read */
1260 1321 double density = 0; /* ration of payload data compared to read ones */
1261 1322 int64_t previous_end;
1262 1323 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1263 1324 Py_ssize_t num_gaps =
1264 1325 0; /* total number of notable gap recorded so far */
1265 1326 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1266 1327 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1267 1328 PyObject *chunk = NULL; /* individual slice */
1268 1329 PyObject *allchunks = NULL; /* all slices */
1269 1330 Py_ssize_t previdx;
1270 1331
1271 1332 /* parsing argument */
1272 1333 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1273 1334 &targetdensity, &mingapsize)) {
1274 1335 goto bail;
1275 1336 }
1276 1337
1277 1338 /* If the delta chain contains a single element, we do not need slicing
1278 1339 */
1279 1340 num_revs = PyList_GET_SIZE(list_revs);
1280 1341 if (num_revs <= 1) {
1281 1342 result = PyTuple_Pack(1, list_revs);
1282 1343 goto done;
1283 1344 }
1284 1345
1285 1346 /* Turn the python list into a native integer array (for efficiency) */
1286 1347 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1287 1348 if (revs == NULL) {
1288 1349 PyErr_NoMemory();
1289 1350 goto bail;
1290 1351 }
1291 1352 for (i = 0; i < num_revs; i++) {
1292 1353 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1293 1354 if (revnum == -1 && PyErr_Occurred()) {
1294 1355 goto bail;
1295 1356 }
1296 1357 if (revnum < nullrev || revnum >= idxlen) {
1297 1358 PyErr_Format(PyExc_IndexError,
1298 1359 "index out of range: %zd", revnum);
1299 1360 goto bail;
1300 1361 }
1301 1362 revs[i] = revnum;
1302 1363 }
1303 1364
1304 1365 /* Compute and check various property of the unsliced delta chain */
1305 1366 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1306 1367 if (deltachainspan < 0) {
1307 1368 goto bail;
1308 1369 }
1309 1370
1310 1371 if (deltachainspan <= mingapsize) {
1311 1372 result = PyTuple_Pack(1, list_revs);
1312 1373 goto done;
1313 1374 }
1314 1375 chainpayload = 0;
1315 1376 for (i = 0; i < num_revs; i++) {
1316 1377 int tmp = index_get_length(self, revs[i]);
1317 1378 if (tmp < 0) {
1318 1379 goto bail;
1319 1380 }
1320 1381 chainpayload += tmp;
1321 1382 }
1322 1383
1323 1384 readdata = deltachainspan;
1324 1385 density = 1.0;
1325 1386
1326 1387 if (0 < deltachainspan) {
1327 1388 density = (double)chainpayload / (double)deltachainspan;
1328 1389 }
1329 1390
1330 1391 if (density >= targetdensity) {
1331 1392 result = PyTuple_Pack(1, list_revs);
1332 1393 goto done;
1333 1394 }
1334 1395
1335 1396 /* if chain is too sparse, look for relevant gaps */
1336 1397 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1337 1398 if (gaps == NULL) {
1338 1399 PyErr_NoMemory();
1339 1400 goto bail;
1340 1401 }
1341 1402
1342 1403 previous_end = -1;
1343 1404 for (i = 0; i < num_revs; i++) {
1344 1405 int64_t revstart;
1345 1406 int revsize;
1346 1407 revstart = index_get_start(self, revs[i]);
1347 1408 if (revstart < 0) {
1348 1409 goto bail;
1349 1410 };
1350 1411 revsize = index_get_length(self, revs[i]);
1351 1412 if (revsize < 0) {
1352 1413 goto bail;
1353 1414 };
1354 1415 if (revsize == 0) {
1355 1416 continue;
1356 1417 }
1357 1418 if (previous_end >= 0) {
1358 1419 int64_t gapsize = revstart - previous_end;
1359 1420 if (gapsize > mingapsize) {
1360 1421 gaps[num_gaps].size = gapsize;
1361 1422 gaps[num_gaps].idx = i;
1362 1423 num_gaps += 1;
1363 1424 }
1364 1425 }
1365 1426 previous_end = revstart + revsize;
1366 1427 }
1367 1428 if (num_gaps == 0) {
1368 1429 result = PyTuple_Pack(1, list_revs);
1369 1430 goto done;
1370 1431 }
1371 1432 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1372 1433
1373 1434 /* Slice the largest gap first, they improve the density the most */
1374 1435 selected_indices =
1375 1436 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1376 1437 if (selected_indices == NULL) {
1377 1438 PyErr_NoMemory();
1378 1439 goto bail;
1379 1440 }
1380 1441
1381 1442 for (i = num_gaps - 1; i >= 0; i--) {
1382 1443 selected_indices[num_selected] = gaps[i].idx;
1383 1444 readdata -= gaps[i].size;
1384 1445 num_selected += 1;
1385 1446 if (readdata <= 0) {
1386 1447 density = 1.0;
1387 1448 } else {
1388 1449 density = (double)chainpayload / (double)readdata;
1389 1450 }
1390 1451 if (density >= targetdensity) {
1391 1452 break;
1392 1453 }
1393 1454 }
1394 1455 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1395 1456 &Py_ssize_t_compare);
1396 1457
1397 1458 /* create the resulting slice */
1398 1459 allchunks = PyList_New(0);
1399 1460 if (allchunks == NULL) {
1400 1461 goto bail;
1401 1462 }
1402 1463 previdx = 0;
1403 1464 selected_indices[num_selected] = num_revs;
1404 1465 for (i = 0; i <= num_selected; i++) {
1405 1466 Py_ssize_t idx = selected_indices[i];
1406 1467 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1407 1468 if (endidx < 0) {
1408 1469 goto bail;
1409 1470 }
1410 1471 if (previdx < endidx) {
1411 1472 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1412 1473 if (chunk == NULL) {
1413 1474 goto bail;
1414 1475 }
1415 1476 if (PyList_Append(allchunks, chunk) == -1) {
1416 1477 goto bail;
1417 1478 }
1418 1479 Py_DECREF(chunk);
1419 1480 chunk = NULL;
1420 1481 }
1421 1482 previdx = idx;
1422 1483 }
1423 1484 result = allchunks;
1424 1485 goto done;
1425 1486
1426 1487 bail:
1427 1488 Py_XDECREF(allchunks);
1428 1489 Py_XDECREF(chunk);
1429 1490 done:
1430 1491 free(revs);
1431 1492 free(gaps);
1432 1493 free(selected_indices);
1433 1494 return result;
1434 1495 }
1435 1496
1436 1497 static inline int nt_level(const char *node, Py_ssize_t level)
1437 1498 {
1438 1499 int v = node[level >> 1];
1439 1500 if (!(level & 1))
1440 1501 v >>= 4;
1441 1502 return v & 0xf;
1442 1503 }
1443 1504
1444 1505 /*
1445 1506 * Return values:
1446 1507 *
1447 1508 * -4: match is ambiguous (multiple candidates)
1448 1509 * -2: not found
1449 1510 * rest: valid rev
1450 1511 */
1451 1512 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1452 1513 int hex)
1453 1514 {
1454 1515 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1455 1516 int level, maxlevel, off;
1456 1517
1457 1518 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1458 1519 return -1;
1459 1520
1460 1521 if (hex)
1461 1522 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1462 1523 else
1463 1524 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1464 1525
1465 1526 for (level = off = 0; level < maxlevel; level++) {
1466 1527 int k = getnybble(node, level);
1467 1528 nodetreenode *n = &self->nodes[off];
1468 1529 int v = n->children[k];
1469 1530
1470 1531 if (v < 0) {
1471 1532 const char *n;
1472 1533 Py_ssize_t i;
1473 1534
1474 1535 v = -(v + 2);
1475 1536 n = index_node(self->index, v);
1476 1537 if (n == NULL)
1477 1538 return -2;
1478 1539 for (i = level; i < maxlevel; i++)
1479 1540 if (getnybble(node, i) != nt_level(n, i))
1480 1541 return -2;
1481 1542 return v;
1482 1543 }
1483 1544 if (v == 0)
1484 1545 return -2;
1485 1546 off = v;
1486 1547 }
1487 1548 /* multiple matches against an ambiguous prefix */
1488 1549 return -4;
1489 1550 }
1490 1551
1491 1552 static int nt_new(nodetree *self)
1492 1553 {
1493 1554 if (self->length == self->capacity) {
1494 1555 unsigned newcapacity;
1495 1556 nodetreenode *newnodes;
1496 1557 newcapacity = self->capacity * 2;
1497 1558 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1498 1559 PyErr_SetString(PyExc_MemoryError,
1499 1560 "overflow in nt_new");
1500 1561 return -1;
1501 1562 }
1502 1563 newnodes =
1503 1564 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1504 1565 if (newnodes == NULL) {
1505 1566 PyErr_SetString(PyExc_MemoryError, "out of memory");
1506 1567 return -1;
1507 1568 }
1508 1569 self->capacity = newcapacity;
1509 1570 self->nodes = newnodes;
1510 1571 memset(&self->nodes[self->length], 0,
1511 1572 sizeof(nodetreenode) * (self->capacity - self->length));
1512 1573 }
1513 1574 return self->length++;
1514 1575 }
1515 1576
1516 1577 static int nt_insert(nodetree *self, const char *node, int rev)
1517 1578 {
1518 1579 int level = 0;
1519 1580 int off = 0;
1520 1581
1521 1582 while (level < 40) {
1522 1583 int k = nt_level(node, level);
1523 1584 nodetreenode *n;
1524 1585 int v;
1525 1586
1526 1587 n = &self->nodes[off];
1527 1588 v = n->children[k];
1528 1589
1529 1590 if (v == 0) {
1530 1591 n->children[k] = -rev - 2;
1531 1592 return 0;
1532 1593 }
1533 1594 if (v < 0) {
1534 1595 const char *oldnode =
1535 1596 index_node_existing(self->index, -(v + 2));
1536 1597 int noff;
1537 1598
1538 1599 if (oldnode == NULL)
1539 1600 return -1;
1540 1601 if (!memcmp(oldnode, node, 20)) {
1541 1602 n->children[k] = -rev - 2;
1542 1603 return 0;
1543 1604 }
1544 1605 noff = nt_new(self);
1545 1606 if (noff == -1)
1546 1607 return -1;
1547 1608 /* self->nodes may have been changed by realloc */
1548 1609 self->nodes[off].children[k] = noff;
1549 1610 off = noff;
1550 1611 n = &self->nodes[off];
1551 1612 n->children[nt_level(oldnode, ++level)] = v;
1552 1613 if (level > self->depth)
1553 1614 self->depth = level;
1554 1615 self->splits += 1;
1555 1616 } else {
1556 1617 level += 1;
1557 1618 off = v;
1558 1619 }
1559 1620 }
1560 1621
1561 1622 return -1;
1562 1623 }
1563 1624
1564 1625 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1565 1626 {
1566 1627 Py_ssize_t rev;
1567 1628 const char *node;
1568 1629 Py_ssize_t length;
1569 1630 if (!PyArg_ParseTuple(args, "n", &rev))
1570 1631 return NULL;
1571 1632 length = index_length(self->nt.index);
1572 1633 if (rev < 0 || rev >= length) {
1573 1634 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1574 1635 return NULL;
1575 1636 }
1576 1637 node = index_node_existing(self->nt.index, rev);
1577 1638 if (nt_insert(&self->nt, node, (int)rev) == -1)
1578 1639 return NULL;
1579 1640 Py_RETURN_NONE;
1580 1641 }
1581 1642
1582 1643 static int nt_delete_node(nodetree *self, const char *node)
1583 1644 {
1584 1645 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1585 1646 */
1586 1647 return nt_insert(self, node, -2);
1587 1648 }
1588 1649
1589 1650 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1590 1651 {
1591 1652 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1592 1653 self->nodes = NULL;
1593 1654
1594 1655 self->index = index;
1595 1656 /* The input capacity is in terms of revisions, while the field is in
1596 1657 * terms of nodetree nodes. */
1597 1658 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1598 1659 self->depth = 0;
1599 1660 self->splits = 0;
1600 1661 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1601 1662 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1602 1663 return -1;
1603 1664 }
1604 1665 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1605 1666 if (self->nodes == NULL) {
1606 1667 PyErr_NoMemory();
1607 1668 return -1;
1608 1669 }
1609 1670 self->length = 1;
1610 1671 return 0;
1611 1672 }
1612 1673
1613 1674 static int ntobj_init(nodetreeObject *self, PyObject *args)
1614 1675 {
1615 1676 PyObject *index;
1616 1677 unsigned capacity;
1617 1678 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1618 1679 &capacity))
1619 1680 return -1;
1620 1681 Py_INCREF(index);
1621 1682 return nt_init(&self->nt, (indexObject *)index, capacity);
1622 1683 }
1623 1684
1624 1685 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1625 1686 {
1626 1687 return nt_find(self, node, nodelen, 1);
1627 1688 }
1628 1689
1629 1690 /*
1630 1691 * Find the length of the shortest unique prefix of node.
1631 1692 *
1632 1693 * Return values:
1633 1694 *
1634 1695 * -3: error (exception set)
1635 1696 * -2: not found (no exception set)
1636 1697 * rest: length of shortest prefix
1637 1698 */
1638 1699 static int nt_shortest(nodetree *self, const char *node)
1639 1700 {
1640 1701 int level, off;
1641 1702
1642 1703 for (level = off = 0; level < 40; level++) {
1643 1704 int k, v;
1644 1705 nodetreenode *n = &self->nodes[off];
1645 1706 k = nt_level(node, level);
1646 1707 v = n->children[k];
1647 1708 if (v < 0) {
1648 1709 const char *n;
1649 1710 v = -(v + 2);
1650 1711 n = index_node_existing(self->index, v);
1651 1712 if (n == NULL)
1652 1713 return -3;
1653 1714 if (memcmp(node, n, 20) != 0)
1654 1715 /*
1655 1716 * Found a unique prefix, but it wasn't for the
1656 1717 * requested node (i.e the requested node does
1657 1718 * not exist).
1658 1719 */
1659 1720 return -2;
1660 1721 return level + 1;
1661 1722 }
1662 1723 if (v == 0)
1663 1724 return -2;
1664 1725 off = v;
1665 1726 }
1666 1727 /*
1667 1728 * The node was still not unique after 40 hex digits, so this won't
1668 1729 * happen. Also, if we get here, then there's a programming error in
1669 1730 * this file that made us insert a node longer than 40 hex digits.
1670 1731 */
1671 1732 PyErr_SetString(PyExc_Exception, "broken node tree");
1672 1733 return -3;
1673 1734 }
1674 1735
1675 1736 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1676 1737 {
1677 1738 PyObject *val;
1678 1739 char *node;
1679 1740 int length;
1680 1741
1681 1742 if (!PyArg_ParseTuple(args, "O", &val))
1682 1743 return NULL;
1683 1744 if (node_check(val, &node) == -1)
1684 1745 return NULL;
1685 1746
1686 1747 length = nt_shortest(&self->nt, node);
1687 1748 if (length == -3)
1688 1749 return NULL;
1689 1750 if (length == -2) {
1690 1751 raise_revlog_error();
1691 1752 return NULL;
1692 1753 }
1693 1754 return PyInt_FromLong(length);
1694 1755 }
1695 1756
1696 1757 static void nt_dealloc(nodetree *self)
1697 1758 {
1698 1759 free(self->nodes);
1699 1760 self->nodes = NULL;
1700 1761 }
1701 1762
1702 1763 static void ntobj_dealloc(nodetreeObject *self)
1703 1764 {
1704 1765 Py_XDECREF(self->nt.index);
1705 1766 nt_dealloc(&self->nt);
1706 1767 PyObject_Del(self);
1707 1768 }
1708 1769
1709 1770 static PyMethodDef ntobj_methods[] = {
1710 1771 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1711 1772 "insert an index entry"},
1712 1773 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1713 1774 "find length of shortest hex nodeid of a binary ID"},
1714 1775 {NULL} /* Sentinel */
1715 1776 };
1716 1777
1717 1778 static PyTypeObject nodetreeType = {
1718 1779 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1719 1780 "parsers.nodetree", /* tp_name */
1720 1781 sizeof(nodetreeObject), /* tp_basicsize */
1721 1782 0, /* tp_itemsize */
1722 1783 (destructor)ntobj_dealloc, /* tp_dealloc */
1723 1784 0, /* tp_print */
1724 1785 0, /* tp_getattr */
1725 1786 0, /* tp_setattr */
1726 1787 0, /* tp_compare */
1727 1788 0, /* tp_repr */
1728 1789 0, /* tp_as_number */
1729 1790 0, /* tp_as_sequence */
1730 1791 0, /* tp_as_mapping */
1731 1792 0, /* tp_hash */
1732 1793 0, /* tp_call */
1733 1794 0, /* tp_str */
1734 1795 0, /* tp_getattro */
1735 1796 0, /* tp_setattro */
1736 1797 0, /* tp_as_buffer */
1737 1798 Py_TPFLAGS_DEFAULT, /* tp_flags */
1738 1799 "nodetree", /* tp_doc */
1739 1800 0, /* tp_traverse */
1740 1801 0, /* tp_clear */
1741 1802 0, /* tp_richcompare */
1742 1803 0, /* tp_weaklistoffset */
1743 1804 0, /* tp_iter */
1744 1805 0, /* tp_iternext */
1745 1806 ntobj_methods, /* tp_methods */
1746 1807 0, /* tp_members */
1747 1808 0, /* tp_getset */
1748 1809 0, /* tp_base */
1749 1810 0, /* tp_dict */
1750 1811 0, /* tp_descr_get */
1751 1812 0, /* tp_descr_set */
1752 1813 0, /* tp_dictoffset */
1753 1814 (initproc)ntobj_init, /* tp_init */
1754 1815 0, /* tp_alloc */
1755 1816 };
1756 1817
1757 1818 static int index_init_nt(indexObject *self)
1758 1819 {
1759 1820 if (!self->ntinitialized) {
1760 1821 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1761 1822 nt_dealloc(&self->nt);
1762 1823 return -1;
1763 1824 }
1764 1825 if (nt_insert(&self->nt, nullid, -1) == -1) {
1765 1826 nt_dealloc(&self->nt);
1766 1827 return -1;
1767 1828 }
1768 1829 self->ntinitialized = 1;
1769 1830 self->ntrev = (int)index_length(self);
1770 1831 self->ntlookups = 1;
1771 1832 self->ntmisses = 0;
1772 1833 }
1773 1834 return 0;
1774 1835 }
1775 1836
1776 1837 /*
1777 1838 * Return values:
1778 1839 *
1779 1840 * -3: error (exception set)
1780 1841 * -2: not found (no exception set)
1781 1842 * rest: valid rev
1782 1843 */
1783 1844 static int index_find_node(indexObject *self, const char *node,
1784 1845 Py_ssize_t nodelen)
1785 1846 {
1786 1847 int rev;
1787 1848
1788 1849 if (index_init_nt(self) == -1)
1789 1850 return -3;
1790 1851
1791 1852 self->ntlookups++;
1792 1853 rev = nt_find(&self->nt, node, nodelen, 0);
1793 1854 if (rev >= -1)
1794 1855 return rev;
1795 1856
1796 1857 /*
1797 1858 * For the first handful of lookups, we scan the entire index,
1798 1859 * and cache only the matching nodes. This optimizes for cases
1799 1860 * like "hg tip", where only a few nodes are accessed.
1800 1861 *
1801 1862 * After that, we cache every node we visit, using a single
1802 1863 * scan amortized over multiple lookups. This gives the best
1803 1864 * bulk performance, e.g. for "hg log".
1804 1865 */
1805 1866 if (self->ntmisses++ < 4) {
1806 1867 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1807 1868 const char *n = index_node_existing(self, rev);
1808 1869 if (n == NULL)
1809 1870 return -3;
1810 1871 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1811 1872 if (nt_insert(&self->nt, n, rev) == -1)
1812 1873 return -3;
1813 1874 break;
1814 1875 }
1815 1876 }
1816 1877 } else {
1817 1878 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1818 1879 const char *n = index_node_existing(self, rev);
1819 1880 if (n == NULL)
1820 1881 return -3;
1821 1882 if (nt_insert(&self->nt, n, rev) == -1) {
1822 1883 self->ntrev = rev + 1;
1823 1884 return -3;
1824 1885 }
1825 1886 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1826 1887 break;
1827 1888 }
1828 1889 }
1829 1890 self->ntrev = rev;
1830 1891 }
1831 1892
1832 1893 if (rev >= 0)
1833 1894 return rev;
1834 1895 return -2;
1835 1896 }
1836 1897
1837 1898 static PyObject *index_getitem(indexObject *self, PyObject *value)
1838 1899 {
1839 1900 char *node;
1840 1901 int rev;
1841 1902
1842 1903 if (PyInt_Check(value)) {
1843 1904 long idx;
1844 1905 if (!pylong_to_long(value, &idx)) {
1845 1906 return NULL;
1846 1907 }
1847 1908 return index_get(self, idx);
1848 1909 }
1849 1910
1850 1911 if (node_check(value, &node) == -1)
1851 1912 return NULL;
1852 1913 rev = index_find_node(self, node, 20);
1853 1914 if (rev >= -1)
1854 1915 return PyInt_FromLong(rev);
1855 1916 if (rev == -2)
1856 1917 raise_revlog_error();
1857 1918 return NULL;
1858 1919 }
1859 1920
1860 1921 /*
1861 1922 * Fully populate the radix tree.
1862 1923 */
1863 1924 static int index_populate_nt(indexObject *self)
1864 1925 {
1865 1926 int rev;
1866 1927 if (self->ntrev > 0) {
1867 1928 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1868 1929 const char *n = index_node_existing(self, rev);
1869 1930 if (n == NULL)
1870 1931 return -1;
1871 1932 if (nt_insert(&self->nt, n, rev) == -1)
1872 1933 return -1;
1873 1934 }
1874 1935 self->ntrev = -1;
1875 1936 }
1876 1937 return 0;
1877 1938 }
1878 1939
1879 1940 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1880 1941 {
1881 1942 const char *fullnode;
1882 1943 int nodelen;
1883 1944 char *node;
1884 1945 int rev, i;
1885 1946
1886 1947 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1887 1948 return NULL;
1888 1949
1889 1950 if (nodelen < 1) {
1890 1951 PyErr_SetString(PyExc_ValueError, "key too short");
1891 1952 return NULL;
1892 1953 }
1893 1954
1894 1955 if (nodelen > 40) {
1895 1956 PyErr_SetString(PyExc_ValueError, "key too long");
1896 1957 return NULL;
1897 1958 }
1898 1959
1899 1960 for (i = 0; i < nodelen; i++)
1900 1961 hexdigit(node, i);
1901 1962 if (PyErr_Occurred()) {
1902 1963 /* input contains non-hex characters */
1903 1964 PyErr_Clear();
1904 1965 Py_RETURN_NONE;
1905 1966 }
1906 1967
1907 1968 if (index_init_nt(self) == -1)
1908 1969 return NULL;
1909 1970 if (index_populate_nt(self) == -1)
1910 1971 return NULL;
1911 1972 rev = nt_partialmatch(&self->nt, node, nodelen);
1912 1973
1913 1974 switch (rev) {
1914 1975 case -4:
1915 1976 raise_revlog_error();
1916 1977 return NULL;
1917 1978 case -2:
1918 1979 Py_RETURN_NONE;
1919 1980 case -1:
1920 1981 return PyBytes_FromStringAndSize(nullid, 20);
1921 1982 }
1922 1983
1923 1984 fullnode = index_node_existing(self, rev);
1924 1985 if (fullnode == NULL) {
1925 1986 return NULL;
1926 1987 }
1927 1988 return PyBytes_FromStringAndSize(fullnode, 20);
1928 1989 }
1929 1990
1930 1991 static PyObject *index_shortest(indexObject *self, PyObject *args)
1931 1992 {
1932 1993 PyObject *val;
1933 1994 char *node;
1934 1995 int length;
1935 1996
1936 1997 if (!PyArg_ParseTuple(args, "O", &val))
1937 1998 return NULL;
1938 1999 if (node_check(val, &node) == -1)
1939 2000 return NULL;
1940 2001
1941 2002 self->ntlookups++;
1942 2003 if (index_init_nt(self) == -1)
1943 2004 return NULL;
1944 2005 if (index_populate_nt(self) == -1)
1945 2006 return NULL;
1946 2007 length = nt_shortest(&self->nt, node);
1947 2008 if (length == -3)
1948 2009 return NULL;
1949 2010 if (length == -2) {
1950 2011 raise_revlog_error();
1951 2012 return NULL;
1952 2013 }
1953 2014 return PyInt_FromLong(length);
1954 2015 }
1955 2016
1956 2017 static PyObject *index_m_get(indexObject *self, PyObject *args)
1957 2018 {
1958 2019 PyObject *val;
1959 2020 char *node;
1960 2021 int rev;
1961 2022
1962 2023 if (!PyArg_ParseTuple(args, "O", &val))
1963 2024 return NULL;
1964 2025 if (node_check(val, &node) == -1)
1965 2026 return NULL;
1966 2027 rev = index_find_node(self, node, 20);
1967 2028 if (rev == -3)
1968 2029 return NULL;
1969 2030 if (rev == -2)
1970 2031 Py_RETURN_NONE;
1971 2032 return PyInt_FromLong(rev);
1972 2033 }
1973 2034
1974 2035 static int index_contains(indexObject *self, PyObject *value)
1975 2036 {
1976 2037 char *node;
1977 2038
1978 2039 if (PyInt_Check(value)) {
1979 2040 long rev;
1980 2041 if (!pylong_to_long(value, &rev)) {
1981 2042 return -1;
1982 2043 }
1983 2044 return rev >= -1 && rev < index_length(self);
1984 2045 }
1985 2046
1986 2047 if (node_check(value, &node) == -1)
1987 2048 return -1;
1988 2049
1989 2050 switch (index_find_node(self, node, 20)) {
1990 2051 case -3:
1991 2052 return -1;
1992 2053 case -2:
1993 2054 return 0;
1994 2055 default:
1995 2056 return 1;
1996 2057 }
1997 2058 }
1998 2059
1999 2060 typedef uint64_t bitmask;
2000 2061
2001 2062 /*
2002 2063 * Given a disjoint set of revs, return all candidates for the
2003 2064 * greatest common ancestor. In revset notation, this is the set
2004 2065 * "heads(::a and ::b and ...)"
2005 2066 */
2006 2067 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2007 2068 int revcount)
2008 2069 {
2009 2070 const bitmask allseen = (1ull << revcount) - 1;
2010 2071 const bitmask poison = 1ull << revcount;
2011 2072 PyObject *gca = PyList_New(0);
2012 2073 int i, v, interesting;
2013 2074 int maxrev = -1;
2014 2075 bitmask sp;
2015 2076 bitmask *seen;
2016 2077
2017 2078 if (gca == NULL)
2018 2079 return PyErr_NoMemory();
2019 2080
2020 2081 for (i = 0; i < revcount; i++) {
2021 2082 if (revs[i] > maxrev)
2022 2083 maxrev = revs[i];
2023 2084 }
2024 2085
2025 2086 seen = calloc(sizeof(*seen), maxrev + 1);
2026 2087 if (seen == NULL) {
2027 2088 Py_DECREF(gca);
2028 2089 return PyErr_NoMemory();
2029 2090 }
2030 2091
2031 2092 for (i = 0; i < revcount; i++)
2032 2093 seen[revs[i]] = 1ull << i;
2033 2094
2034 2095 interesting = revcount;
2035 2096
2036 2097 for (v = maxrev; v >= 0 && interesting; v--) {
2037 2098 bitmask sv = seen[v];
2038 2099 int parents[2];
2039 2100
2040 2101 if (!sv)
2041 2102 continue;
2042 2103
2043 2104 if (sv < poison) {
2044 2105 interesting -= 1;
2045 2106 if (sv == allseen) {
2046 2107 PyObject *obj = PyInt_FromLong(v);
2047 2108 if (obj == NULL)
2048 2109 goto bail;
2049 2110 if (PyList_Append(gca, obj) == -1) {
2050 2111 Py_DECREF(obj);
2051 2112 goto bail;
2052 2113 }
2053 2114 sv |= poison;
2054 2115 for (i = 0; i < revcount; i++) {
2055 2116 if (revs[i] == v)
2056 2117 goto done;
2057 2118 }
2058 2119 }
2059 2120 }
2060 2121 if (index_get_parents(self, v, parents, maxrev) < 0)
2061 2122 goto bail;
2062 2123
2063 2124 for (i = 0; i < 2; i++) {
2064 2125 int p = parents[i];
2065 2126 if (p == -1)
2066 2127 continue;
2067 2128 sp = seen[p];
2068 2129 if (sv < poison) {
2069 2130 if (sp == 0) {
2070 2131 seen[p] = sv;
2071 2132 interesting++;
2072 2133 } else if (sp != sv)
2073 2134 seen[p] |= sv;
2074 2135 } else {
2075 2136 if (sp && sp < poison)
2076 2137 interesting--;
2077 2138 seen[p] = sv;
2078 2139 }
2079 2140 }
2080 2141 }
2081 2142
2082 2143 done:
2083 2144 free(seen);
2084 2145 return gca;
2085 2146 bail:
2086 2147 free(seen);
2087 2148 Py_XDECREF(gca);
2088 2149 return NULL;
2089 2150 }
2090 2151
2091 2152 /*
2092 2153 * Given a disjoint set of revs, return the subset with the longest
2093 2154 * path to the root.
2094 2155 */
2095 2156 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2096 2157 {
2097 2158 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2098 2159 static const Py_ssize_t capacity = 24;
2099 2160 int *depth, *interesting = NULL;
2100 2161 int i, j, v, ninteresting;
2101 2162 PyObject *dict = NULL, *keys = NULL;
2102 2163 long *seen = NULL;
2103 2164 int maxrev = -1;
2104 2165 long final;
2105 2166
2106 2167 if (revcount > capacity) {
2107 2168 PyErr_Format(PyExc_OverflowError,
2108 2169 "bitset size (%ld) > capacity (%ld)",
2109 2170 (long)revcount, (long)capacity);
2110 2171 return NULL;
2111 2172 }
2112 2173
2113 2174 for (i = 0; i < revcount; i++) {
2114 2175 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2115 2176 if (n > maxrev)
2116 2177 maxrev = n;
2117 2178 }
2118 2179
2119 2180 depth = calloc(sizeof(*depth), maxrev + 1);
2120 2181 if (depth == NULL)
2121 2182 return PyErr_NoMemory();
2122 2183
2123 2184 seen = calloc(sizeof(*seen), maxrev + 1);
2124 2185 if (seen == NULL) {
2125 2186 PyErr_NoMemory();
2126 2187 goto bail;
2127 2188 }
2128 2189
2129 2190 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2130 2191 if (interesting == NULL) {
2131 2192 PyErr_NoMemory();
2132 2193 goto bail;
2133 2194 }
2134 2195
2135 2196 if (PyList_Sort(revs) == -1)
2136 2197 goto bail;
2137 2198
2138 2199 for (i = 0; i < revcount; i++) {
2139 2200 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2140 2201 long b = 1l << i;
2141 2202 depth[n] = 1;
2142 2203 seen[n] = b;
2143 2204 interesting[b] = 1;
2144 2205 }
2145 2206
2146 2207 /* invariant: ninteresting is the number of non-zero entries in
2147 2208 * interesting. */
2148 2209 ninteresting = (int)revcount;
2149 2210
2150 2211 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2151 2212 int dv = depth[v];
2152 2213 int parents[2];
2153 2214 long sv;
2154 2215
2155 2216 if (dv == 0)
2156 2217 continue;
2157 2218
2158 2219 sv = seen[v];
2159 2220 if (index_get_parents(self, v, parents, maxrev) < 0)
2160 2221 goto bail;
2161 2222
2162 2223 for (i = 0; i < 2; i++) {
2163 2224 int p = parents[i];
2164 2225 long sp;
2165 2226 int dp;
2166 2227
2167 2228 if (p == -1)
2168 2229 continue;
2169 2230
2170 2231 dp = depth[p];
2171 2232 sp = seen[p];
2172 2233 if (dp <= dv) {
2173 2234 depth[p] = dv + 1;
2174 2235 if (sp != sv) {
2175 2236 interesting[sv] += 1;
2176 2237 seen[p] = sv;
2177 2238 if (sp) {
2178 2239 interesting[sp] -= 1;
2179 2240 if (interesting[sp] == 0)
2180 2241 ninteresting -= 1;
2181 2242 }
2182 2243 }
2183 2244 } else if (dv == dp - 1) {
2184 2245 long nsp = sp | sv;
2185 2246 if (nsp == sp)
2186 2247 continue;
2187 2248 seen[p] = nsp;
2188 2249 interesting[sp] -= 1;
2189 2250 if (interesting[sp] == 0)
2190 2251 ninteresting -= 1;
2191 2252 if (interesting[nsp] == 0)
2192 2253 ninteresting += 1;
2193 2254 interesting[nsp] += 1;
2194 2255 }
2195 2256 }
2196 2257 interesting[sv] -= 1;
2197 2258 if (interesting[sv] == 0)
2198 2259 ninteresting -= 1;
2199 2260 }
2200 2261
2201 2262 final = 0;
2202 2263 j = ninteresting;
2203 2264 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2204 2265 if (interesting[i] == 0)
2205 2266 continue;
2206 2267 final |= i;
2207 2268 j -= 1;
2208 2269 }
2209 2270 if (final == 0) {
2210 2271 keys = PyList_New(0);
2211 2272 goto bail;
2212 2273 }
2213 2274
2214 2275 dict = PyDict_New();
2215 2276 if (dict == NULL)
2216 2277 goto bail;
2217 2278
2218 2279 for (i = 0; i < revcount; i++) {
2219 2280 PyObject *key;
2220 2281
2221 2282 if ((final & (1 << i)) == 0)
2222 2283 continue;
2223 2284
2224 2285 key = PyList_GET_ITEM(revs, i);
2225 2286 Py_INCREF(key);
2226 2287 Py_INCREF(Py_None);
2227 2288 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2228 2289 Py_DECREF(key);
2229 2290 Py_DECREF(Py_None);
2230 2291 goto bail;
2231 2292 }
2232 2293 }
2233 2294
2234 2295 keys = PyDict_Keys(dict);
2235 2296
2236 2297 bail:
2237 2298 free(depth);
2238 2299 free(seen);
2239 2300 free(interesting);
2240 2301 Py_XDECREF(dict);
2241 2302
2242 2303 return keys;
2243 2304 }
2244 2305
2245 2306 /*
2246 2307 * Given a (possibly overlapping) set of revs, return all the
2247 2308 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2248 2309 */
2249 2310 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2250 2311 {
2251 2312 PyObject *ret = NULL;
2252 2313 Py_ssize_t argcount, i, len;
2253 2314 bitmask repeat = 0;
2254 2315 int revcount = 0;
2255 2316 int *revs;
2256 2317
2257 2318 argcount = PySequence_Length(args);
2258 2319 revs = PyMem_Malloc(argcount * sizeof(*revs));
2259 2320 if (argcount > 0 && revs == NULL)
2260 2321 return PyErr_NoMemory();
2261 2322 len = index_length(self);
2262 2323
2263 2324 for (i = 0; i < argcount; i++) {
2264 2325 static const int capacity = 24;
2265 2326 PyObject *obj = PySequence_GetItem(args, i);
2266 2327 bitmask x;
2267 2328 long val;
2268 2329
2269 2330 if (!PyInt_Check(obj)) {
2270 2331 PyErr_SetString(PyExc_TypeError,
2271 2332 "arguments must all be ints");
2272 2333 Py_DECREF(obj);
2273 2334 goto bail;
2274 2335 }
2275 2336 val = PyInt_AsLong(obj);
2276 2337 Py_DECREF(obj);
2277 2338 if (val == -1) {
2278 2339 ret = PyList_New(0);
2279 2340 goto done;
2280 2341 }
2281 2342 if (val < 0 || val >= len) {
2282 2343 PyErr_SetString(PyExc_IndexError, "index out of range");
2283 2344 goto bail;
2284 2345 }
2285 2346 /* this cheesy bloom filter lets us avoid some more
2286 2347 * expensive duplicate checks in the common set-is-disjoint
2287 2348 * case */
2288 2349 x = 1ull << (val & 0x3f);
2289 2350 if (repeat & x) {
2290 2351 int k;
2291 2352 for (k = 0; k < revcount; k++) {
2292 2353 if (val == revs[k])
2293 2354 goto duplicate;
2294 2355 }
2295 2356 } else
2296 2357 repeat |= x;
2297 2358 if (revcount >= capacity) {
2298 2359 PyErr_Format(PyExc_OverflowError,
2299 2360 "bitset size (%d) > capacity (%d)",
2300 2361 revcount, capacity);
2301 2362 goto bail;
2302 2363 }
2303 2364 revs[revcount++] = (int)val;
2304 2365 duplicate:;
2305 2366 }
2306 2367
2307 2368 if (revcount == 0) {
2308 2369 ret = PyList_New(0);
2309 2370 goto done;
2310 2371 }
2311 2372 if (revcount == 1) {
2312 2373 PyObject *obj;
2313 2374 ret = PyList_New(1);
2314 2375 if (ret == NULL)
2315 2376 goto bail;
2316 2377 obj = PyInt_FromLong(revs[0]);
2317 2378 if (obj == NULL)
2318 2379 goto bail;
2319 2380 PyList_SET_ITEM(ret, 0, obj);
2320 2381 goto done;
2321 2382 }
2322 2383
2323 2384 ret = find_gca_candidates(self, revs, revcount);
2324 2385 if (ret == NULL)
2325 2386 goto bail;
2326 2387
2327 2388 done:
2328 2389 PyMem_Free(revs);
2329 2390 return ret;
2330 2391
2331 2392 bail:
2332 2393 PyMem_Free(revs);
2333 2394 Py_XDECREF(ret);
2334 2395 return NULL;
2335 2396 }
2336 2397
2337 2398 /*
2338 2399 * Given a (possibly overlapping) set of revs, return the greatest
2339 2400 * common ancestors: those with the longest path to the root.
2340 2401 */
2341 2402 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2342 2403 {
2343 2404 PyObject *ret;
2344 2405 PyObject *gca = index_commonancestorsheads(self, args);
2345 2406 if (gca == NULL)
2346 2407 return NULL;
2347 2408
2348 2409 if (PyList_GET_SIZE(gca) <= 1) {
2349 2410 return gca;
2350 2411 }
2351 2412
2352 2413 ret = find_deepest(self, gca);
2353 2414 Py_DECREF(gca);
2354 2415 return ret;
2355 2416 }
2356 2417
2357 2418 /*
2358 2419 * Invalidate any trie entries introduced by added revs.
2359 2420 */
2360 2421 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2361 2422 {
2362 2423 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2363 2424
2364 2425 for (i = start; i < len; i++) {
2365 2426 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2366 2427 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2367 2428
2368 2429 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2369 2430 }
2370 2431
2371 2432 if (start == 0)
2372 2433 Py_CLEAR(self->added);
2373 2434 }
2374 2435
2375 2436 /*
2376 2437 * Delete a numeric range of revs, which must be at the end of the
2377 2438 * range, but exclude the sentinel nullid entry.
2378 2439 */
2379 2440 static int index_slice_del(indexObject *self, PyObject *item)
2380 2441 {
2381 2442 Py_ssize_t start, stop, step, slicelength;
2382 2443 Py_ssize_t length = index_length(self) + 1;
2383 2444 int ret = 0;
2384 2445
2385 2446 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2386 2447 #ifdef IS_PY3K
2387 2448 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2388 2449 &slicelength) < 0)
2389 2450 #else
2390 2451 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2391 2452 &step, &slicelength) < 0)
2392 2453 #endif
2393 2454 return -1;
2394 2455
2395 2456 if (slicelength <= 0)
2396 2457 return 0;
2397 2458
2398 2459 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2399 2460 stop = start;
2400 2461
2401 2462 if (step < 0) {
2402 2463 stop = start + 1;
2403 2464 start = stop + step * (slicelength - 1) - 1;
2404 2465 step = -step;
2405 2466 }
2406 2467
2407 2468 if (step != 1) {
2408 2469 PyErr_SetString(PyExc_ValueError,
2409 2470 "revlog index delete requires step size of 1");
2410 2471 return -1;
2411 2472 }
2412 2473
2413 2474 if (stop != length - 1) {
2414 2475 PyErr_SetString(PyExc_IndexError,
2415 2476 "revlog index deletion indices are invalid");
2416 2477 return -1;
2417 2478 }
2418 2479
2419 2480 if (start < self->length) {
2420 2481 if (self->ntinitialized) {
2421 2482 Py_ssize_t i;
2422 2483
2423 2484 for (i = start + 1; i < self->length; i++) {
2424 2485 const char *node = index_node_existing(self, i);
2425 2486 if (node == NULL)
2426 2487 return -1;
2427 2488
2428 2489 nt_delete_node(&self->nt, node);
2429 2490 }
2430 2491 if (self->added)
2431 2492 index_invalidate_added(self, 0);
2432 2493 if (self->ntrev > start)
2433 2494 self->ntrev = (int)start;
2434 2495 }
2435 2496 self->length = start;
2436 2497 if (start < self->raw_length) {
2437 2498 if (self->cache) {
2438 2499 Py_ssize_t i;
2439 2500 for (i = start; i < self->raw_length; i++)
2440 2501 Py_CLEAR(self->cache[i]);
2441 2502 }
2442 2503 self->raw_length = start;
2443 2504 }
2444 2505 goto done;
2445 2506 }
2446 2507
2447 2508 if (self->ntinitialized) {
2448 2509 index_invalidate_added(self, start - self->length);
2449 2510 if (self->ntrev > start)
2450 2511 self->ntrev = (int)start;
2451 2512 }
2452 2513 if (self->added)
2453 2514 ret = PyList_SetSlice(self->added, start - self->length,
2454 2515 PyList_GET_SIZE(self->added), NULL);
2455 2516 done:
2456 2517 Py_CLEAR(self->headrevs);
2457 2518 return ret;
2458 2519 }
2459 2520
2460 2521 /*
2461 2522 * Supported ops:
2462 2523 *
2463 2524 * slice deletion
2464 2525 * string assignment (extend node->rev mapping)
2465 2526 * string deletion (shrink node->rev mapping)
2466 2527 */
2467 2528 static int index_assign_subscript(indexObject *self, PyObject *item,
2468 2529 PyObject *value)
2469 2530 {
2470 2531 char *node;
2471 2532 long rev;
2472 2533
2473 2534 if (PySlice_Check(item) && value == NULL)
2474 2535 return index_slice_del(self, item);
2475 2536
2476 2537 if (node_check(item, &node) == -1)
2477 2538 return -1;
2478 2539
2479 2540 if (value == NULL)
2480 2541 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2481 2542 : 0;
2482 2543 rev = PyInt_AsLong(value);
2483 2544 if (rev > INT_MAX || rev < 0) {
2484 2545 if (!PyErr_Occurred())
2485 2546 PyErr_SetString(PyExc_ValueError, "rev out of range");
2486 2547 return -1;
2487 2548 }
2488 2549
2489 2550 if (index_init_nt(self) == -1)
2490 2551 return -1;
2491 2552 return nt_insert(&self->nt, node, (int)rev);
2492 2553 }
2493 2554
2494 2555 /*
2495 2556 * Find all RevlogNG entries in an index that has inline data. Update
2496 2557 * the optional "offsets" table with those entries.
2497 2558 */
2498 2559 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2499 2560 {
2500 2561 const char *data = (const char *)self->buf.buf;
2501 2562 Py_ssize_t pos = 0;
2502 2563 Py_ssize_t end = self->buf.len;
2503 2564 long incr = v1_hdrsize;
2504 2565 Py_ssize_t len = 0;
2505 2566
2506 2567 while (pos + v1_hdrsize <= end && pos >= 0) {
2507 2568 uint32_t comp_len;
2508 2569 /* 3rd element of header is length of compressed inline data */
2509 2570 comp_len = getbe32(data + pos + 8);
2510 2571 incr = v1_hdrsize + comp_len;
2511 2572 if (offsets)
2512 2573 offsets[len] = data + pos;
2513 2574 len++;
2514 2575 pos += incr;
2515 2576 }
2516 2577
2517 2578 if (pos != end) {
2518 2579 if (!PyErr_Occurred())
2519 2580 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2520 2581 return -1;
2521 2582 }
2522 2583
2523 2584 return len;
2524 2585 }
2525 2586
2526 2587 static int index_init(indexObject *self, PyObject *args)
2527 2588 {
2528 2589 PyObject *data_obj, *inlined_obj;
2529 2590 Py_ssize_t size;
2530 2591
2531 2592 /* Initialize before argument-checking to avoid index_dealloc() crash.
2532 2593 */
2533 2594 self->raw_length = 0;
2534 2595 self->added = NULL;
2535 2596 self->cache = NULL;
2536 2597 self->data = NULL;
2537 2598 memset(&self->buf, 0, sizeof(self->buf));
2538 2599 self->headrevs = NULL;
2539 2600 self->filteredrevs = Py_None;
2540 2601 Py_INCREF(Py_None);
2541 2602 self->ntinitialized = 0;
2542 2603 self->offsets = NULL;
2543 2604
2544 2605 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2545 2606 return -1;
2546 2607 if (!PyObject_CheckBuffer(data_obj)) {
2547 2608 PyErr_SetString(PyExc_TypeError,
2548 2609 "data does not support buffer interface");
2549 2610 return -1;
2550 2611 }
2551 2612
2552 2613 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2553 2614 return -1;
2554 2615 size = self->buf.len;
2555 2616
2556 2617 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2557 2618 self->data = data_obj;
2558 2619
2559 2620 self->ntlookups = self->ntmisses = 0;
2560 2621 self->ntrev = -1;
2561 2622 Py_INCREF(self->data);
2562 2623
2563 2624 if (self->inlined) {
2564 2625 Py_ssize_t len = inline_scan(self, NULL);
2565 2626 if (len == -1)
2566 2627 goto bail;
2567 2628 self->raw_length = len;
2568 2629 self->length = len;
2569 2630 } else {
2570 2631 if (size % v1_hdrsize) {
2571 2632 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2572 2633 goto bail;
2573 2634 }
2574 2635 self->raw_length = size / v1_hdrsize;
2575 2636 self->length = self->raw_length;
2576 2637 }
2577 2638
2578 2639 return 0;
2579 2640 bail:
2580 2641 return -1;
2581 2642 }
2582 2643
2583 2644 static PyObject *index_nodemap(indexObject *self)
2584 2645 {
2585 2646 Py_INCREF(self);
2586 2647 return (PyObject *)self;
2587 2648 }
2588 2649
2589 2650 static void _index_clearcaches(indexObject *self)
2590 2651 {
2591 2652 if (self->cache) {
2592 2653 Py_ssize_t i;
2593 2654
2594 2655 for (i = 0; i < self->raw_length; i++)
2595 2656 Py_CLEAR(self->cache[i]);
2596 2657 free(self->cache);
2597 2658 self->cache = NULL;
2598 2659 }
2599 2660 if (self->offsets) {
2600 2661 PyMem_Free((void *)self->offsets);
2601 2662 self->offsets = NULL;
2602 2663 }
2603 2664 if (self->ntinitialized) {
2604 2665 nt_dealloc(&self->nt);
2605 2666 }
2606 2667 self->ntinitialized = 0;
2607 2668 Py_CLEAR(self->headrevs);
2608 2669 }
2609 2670
2610 2671 static PyObject *index_clearcaches(indexObject *self)
2611 2672 {
2612 2673 _index_clearcaches(self);
2613 2674 self->ntrev = -1;
2614 2675 self->ntlookups = self->ntmisses = 0;
2615 2676 Py_RETURN_NONE;
2616 2677 }
2617 2678
2618 2679 static void index_dealloc(indexObject *self)
2619 2680 {
2620 2681 _index_clearcaches(self);
2621 2682 Py_XDECREF(self->filteredrevs);
2622 2683 if (self->buf.buf) {
2623 2684 PyBuffer_Release(&self->buf);
2624 2685 memset(&self->buf, 0, sizeof(self->buf));
2625 2686 }
2626 2687 Py_XDECREF(self->data);
2627 2688 Py_XDECREF(self->added);
2628 2689 PyObject_Del(self);
2629 2690 }
2630 2691
2631 2692 static PySequenceMethods index_sequence_methods = {
2632 2693 (lenfunc)index_length, /* sq_length */
2633 2694 0, /* sq_concat */
2634 2695 0, /* sq_repeat */
2635 2696 (ssizeargfunc)index_get, /* sq_item */
2636 2697 0, /* sq_slice */
2637 2698 0, /* sq_ass_item */
2638 2699 0, /* sq_ass_slice */
2639 2700 (objobjproc)index_contains, /* sq_contains */
2640 2701 };
2641 2702
2642 2703 static PyMappingMethods index_mapping_methods = {
2643 2704 (lenfunc)index_length, /* mp_length */
2644 2705 (binaryfunc)index_getitem, /* mp_subscript */
2645 2706 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2646 2707 };
2647 2708
2648 2709 static PyMethodDef index_methods[] = {
2649 2710 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2650 2711 "return the gca set of the given revs"},
2651 2712 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2652 2713 METH_VARARGS,
2653 2714 "return the heads of the common ancestors of the given revs"},
2654 2715 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2655 2716 "clear the index caches"},
2656 2717 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2657 2718 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2658 2719 "compute phases"},
2659 2720 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2660 2721 "reachableroots"},
2661 2722 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2662 2723 "get head revisions"}, /* Can do filtering since 3.2 */
2663 2724 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2664 2725 "get filtered head revisions"}, /* Can always do filtering */
2665 2726 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2666 2727 "True if the object is a snapshot"},
2728 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2729 "Gather snapshot data in a cache dict"},
2667 2730 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2668 2731 "determine revisions with deltas to reconstruct fulltext"},
2669 2732 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2670 2733 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2671 2734 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2672 2735 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2673 2736 "match a potentially ambiguous node ID"},
2674 2737 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2675 2738 "find length of shortest hex nodeid of a binary ID"},
2676 2739 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2677 2740 {NULL} /* Sentinel */
2678 2741 };
2679 2742
2680 2743 static PyGetSetDef index_getset[] = {
2681 2744 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2682 2745 {NULL} /* Sentinel */
2683 2746 };
2684 2747
2685 2748 PyTypeObject HgRevlogIndex_Type = {
2686 2749 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2687 2750 "parsers.index", /* tp_name */
2688 2751 sizeof(indexObject), /* tp_basicsize */
2689 2752 0, /* tp_itemsize */
2690 2753 (destructor)index_dealloc, /* tp_dealloc */
2691 2754 0, /* tp_print */
2692 2755 0, /* tp_getattr */
2693 2756 0, /* tp_setattr */
2694 2757 0, /* tp_compare */
2695 2758 0, /* tp_repr */
2696 2759 0, /* tp_as_number */
2697 2760 &index_sequence_methods, /* tp_as_sequence */
2698 2761 &index_mapping_methods, /* tp_as_mapping */
2699 2762 0, /* tp_hash */
2700 2763 0, /* tp_call */
2701 2764 0, /* tp_str */
2702 2765 0, /* tp_getattro */
2703 2766 0, /* tp_setattro */
2704 2767 0, /* tp_as_buffer */
2705 2768 Py_TPFLAGS_DEFAULT, /* tp_flags */
2706 2769 "revlog index", /* tp_doc */
2707 2770 0, /* tp_traverse */
2708 2771 0, /* tp_clear */
2709 2772 0, /* tp_richcompare */
2710 2773 0, /* tp_weaklistoffset */
2711 2774 0, /* tp_iter */
2712 2775 0, /* tp_iternext */
2713 2776 index_methods, /* tp_methods */
2714 2777 0, /* tp_members */
2715 2778 index_getset, /* tp_getset */
2716 2779 0, /* tp_base */
2717 2780 0, /* tp_dict */
2718 2781 0, /* tp_descr_get */
2719 2782 0, /* tp_descr_set */
2720 2783 0, /* tp_dictoffset */
2721 2784 (initproc)index_init, /* tp_init */
2722 2785 0, /* tp_alloc */
2723 2786 };
2724 2787
2725 2788 /*
2726 2789 * returns a tuple of the form (index, index, cache) with elements as
2727 2790 * follows:
2728 2791 *
2729 2792 * index: an index object that lazily parses RevlogNG records
2730 2793 * cache: if data is inlined, a tuple (0, index_file_content), else None
2731 2794 * index_file_content could be a string, or a buffer
2732 2795 *
2733 2796 * added complications are for backwards compatibility
2734 2797 */
2735 2798 PyObject *parse_index2(PyObject *self, PyObject *args)
2736 2799 {
2737 2800 PyObject *tuple = NULL, *cache = NULL;
2738 2801 indexObject *idx;
2739 2802 int ret;
2740 2803
2741 2804 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2742 2805 if (idx == NULL)
2743 2806 goto bail;
2744 2807
2745 2808 ret = index_init(idx, args);
2746 2809 if (ret == -1)
2747 2810 goto bail;
2748 2811
2749 2812 if (idx->inlined) {
2750 2813 cache = Py_BuildValue("iO", 0, idx->data);
2751 2814 if (cache == NULL)
2752 2815 goto bail;
2753 2816 } else {
2754 2817 cache = Py_None;
2755 2818 Py_INCREF(cache);
2756 2819 }
2757 2820
2758 2821 tuple = Py_BuildValue("NN", idx, cache);
2759 2822 if (!tuple)
2760 2823 goto bail;
2761 2824 return tuple;
2762 2825
2763 2826 bail:
2764 2827 Py_XDECREF(idx);
2765 2828 Py_XDECREF(cache);
2766 2829 Py_XDECREF(tuple);
2767 2830 return NULL;
2768 2831 }
2769 2832
2770 2833 #ifdef WITH_RUST
2771 2834
2772 2835 /* rustlazyancestors: iteration over ancestors implemented in Rust
2773 2836 *
2774 2837 * This class holds a reference to an index and to the Rust iterator.
2775 2838 */
2776 2839 typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;
2777 2840
2778 2841 struct rustlazyancestorsObjectStruct {
2779 2842 PyObject_HEAD
2780 2843 /* Type-specific fields go here. */
2781 2844 indexObject *index; /* Ref kept to avoid GC'ing the index */
2782 2845 void *iter; /* Rust iterator */
2783 2846 };
2784 2847
2785 2848 /* FFI exposed from Rust code */
2786 2849 rustlazyancestorsObject *rustlazyancestors_init(indexObject *index,
2787 2850 /* intrevs vector */
2788 2851 Py_ssize_t initrevslen,
2789 2852 long *initrevs, long stoprev,
2790 2853 int inclusive);
2791 2854 void rustlazyancestors_drop(rustlazyancestorsObject *self);
2792 2855 int rustlazyancestors_next(rustlazyancestorsObject *self);
2793 2856 int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2794 2857
2795 2858 /* CPython instance methods */
2796 2859 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2797 2860 {
2798 2861 PyObject *initrevsarg = NULL;
2799 2862 PyObject *inclusivearg = NULL;
2800 2863 long stoprev = 0;
2801 2864 long *initrevs = NULL;
2802 2865 int inclusive = 0;
2803 2866 Py_ssize_t i;
2804 2867
2805 2868 indexObject *index;
2806 2869 if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
2807 2870 &PyList_Type, &initrevsarg, &stoprev,
2808 2871 &PyBool_Type, &inclusivearg))
2809 2872 return -1;
2810 2873
2811 2874 Py_INCREF(index);
2812 2875 self->index = index;
2813 2876
2814 2877 if (inclusivearg == Py_True)
2815 2878 inclusive = 1;
2816 2879
2817 2880 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2818 2881
2819 2882 initrevs = (long *)calloc(linit, sizeof(long));
2820 2883
2821 2884 if (initrevs == NULL) {
2822 2885 PyErr_NoMemory();
2823 2886 goto bail;
2824 2887 }
2825 2888
2826 2889 for (i = 0; i < linit; i++) {
2827 2890 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2828 2891 }
2829 2892 if (PyErr_Occurred())
2830 2893 goto bail;
2831 2894
2832 2895 self->iter =
2833 2896 rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
2834 2897 if (self->iter == NULL) {
2835 2898 /* if this is because of GraphError::ParentOutOfRange
2836 2899 * HgRevlogIndex_GetParents() has already set the proper
2837 2900 * exception */
2838 2901 goto bail;
2839 2902 }
2840 2903
2841 2904 free(initrevs);
2842 2905 return 0;
2843 2906
2844 2907 bail:
2845 2908 free(initrevs);
2846 2909 return -1;
2847 2910 };
2848 2911
2849 2912 static void rustla_dealloc(rustlazyancestorsObject *self)
2850 2913 {
2851 2914 Py_XDECREF(self->index);
2852 2915 if (self->iter != NULL) { /* can happen if rustla_init failed */
2853 2916 rustlazyancestors_drop(self->iter);
2854 2917 }
2855 2918 PyObject_Del(self);
2856 2919 }
2857 2920
2858 2921 static PyObject *rustla_next(rustlazyancestorsObject *self)
2859 2922 {
2860 2923 int res = rustlazyancestors_next(self->iter);
2861 2924 if (res == -1) {
2862 2925 /* Setting an explicit exception seems unnecessary
2863 2926 * as examples from Python source code (Objects/rangeobjets.c
2864 2927 * and Modules/_io/stringio.c) seem to demonstrate.
2865 2928 */
2866 2929 return NULL;
2867 2930 }
2868 2931 return PyInt_FromLong(res);
2869 2932 }
2870 2933
2871 2934 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2872 2935 {
2873 2936 long lrev;
2874 2937 if (!pylong_to_long(rev, &lrev)) {
2875 2938 PyErr_Clear();
2876 2939 return 0;
2877 2940 }
2878 2941 return rustlazyancestors_contains(self->iter, lrev);
2879 2942 }
2880 2943
2881 2944 static PySequenceMethods rustla_sequence_methods = {
2882 2945 0, /* sq_length */
2883 2946 0, /* sq_concat */
2884 2947 0, /* sq_repeat */
2885 2948 0, /* sq_item */
2886 2949 0, /* sq_slice */
2887 2950 0, /* sq_ass_item */
2888 2951 0, /* sq_ass_slice */
2889 2952 (objobjproc)rustla_contains, /* sq_contains */
2890 2953 };
2891 2954
2892 2955 static PyTypeObject rustlazyancestorsType = {
2893 2956 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2894 2957 "parsers.rustlazyancestors", /* tp_name */
2895 2958 sizeof(rustlazyancestorsObject), /* tp_basicsize */
2896 2959 0, /* tp_itemsize */
2897 2960 (destructor)rustla_dealloc, /* tp_dealloc */
2898 2961 0, /* tp_print */
2899 2962 0, /* tp_getattr */
2900 2963 0, /* tp_setattr */
2901 2964 0, /* tp_compare */
2902 2965 0, /* tp_repr */
2903 2966 0, /* tp_as_number */
2904 2967 &rustla_sequence_methods, /* tp_as_sequence */
2905 2968 0, /* tp_as_mapping */
2906 2969 0, /* tp_hash */
2907 2970 0, /* tp_call */
2908 2971 0, /* tp_str */
2909 2972 0, /* tp_getattro */
2910 2973 0, /* tp_setattro */
2911 2974 0, /* tp_as_buffer */
2912 2975 Py_TPFLAGS_DEFAULT, /* tp_flags */
2913 2976 "Iterator over ancestors, implemented in Rust", /* tp_doc */
2914 2977 0, /* tp_traverse */
2915 2978 0, /* tp_clear */
2916 2979 0, /* tp_richcompare */
2917 2980 0, /* tp_weaklistoffset */
2918 2981 0, /* tp_iter */
2919 2982 (iternextfunc)rustla_next, /* tp_iternext */
2920 2983 0, /* tp_methods */
2921 2984 0, /* tp_members */
2922 2985 0, /* tp_getset */
2923 2986 0, /* tp_base */
2924 2987 0, /* tp_dict */
2925 2988 0, /* tp_descr_get */
2926 2989 0, /* tp_descr_set */
2927 2990 0, /* tp_dictoffset */
2928 2991 (initproc)rustla_init, /* tp_init */
2929 2992 0, /* tp_alloc */
2930 2993 };
2931 2994 #endif /* WITH_RUST */
2932 2995
2933 2996 void revlog_module_init(PyObject *mod)
2934 2997 {
2935 2998 PyObject *caps = NULL;
2936 2999 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
2937 3000 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
2938 3001 return;
2939 3002 Py_INCREF(&HgRevlogIndex_Type);
2940 3003 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
2941 3004
2942 3005 nodetreeType.tp_new = PyType_GenericNew;
2943 3006 if (PyType_Ready(&nodetreeType) < 0)
2944 3007 return;
2945 3008 Py_INCREF(&nodetreeType);
2946 3009 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
2947 3010
2948 3011 if (!nullentry) {
2949 3012 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0,
2950 3013 0, -1, -1, -1, -1, nullid, 20);
2951 3014 }
2952 3015 if (nullentry)
2953 3016 PyObject_GC_UnTrack(nullentry);
2954 3017
2955 3018 caps = PyCapsule_New(HgRevlogIndex_GetParents,
2956 3019 "mercurial.cext.parsers.index_get_parents_CAPI",
2957 3020 NULL);
2958 3021 if (caps != NULL)
2959 3022 PyModule_AddObject(mod, "index_get_parents_CAPI", caps);
2960 3023
2961 3024 #ifdef WITH_RUST
2962 3025 rustlazyancestorsType.tp_new = PyType_GenericNew;
2963 3026 if (PyType_Ready(&rustlazyancestorsType) < 0)
2964 3027 return;
2965 3028 Py_INCREF(&rustlazyancestorsType);
2966 3029 PyModule_AddObject(mod, "rustlazyancestors",
2967 3030 (PyObject *)&rustlazyancestorsType);
2968 3031 #endif
2969 3032 }
@@ -1,1012 +1,1016
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import collections
13 13 import struct
14 14
15 15 # import stuff from node for others to import from revlog
16 16 from ..node import (
17 17 nullrev,
18 18 )
19 19 from ..i18n import _
20 20
21 21 from .constants import (
22 22 REVIDX_ISCENSORED,
23 23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 24 )
25 25
26 26 from ..thirdparty import (
27 27 attr,
28 28 )
29 29
30 30 from .. import (
31 31 error,
32 32 mdiff,
33 util,
33 34 )
34 35
35 36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 37 LIMIT_DELTA2TEXT = 2
37 38
38 39 class _testrevlog(object):
39 40 """minimalist fake revlog to use in doctests"""
40 41
41 42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 43 """data is an list of revision payload boundaries"""
43 44 self._data = data
44 45 self._srdensitythreshold = density
45 46 self._srmingapsize = mingap
46 47 self._snapshot = set(snapshot)
47 48 self.index = None
48 49
49 50 def start(self, rev):
50 51 if rev == nullrev:
51 52 return 0
52 53 if rev == 0:
53 54 return 0
54 55 return self._data[rev - 1]
55 56
56 57 def end(self, rev):
57 58 if rev == nullrev:
58 59 return 0
59 60 return self._data[rev]
60 61
61 62 def length(self, rev):
62 63 return self.end(rev) - self.start(rev)
63 64
64 65 def __len__(self):
65 66 return len(self._data)
66 67
67 68 def issnapshot(self, rev):
68 69 if rev == nullrev:
69 70 return True
70 71 return rev in self._snapshot
71 72
72 73 def slicechunk(revlog, revs, targetsize=None):
73 74 """slice revs to reduce the amount of unrelated data to be read from disk.
74 75
75 76 ``revs`` is sliced into groups that should be read in one time.
76 77 Assume that revs are sorted.
77 78
78 79 The initial chunk is sliced until the overall density (payload/chunks-span
79 80 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
80 81 `revlog._srmingapsize` is skipped.
81 82
82 83 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
83 84 For consistency with other slicing choice, this limit won't go lower than
84 85 `revlog._srmingapsize`.
85 86
86 87 If individual revisions chunk are larger than this limit, they will still
87 88 be raised individually.
88 89
89 90 >>> data = [
90 91 ... 5, #00 (5)
91 92 ... 10, #01 (5)
92 93 ... 12, #02 (2)
93 94 ... 12, #03 (empty)
94 95 ... 27, #04 (15)
95 96 ... 31, #05 (4)
96 97 ... 31, #06 (empty)
97 98 ... 42, #07 (11)
98 99 ... 47, #08 (5)
99 100 ... 47, #09 (empty)
100 101 ... 48, #10 (1)
101 102 ... 51, #11 (3)
102 103 ... 74, #12 (23)
103 104 ... 85, #13 (11)
104 105 ... 86, #14 (1)
105 106 ... 91, #15 (5)
106 107 ... ]
107 108 >>> revlog = _testrevlog(data, snapshot=range(16))
108 109
109 110 >>> list(slicechunk(revlog, list(range(16))))
110 111 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
111 112 >>> list(slicechunk(revlog, [0, 15]))
112 113 [[0], [15]]
113 114 >>> list(slicechunk(revlog, [0, 11, 15]))
114 115 [[0], [11], [15]]
115 116 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
116 117 [[0], [11, 13, 15]]
117 118 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
118 119 [[1, 2], [5, 8, 10, 11], [14]]
119 120
120 121 Slicing with a maximum chunk size
121 122 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
122 123 [[0], [11], [13], [15]]
123 124 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
124 125 [[0], [11], [13, 15]]
125 126
126 127 Slicing involving nullrev
127 128 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
128 129 [[-1, 0], [11], [13, 15]]
129 130 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
130 131 [[-1], [13], [15]]
131 132 """
132 133 if targetsize is not None:
133 134 targetsize = max(targetsize, revlog._srmingapsize)
134 135 # targetsize should not be specified when evaluating delta candidates:
135 136 # * targetsize is used to ensure we stay within specification when reading,
136 137 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
137 138 if densityslicing is None:
138 139 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
139 140 for chunk in densityslicing(revs,
140 141 revlog._srdensitythreshold,
141 142 revlog._srmingapsize):
142 143 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
143 144 yield subchunk
144 145
145 146 def _slicechunktosize(revlog, revs, targetsize=None):
146 147 """slice revs to match the target size
147 148
148 149 This is intended to be used on chunk that density slicing selected by that
149 150 are still too large compared to the read garantee of revlog. This might
150 151 happens when "minimal gap size" interrupted the slicing or when chain are
151 152 built in a way that create large blocks next to each other.
152 153
153 154 >>> data = [
154 155 ... 3, #0 (3)
155 156 ... 5, #1 (2)
156 157 ... 6, #2 (1)
157 158 ... 8, #3 (2)
158 159 ... 8, #4 (empty)
159 160 ... 11, #5 (3)
160 161 ... 12, #6 (1)
161 162 ... 13, #7 (1)
162 163 ... 14, #8 (1)
163 164 ... ]
164 165
165 166 == All snapshots cases ==
166 167 >>> revlog = _testrevlog(data, snapshot=range(9))
167 168
168 169 Cases where chunk is already small enough
169 170 >>> list(_slicechunktosize(revlog, [0], 3))
170 171 [[0]]
171 172 >>> list(_slicechunktosize(revlog, [6, 7], 3))
172 173 [[6, 7]]
173 174 >>> list(_slicechunktosize(revlog, [0], None))
174 175 [[0]]
175 176 >>> list(_slicechunktosize(revlog, [6, 7], None))
176 177 [[6, 7]]
177 178
178 179 cases where we need actual slicing
179 180 >>> list(_slicechunktosize(revlog, [0, 1], 3))
180 181 [[0], [1]]
181 182 >>> list(_slicechunktosize(revlog, [1, 3], 3))
182 183 [[1], [3]]
183 184 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
184 185 [[1, 2], [3]]
185 186 >>> list(_slicechunktosize(revlog, [3, 5], 3))
186 187 [[3], [5]]
187 188 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
188 189 [[3], [5]]
189 190 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
190 191 [[5], [6, 7, 8]]
191 192 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
192 193 [[0], [1, 2], [3], [5], [6, 7, 8]]
193 194
194 195 Case with too large individual chunk (must return valid chunk)
195 196 >>> list(_slicechunktosize(revlog, [0, 1], 2))
196 197 [[0], [1]]
197 198 >>> list(_slicechunktosize(revlog, [1, 3], 1))
198 199 [[1], [3]]
199 200 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
200 201 [[3], [5]]
201 202
202 203 == No Snapshot cases ==
203 204 >>> revlog = _testrevlog(data)
204 205
205 206 Cases where chunk is already small enough
206 207 >>> list(_slicechunktosize(revlog, [0], 3))
207 208 [[0]]
208 209 >>> list(_slicechunktosize(revlog, [6, 7], 3))
209 210 [[6, 7]]
210 211 >>> list(_slicechunktosize(revlog, [0], None))
211 212 [[0]]
212 213 >>> list(_slicechunktosize(revlog, [6, 7], None))
213 214 [[6, 7]]
214 215
215 216 cases where we need actual slicing
216 217 >>> list(_slicechunktosize(revlog, [0, 1], 3))
217 218 [[0], [1]]
218 219 >>> list(_slicechunktosize(revlog, [1, 3], 3))
219 220 [[1], [3]]
220 221 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
221 222 [[1], [2, 3]]
222 223 >>> list(_slicechunktosize(revlog, [3, 5], 3))
223 224 [[3], [5]]
224 225 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
225 226 [[3], [4, 5]]
226 227 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
227 228 [[5], [6, 7, 8]]
228 229 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
229 230 [[0], [1, 2], [3], [5], [6, 7, 8]]
230 231
231 232 Case with too large individual chunk (must return valid chunk)
232 233 >>> list(_slicechunktosize(revlog, [0, 1], 2))
233 234 [[0], [1]]
234 235 >>> list(_slicechunktosize(revlog, [1, 3], 1))
235 236 [[1], [3]]
236 237 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
237 238 [[3], [5]]
238 239
239 240 == mixed case ==
240 241 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
241 242 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
242 243 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
243 244 """
244 245 assert targetsize is None or 0 <= targetsize
245 246 startdata = revlog.start(revs[0])
246 247 enddata = revlog.end(revs[-1])
247 248 fullspan = enddata - startdata
248 249 if targetsize is None or fullspan <= targetsize:
249 250 yield revs
250 251 return
251 252
252 253 startrevidx = 0
253 254 endrevidx = 1
254 255 iterrevs = enumerate(revs)
255 256 next(iterrevs) # skip first rev.
256 257 # first step: get snapshots out of the way
257 258 for idx, r in iterrevs:
258 259 span = revlog.end(r) - startdata
259 260 snapshot = revlog.issnapshot(r)
260 261 if span <= targetsize and snapshot:
261 262 endrevidx = idx + 1
262 263 else:
263 264 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
264 265 if chunk:
265 266 yield chunk
266 267 startrevidx = idx
267 268 startdata = revlog.start(r)
268 269 endrevidx = idx + 1
269 270 if not snapshot:
270 271 break
271 272
272 273 # for the others, we use binary slicing to quickly converge toward valid
273 274 # chunks (otherwise, we might end up looking for start/end of many
274 275 # revisions). This logic is not looking for the perfect slicing point, it
275 276 # focuses on quickly converging toward valid chunks.
276 277 nbitem = len(revs)
277 278 while (enddata - startdata) > targetsize:
278 279 endrevidx = nbitem
279 280 if nbitem - startrevidx <= 1:
280 281 break # protect against individual chunk larger than limit
281 282 localenddata = revlog.end(revs[endrevidx - 1])
282 283 span = localenddata - startdata
283 284 while span > targetsize:
284 285 if endrevidx - startrevidx <= 1:
285 286 break # protect against individual chunk larger than limit
286 287 endrevidx -= (endrevidx - startrevidx) // 2
287 288 localenddata = revlog.end(revs[endrevidx - 1])
288 289 span = localenddata - startdata
289 290 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
290 291 if chunk:
291 292 yield chunk
292 293 startrevidx = endrevidx
293 294 startdata = revlog.start(revs[startrevidx])
294 295
295 296 chunk = _trimchunk(revlog, revs, startrevidx)
296 297 if chunk:
297 298 yield chunk
298 299
299 300 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
300 301 mingapsize=0):
301 302 """slice revs to reduce the amount of unrelated data to be read from disk.
302 303
303 304 ``revs`` is sliced into groups that should be read in one time.
304 305 Assume that revs are sorted.
305 306
306 307 The initial chunk is sliced until the overall density (payload/chunks-span
307 308 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
308 309 skipped.
309 310
310 311 >>> revlog = _testrevlog([
311 312 ... 5, #00 (5)
312 313 ... 10, #01 (5)
313 314 ... 12, #02 (2)
314 315 ... 12, #03 (empty)
315 316 ... 27, #04 (15)
316 317 ... 31, #05 (4)
317 318 ... 31, #06 (empty)
318 319 ... 42, #07 (11)
319 320 ... 47, #08 (5)
320 321 ... 47, #09 (empty)
321 322 ... 48, #10 (1)
322 323 ... 51, #11 (3)
323 324 ... 74, #12 (23)
324 325 ... 85, #13 (11)
325 326 ... 86, #14 (1)
326 327 ... 91, #15 (5)
327 328 ... ])
328 329
329 330 >>> list(_slicechunktodensity(revlog, list(range(16))))
330 331 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
331 332 >>> list(_slicechunktodensity(revlog, [0, 15]))
332 333 [[0], [15]]
333 334 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
334 335 [[0], [11], [15]]
335 336 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
336 337 [[0], [11, 13, 15]]
337 338 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
338 339 [[1, 2], [5, 8, 10, 11], [14]]
339 340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
340 341 ... mingapsize=20))
341 342 [[1, 2, 3, 5, 8, 10, 11], [14]]
342 343 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
343 344 ... targetdensity=0.95))
344 345 [[1, 2], [5], [8, 10, 11], [14]]
345 346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
346 347 ... targetdensity=0.95, mingapsize=12))
347 348 [[1, 2], [5, 8, 10, 11], [14]]
348 349 """
349 350 start = revlog.start
350 351 length = revlog.length
351 352
352 353 if len(revs) <= 1:
353 354 yield revs
354 355 return
355 356
356 357 deltachainspan = segmentspan(revlog, revs)
357 358
358 359 if deltachainspan < mingapsize:
359 360 yield revs
360 361 return
361 362
362 363 readdata = deltachainspan
363 364 chainpayload = sum(length(r) for r in revs)
364 365
365 366 if deltachainspan:
366 367 density = chainpayload / float(deltachainspan)
367 368 else:
368 369 density = 1.0
369 370
370 371 if density >= targetdensity:
371 372 yield revs
372 373 return
373 374
374 375 # Store the gaps in a heap to have them sorted by decreasing size
375 376 gaps = []
376 377 prevend = None
377 378 for i, rev in enumerate(revs):
378 379 revstart = start(rev)
379 380 revlen = length(rev)
380 381
381 382 # Skip empty revisions to form larger holes
382 383 if revlen == 0:
383 384 continue
384 385
385 386 if prevend is not None:
386 387 gapsize = revstart - prevend
387 388 # only consider holes that are large enough
388 389 if gapsize > mingapsize:
389 390 gaps.append((gapsize, i))
390 391
391 392 prevend = revstart + revlen
392 393 # sort the gaps to pop them from largest to small
393 394 gaps.sort()
394 395
395 396 # Collect the indices of the largest holes until the density is acceptable
396 397 selected = []
397 398 while gaps and density < targetdensity:
398 399 gapsize, gapidx = gaps.pop()
399 400
400 401 selected.append(gapidx)
401 402
402 403 # the gap sizes are stored as negatives to be sorted decreasingly
403 404 # by the heap
404 405 readdata -= gapsize
405 406 if readdata > 0:
406 407 density = chainpayload / float(readdata)
407 408 else:
408 409 density = 1.0
409 410 selected.sort()
410 411
411 412 # Cut the revs at collected indices
412 413 previdx = 0
413 414 for idx in selected:
414 415
415 416 chunk = _trimchunk(revlog, revs, previdx, idx)
416 417 if chunk:
417 418 yield chunk
418 419
419 420 previdx = idx
420 421
421 422 chunk = _trimchunk(revlog, revs, previdx)
422 423 if chunk:
423 424 yield chunk
424 425
425 426 def _trimchunk(revlog, revs, startidx, endidx=None):
426 427 """returns revs[startidx:endidx] without empty trailing revs
427 428
428 429 Doctest Setup
429 430 >>> revlog = _testrevlog([
430 431 ... 5, #0
431 432 ... 10, #1
432 433 ... 12, #2
433 434 ... 12, #3 (empty)
434 435 ... 17, #4
435 436 ... 21, #5
436 437 ... 21, #6 (empty)
437 438 ... ])
438 439
439 440 Contiguous cases:
440 441 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
441 442 [0, 1, 2, 3, 4, 5]
442 443 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
443 444 [0, 1, 2, 3, 4]
444 445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
445 446 [0, 1, 2]
446 447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
447 448 [2]
448 449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
449 450 [3, 4, 5]
450 451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
451 452 [3, 4]
452 453
453 454 Discontiguous cases:
454 455 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
455 456 [1, 3, 5]
456 457 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
457 458 [1]
458 459 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
459 460 [3, 5]
460 461 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
461 462 [3, 5]
462 463 """
463 464 length = revlog.length
464 465
465 466 if endidx is None:
466 467 endidx = len(revs)
467 468
468 469 # If we have a non-emtpy delta candidate, there are nothing to trim
469 470 if revs[endidx - 1] < len(revlog):
470 471 # Trim empty revs at the end, except the very first revision of a chain
471 472 while (endidx > 1
472 473 and endidx > startidx
473 474 and length(revs[endidx - 1]) == 0):
474 475 endidx -= 1
475 476
476 477 return revs[startidx:endidx]
477 478
478 479 def segmentspan(revlog, revs):
479 480 """Get the byte span of a segment of revisions
480 481
481 482 revs is a sorted array of revision numbers
482 483
483 484 >>> revlog = _testrevlog([
484 485 ... 5, #0
485 486 ... 10, #1
486 487 ... 12, #2
487 488 ... 12, #3 (empty)
488 489 ... 17, #4
489 490 ... ])
490 491
491 492 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
492 493 17
493 494 >>> segmentspan(revlog, [0, 4])
494 495 17
495 496 >>> segmentspan(revlog, [3, 4])
496 497 5
497 498 >>> segmentspan(revlog, [1, 2, 3,])
498 499 7
499 500 >>> segmentspan(revlog, [1, 3])
500 501 7
501 502 """
502 503 if not revs:
503 504 return 0
504 505 end = revlog.end(revs[-1])
505 506 return end - revlog.start(revs[0])
506 507
507 508 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
508 509 """build full text from a (base, delta) pair and other metadata"""
509 510 # special case deltas which replace entire base; no need to decode
510 511 # base revision. this neatly avoids censored bases, which throw when
511 512 # they're decoded.
512 513 hlen = struct.calcsize(">lll")
513 514 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
514 515 len(delta) - hlen):
515 516 fulltext = delta[hlen:]
516 517 else:
517 518 # deltabase is rawtext before changed by flag processors, which is
518 519 # equivalent to non-raw text
519 520 basetext = revlog.revision(baserev, _df=fh, raw=False)
520 521 fulltext = mdiff.patch(basetext, delta)
521 522
522 523 try:
523 524 res = revlog._processflags(fulltext, flags, 'read', raw=True)
524 525 fulltext, validatehash = res
525 526 if validatehash:
526 527 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
527 528 if flags & REVIDX_ISCENSORED:
528 529 raise error.StorageError(_('node %s is not censored') %
529 530 expectednode)
530 531 except error.CensoredNodeError:
531 532 # must pass the censored index flag to add censored revisions
532 533 if not flags & REVIDX_ISCENSORED:
533 534 raise
534 535 return fulltext
535 536
536 537 @attr.s(slots=True, frozen=True)
537 538 class _deltainfo(object):
538 539 distance = attr.ib()
539 540 deltalen = attr.ib()
540 541 data = attr.ib()
541 542 base = attr.ib()
542 543 chainbase = attr.ib()
543 544 chainlen = attr.ib()
544 545 compresseddeltalen = attr.ib()
545 546 snapshotdepth = attr.ib()
546 547
547 548 def isgooddeltainfo(revlog, deltainfo, revinfo):
548 549 """Returns True if the given delta is good. Good means that it is within
549 550 the disk span, disk size, and chain length bounds that we know to be
550 551 performant."""
551 552 if deltainfo is None:
552 553 return False
553 554
554 555 # - 'deltainfo.distance' is the distance from the base revision --
555 556 # bounding it limits the amount of I/O we need to do.
556 557 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
557 558 # deltas we need to apply -- bounding it limits the amount of CPU
558 559 # we consume.
559 560
560 561 textlen = revinfo.textlen
561 562 defaultmax = textlen * 4
562 563 maxdist = revlog._maxdeltachainspan
563 564 if not maxdist:
564 565 maxdist = deltainfo.distance # ensure the conditional pass
565 566 maxdist = max(maxdist, defaultmax)
566 567
567 568 # Bad delta from read span:
568 569 #
569 570 # If the span of data read is larger than the maximum allowed.
570 571 #
571 572 # In the sparse-revlog case, we rely on the associated "sparse reading"
572 573 # to avoid issue related to the span of data. In theory, it would be
573 574 # possible to build pathological revlog where delta pattern would lead
574 575 # to too many reads. However, they do not happen in practice at all. So
575 576 # we skip the span check entirely.
576 577 if not revlog._sparserevlog and maxdist < deltainfo.distance:
577 578 return False
578 579
579 580 # Bad delta from new delta size:
580 581 #
581 582 # If the delta size is larger than the target text, storing the
582 583 # delta will be inefficient.
583 584 if textlen < deltainfo.deltalen:
584 585 return False
585 586
586 587 # Bad delta from cumulated payload size:
587 588 #
588 589 # If the sum of delta get larger than K * target text length.
589 590 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
590 591 return False
591 592
592 593 # Bad delta from chain length:
593 594 #
594 595 # If the number of delta in the chain gets too high.
595 596 if (revlog._maxchainlen
596 597 and revlog._maxchainlen < deltainfo.chainlen):
597 598 return False
598 599
599 600 # bad delta from intermediate snapshot size limit
600 601 #
601 602 # If an intermediate snapshot size is higher than the limit. The
602 603 # limit exist to prevent endless chain of intermediate delta to be
603 604 # created.
604 605 if (deltainfo.snapshotdepth is not None and
605 606 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
606 607 return False
607 608
608 609 # bad delta if new intermediate snapshot is larger than the previous
609 610 # snapshot
610 611 if (deltainfo.snapshotdepth
611 612 and revlog.length(deltainfo.base) < deltainfo.deltalen):
612 613 return False
613 614
614 615 return True
615 616
616 617 # If a revision's full text is that much bigger than a base candidate full
617 618 # text's, it is very unlikely that it will produce a valid delta. We no longer
618 619 # consider these candidates.
619 620 LIMIT_BASE2TEXT = 500
620 621
621 622 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
622 623 """Provides group of revision to be tested as delta base
623 624
624 625 This top level function focus on emitting groups with unique and worthwhile
625 626 content. See _raw_candidate_groups for details about the group order.
626 627 """
627 628 # should we try to build a delta?
628 629 if not (len(revlog) and revlog._storedeltachains):
629 630 yield None
630 631 return
631 632
632 633 deltalength = revlog.length
633 634 deltaparent = revlog.deltaparent
634 635 sparse = revlog._sparserevlog
635 636 good = None
636 637
637 638 deltas_limit = textlen * LIMIT_DELTA2TEXT
638 639
639 640 tested = set([nullrev])
640 641 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
641 642 while True:
642 643 temptative = candidates.send(good)
643 644 if temptative is None:
644 645 break
645 646 group = []
646 647 for rev in temptative:
647 648 # skip over empty delta (no need to include them in a chain)
648 649 while (revlog._generaldelta
649 650 and not (rev == nullrev
650 651 or rev in tested
651 652 or deltalength(rev))):
652 653 tested.add(rev)
653 654 rev = deltaparent(rev)
654 655 # no need to try a delta against nullrev, this will be done as a
655 656 # last resort.
656 657 if rev == nullrev:
657 658 continue
658 659 # filter out revision we tested already
659 660 if rev in tested:
660 661 continue
661 662 tested.add(rev)
662 663 # filter out delta base that will never produce good delta
663 664 if deltas_limit < revlog.length(rev):
664 665 continue
665 666 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
666 667 continue
667 668 # no delta for rawtext-changing revs (see "candelta" for why)
668 669 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
669 670 continue
670 671 # If we reach here, we are about to build and test a delta.
671 672 # The delta building process will compute the chaininfo in all
672 673 # case, since that computation is cached, it is fine to access it
673 674 # here too.
674 675 chainlen, chainsize = revlog._chaininfo(rev)
675 676 # if chain will be too long, skip base
676 677 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
677 678 continue
678 679 # if chain already have too much data, skip base
679 680 if deltas_limit < chainsize:
680 681 continue
681 682 group.append(rev)
682 683 if group:
683 684 # XXX: in the sparse revlog case, group can become large,
684 685 # impacting performances. Some bounding or slicing mecanism
685 686 # would help to reduce this impact.
686 687 good = yield tuple(group)
687 688 yield None
688 689
689 690 def _findsnapshots(revlog, cache, start_rev):
690 691 """find snapshot from start_rev to tip"""
691 deltaparent = revlog.deltaparent
692 issnapshot = revlog.issnapshot
693 for rev in revlog.revs(start_rev):
694 if issnapshot(rev):
695 cache[deltaparent(rev)].append(rev)
692 if util.safehasattr(revlog.index, 'findsnapshots'):
693 revlog.index.findsnapshots(cache, start_rev)
694 else:
695 deltaparent = revlog.deltaparent
696 issnapshot = revlog.issnapshot
697 for rev in revlog.revs(start_rev):
698 if issnapshot(rev):
699 cache[deltaparent(rev)].append(rev)
696 700
697 701 def _refinedgroups(revlog, p1, p2, cachedelta):
698 702 good = None
699 703 # First we try to reuse a the delta contained in the bundle.
700 704 # (or from the source revlog)
701 705 #
702 706 # This logic only applies to general delta repositories and can be disabled
703 707 # through configuration. Disabling reuse source delta is useful when
704 708 # we want to make sure we recomputed "optimal" deltas.
705 709 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
706 710 # Assume what we received from the server is a good choice
707 711 # build delta will reuse the cache
708 712 good = yield (cachedelta[0],)
709 713 if good is not None:
710 714 yield None
711 715 return
712 716 for candidates in _rawgroups(revlog, p1, p2, cachedelta):
713 717 good = yield candidates
714 718 if good is not None:
715 719 break
716 720
717 721 # If sparse revlog is enabled, we can try to refine the available deltas
718 722 if not revlog._sparserevlog:
719 723 yield None
720 724 return
721 725
722 726 # if we have a refinable value, try to refine it
723 727 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
724 728 # refine snapshot down
725 729 previous = None
726 730 while previous != good:
727 731 previous = good
728 732 base = revlog.deltaparent(good)
729 733 if base == nullrev:
730 734 break
731 735 good = yield (base,)
732 736 # refine snapshot up
733 737 #
734 738 # XXX the _findsnapshots call can be expensive and is "duplicated" with
735 739 # the one done in `_rawgroups`. Once we start working on performance,
736 740 # we should make the two logics share this computation.
737 741 snapshots = collections.defaultdict(list)
738 742 _findsnapshots(revlog, snapshots, good + 1)
739 743 previous = None
740 744 while good != previous:
741 745 previous = good
742 746 children = tuple(sorted(c for c in snapshots[good]))
743 747 good = yield children
744 748
745 749 # we have found nothing
746 750 yield None
747 751
748 752 def _rawgroups(revlog, p1, p2, cachedelta):
749 753 """Provides group of revision to be tested as delta base
750 754
751 755 This lower level function focus on emitting delta theorically interresting
752 756 without looking it any practical details.
753 757
754 758 The group order aims at providing fast or small candidates first.
755 759 """
756 760 gdelta = revlog._generaldelta
757 761 sparse = revlog._sparserevlog
758 762 curr = len(revlog)
759 763 prev = curr - 1
760 764 deltachain = lambda rev: revlog._deltachain(rev)[0]
761 765
762 766 if gdelta:
763 767 # exclude already lazy tested base if any
764 768 parents = [p for p in (p1, p2) if p != nullrev]
765 769
766 770 if not revlog._deltabothparents and len(parents) == 2:
767 771 parents.sort()
768 772 # To minimize the chance of having to build a fulltext,
769 773 # pick first whichever parent is closest to us (max rev)
770 774 yield (parents[1],)
771 775 # then the other one (min rev) if the first did not fit
772 776 yield (parents[0],)
773 777 elif len(parents) > 0:
774 778 # Test all parents (1 or 2), and keep the best candidate
775 779 yield parents
776 780
777 781 if sparse and parents:
778 782 snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
779 783 # See if we can use an existing snapshot in the parent chains to use as
780 784 # a base for a new intermediate-snapshot
781 785 #
782 786 # search for snapshot in parents delta chain
783 787 # map: snapshot-level: snapshot-rev
784 788 parents_snaps = collections.defaultdict(set)
785 789 candidate_chains = [deltachain(p) for p in parents]
786 790 for chain in candidate_chains:
787 791 for idx, s in enumerate(chain):
788 792 if not revlog.issnapshot(s):
789 793 break
790 794 parents_snaps[idx].add(s)
791 795 snapfloor = min(parents_snaps[0]) + 1
792 796 _findsnapshots(revlog, snapshots, snapfloor)
793 797 # search for the highest "unrelated" revision
794 798 #
795 799 # Adding snapshots used by "unrelated" revision increase the odd we
796 800 # reuse an independant, yet better snapshot chain.
797 801 #
798 802 # XXX instead of building a set of revisions, we could lazily enumerate
799 803 # over the chains. That would be more efficient, however we stick to
800 804 # simple code for now.
801 805 all_revs = set()
802 806 for chain in candidate_chains:
803 807 all_revs.update(chain)
804 808 other = None
805 809 for r in revlog.revs(prev, snapfloor):
806 810 if r not in all_revs:
807 811 other = r
808 812 break
809 813 if other is not None:
810 814 # To avoid unfair competition, we won't use unrelated intermediate
811 815 # snapshot that are deeper than the ones from the parent delta
812 816 # chain.
813 817 max_depth = max(parents_snaps.keys())
814 818 chain = deltachain(other)
815 819 for idx, s in enumerate(chain):
816 820 if s < snapfloor:
817 821 continue
818 822 if max_depth < idx:
819 823 break
820 824 if not revlog.issnapshot(s):
821 825 break
822 826 parents_snaps[idx].add(s)
823 827 # Test them as possible intermediate snapshot base
824 828 # We test them from highest to lowest level. High level one are more
825 829 # likely to result in small delta
826 830 floor = None
827 831 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
828 832 siblings = set()
829 833 for s in snaps:
830 834 siblings.update(snapshots[s])
831 835 # Before considering making a new intermediate snapshot, we check
832 836 # if an existing snapshot, children of base we consider, would be
833 837 # suitable.
834 838 #
835 839 # It give a change to reuse a delta chain "unrelated" to the
836 840 # current revision instead of starting our own. Without such
837 841 # re-use, topological branches would keep reopening new chains.
838 842 # Creating more and more snapshot as the repository grow.
839 843
840 844 if floor is not None:
841 845 # We only do this for siblings created after the one in our
842 846 # parent's delta chain. Those created before has less chances
843 847 # to be valid base since our ancestors had to create a new
844 848 # snapshot.
845 849 siblings = [r for r in siblings if floor < r]
846 850 yield tuple(sorted(siblings))
847 851 # then test the base from our parent's delta chain.
848 852 yield tuple(sorted(snaps))
849 853 floor = min(snaps)
850 854 # No suitable base found in the parent chain, search if any full
851 855 # snapshots emitted since parent's base would be a suitable base for an
852 856 # intermediate snapshot.
853 857 #
854 858 # It give a chance to reuse a delta chain unrelated to the current
855 859 # revisions instead of starting our own. Without such re-use,
856 860 # topological branches would keep reopening new full chains. Creating
857 861 # more and more snapshot as the repository grow.
858 862 yield tuple(snapshots[nullrev])
859 863
860 864 if not sparse:
861 865 # other approach failed try against prev to hopefully save us a
862 866 # fulltext.
863 867 yield (prev,)
864 868
865 869 class deltacomputer(object):
866 870 def __init__(self, revlog):
867 871 self.revlog = revlog
868 872
869 873 def buildtext(self, revinfo, fh):
870 874 """Builds a fulltext version of a revision
871 875
872 876 revinfo: _revisioninfo instance that contains all needed info
873 877 fh: file handle to either the .i or the .d revlog file,
874 878 depending on whether it is inlined or not
875 879 """
876 880 btext = revinfo.btext
877 881 if btext[0] is not None:
878 882 return btext[0]
879 883
880 884 revlog = self.revlog
881 885 cachedelta = revinfo.cachedelta
882 886 baserev = cachedelta[0]
883 887 delta = cachedelta[1]
884 888
885 889 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
886 890 revinfo.p1, revinfo.p2,
887 891 revinfo.flags, revinfo.node)
888 892 return fulltext
889 893
890 894 def _builddeltadiff(self, base, revinfo, fh):
891 895 revlog = self.revlog
892 896 t = self.buildtext(revinfo, fh)
893 897 if revlog.iscensored(base):
894 898 # deltas based on a censored revision must replace the
895 899 # full content in one patch, so delta works everywhere
896 900 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
897 901 delta = header + t
898 902 else:
899 903 ptext = revlog.revision(base, _df=fh, raw=True)
900 904 delta = mdiff.textdiff(ptext, t)
901 905
902 906 return delta
903 907
904 908 def _builddeltainfo(self, revinfo, base, fh):
905 909 # can we use the cached delta?
906 910 delta = None
907 911 if revinfo.cachedelta:
908 912 cachebase, cachediff = revinfo.cachedelta
909 913 #check if the diff still apply
910 914 currentbase = cachebase
911 915 while (currentbase != nullrev
912 916 and currentbase != base
913 917 and self.revlog.length(currentbase) == 0):
914 918 currentbase = self.revlog.deltaparent(currentbase)
915 919 if currentbase == base:
916 920 delta = revinfo.cachedelta[1]
917 921 if delta is None:
918 922 delta = self._builddeltadiff(base, revinfo, fh)
919 923 revlog = self.revlog
920 924 header, data = revlog.compress(delta)
921 925 deltalen = len(header) + len(data)
922 926 chainbase = revlog.chainbase(base)
923 927 offset = revlog.end(len(revlog) - 1)
924 928 dist = deltalen + offset - revlog.start(chainbase)
925 929 if revlog._generaldelta:
926 930 deltabase = base
927 931 else:
928 932 deltabase = chainbase
929 933 chainlen, compresseddeltalen = revlog._chaininfo(base)
930 934 chainlen += 1
931 935 compresseddeltalen += deltalen
932 936
933 937 revlog = self.revlog
934 938 snapshotdepth = None
935 939 if deltabase == nullrev:
936 940 snapshotdepth = 0
937 941 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
938 942 # A delta chain should always be one full snapshot,
939 943 # zero or more semi-snapshots, and zero or more deltas
940 944 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
941 945 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
942 946 snapshotdepth = len(revlog._deltachain(deltabase)[0])
943 947
944 948 return _deltainfo(dist, deltalen, (header, data), deltabase,
945 949 chainbase, chainlen, compresseddeltalen,
946 950 snapshotdepth)
947 951
948 952 def _fullsnapshotinfo(self, fh, revinfo):
949 953 curr = len(self.revlog)
950 954 rawtext = self.buildtext(revinfo, fh)
951 955 data = self.revlog.compress(rawtext)
952 956 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
953 957 deltabase = chainbase = curr
954 958 snapshotdepth = 0
955 959 chainlen = 1
956 960
957 961 return _deltainfo(dist, deltalen, data, deltabase,
958 962 chainbase, chainlen, compresseddeltalen,
959 963 snapshotdepth)
960 964
961 965 def finddeltainfo(self, revinfo, fh):
962 966 """Find an acceptable delta against a candidate revision
963 967
964 968 revinfo: information about the revision (instance of _revisioninfo)
965 969 fh: file handle to either the .i or the .d revlog file,
966 970 depending on whether it is inlined or not
967 971
968 972 Returns the first acceptable candidate revision, as ordered by
969 973 _candidategroups
970 974
971 975 If no suitable deltabase is found, we return delta info for a full
972 976 snapshot.
973 977 """
974 978 if not revinfo.textlen:
975 979 return self._fullsnapshotinfo(fh, revinfo)
976 980
977 981 # no delta for flag processor revision (see "candelta" for why)
978 982 # not calling candelta since only one revision needs test, also to
979 983 # avoid overhead fetching flags again.
980 984 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
981 985 return self._fullsnapshotinfo(fh, revinfo)
982 986
983 987 cachedelta = revinfo.cachedelta
984 988 p1 = revinfo.p1
985 989 p2 = revinfo.p2
986 990 revlog = self.revlog
987 991
988 992 deltainfo = None
989 993 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
990 994 groups = _candidategroups(self.revlog, revinfo.textlen,
991 995 p1r, p2r, cachedelta)
992 996 candidaterevs = next(groups)
993 997 while candidaterevs is not None:
994 998 nominateddeltas = []
995 999 if deltainfo is not None:
996 1000 # if we already found a good delta,
997 1001 # challenge it against refined candidates
998 1002 nominateddeltas.append(deltainfo)
999 1003 for candidaterev in candidaterevs:
1000 1004 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1001 1005 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1002 1006 nominateddeltas.append(candidatedelta)
1003 1007 if nominateddeltas:
1004 1008 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1005 1009 if deltainfo is not None:
1006 1010 candidaterevs = groups.send(deltainfo.base)
1007 1011 else:
1008 1012 candidaterevs = next(groups)
1009 1013
1010 1014 if deltainfo is None:
1011 1015 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1012 1016 return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now