##// END OF EJS Templates
revlog: use the native implementation of issnapshot...
Boris Feld -
r41118:a28833d7 default
parent child Browse files
Show More
@@ -1,2946 +1,2969
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <assert.h>
12 12 #include <ctype.h>
13 13 #include <limits.h>
14 14 #include <stddef.h>
15 15 #include <stdlib.h>
16 16 #include <string.h>
17 17
18 18 #include "bitmanipulation.h"
19 19 #include "charencode.h"
20 20 #include "revlog.h"
21 21 #include "util.h"
22 22
23 23 #ifdef IS_PY3K
24 24 /* The mapping of Python types is meant to be temporary to get Python
25 25 * 3 to compile. We should remove this once Python 3 support is fully
26 26 * supported and proper types are used in the extensions themselves. */
27 27 #define PyInt_Check PyLong_Check
28 28 #define PyInt_FromLong PyLong_FromLong
29 29 #define PyInt_FromSsize_t PyLong_FromSsize_t
30 30 #define PyInt_AsLong PyLong_AsLong
31 31 #endif
32 32
33 33 typedef struct indexObjectStruct indexObject;
34 34
typedef struct {
	/* One slot per hex nibble; see the encoding comment on nodetree
	 * below for what positive/negative/zero values mean. */
	int children[16];
} nodetreenode;
38 38
39 39 /*
40 40 * A base-16 trie for fast node->rev mapping.
41 41 *
42 42 * Positive value is index of the next node in the trie
43 43 * Negative value is a leaf: -(rev + 2)
44 44 * Zero is empty
45 45 */
typedef struct {
	indexObject *index;   /* back-reference to the index this trie maps
	                       * nodes for */
	nodetreenode *nodes;  /* flat pool of trie nodes */
	unsigned length;      /* # nodes in use */
	unsigned capacity;    /* # nodes allocated */
	int depth;            /* maximum depth of tree */
	int splits;           /* # splits performed */
} nodetree;
54 54
/* Python object wrapper exposing a standalone nodetree. */
typedef struct {
	PyObject_HEAD /* ; */
	nodetree nt;
} nodetreeObject;
59 59
60 60 /*
61 61 * This class has two behaviors.
62 62 *
63 63 * When used in a list-like way (with integer keys), we decode an
64 64 * entry in a RevlogNG index file on demand. Our last entry is a
65 65 * sentinel, always a nullid. We have limited support for
66 66 * integer-keyed insert and delete, only at elements right before the
67 67 * sentinel.
68 68 *
69 69 * With string keys, we lazily perform a reverse mapping from node to
70 70 * rev, using a base-16 trie.
71 71 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	Py_buffer buf;         /* buffer of data */
	PyObject **cache;      /* cached tuples, lazily allocated; one slot
	                        * per on-disk entry */
	const char **offsets;  /* populated on demand (inline revlogs only) */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand: list of pending
	                        * 8-tuples appended but not yet on disk */
	PyObject *headrevs;    /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;            /* nonzero when entries are interleaved with
	                         * data (no fixed record stride) */
};
91 91
92 92 static Py_ssize_t index_length(const indexObject *self)
93 93 {
94 94 if (self->added == NULL)
95 95 return self->length;
96 96 return self->length + PyList_GET_SIZE(self->added);
97 97 }
98 98
99 99 static PyObject *nullentry = NULL;
100 100 static const char nullid[20] = {0};
101 101 static const Py_ssize_t nullrev = -1;
102 102
103 103 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
104 104
105 105 #if LONG_MAX == 0x7fffffffL
106 106 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
107 107 #else
108 108 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
109 109 #endif
110 110
111 111 /* A RevlogNG v1 index entry is 64 bytes long. */
112 112 static const long v1_hdrsize = 64;
113 113
/*
 * Set mercurial.error.RevlogError as the current Python exception.
 * If the class cannot be imported or found, a different exception
 * (ImportError or SystemError) is left set instead.
 */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	/* PyModule_GetDict returns a borrowed reference; take our own so
	 * the cleanup path can decref it unconditionally. */
	Py_INCREF(dict);

	/* borrowed reference; no decref needed for errclass */
	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
143 143
144 144 /*
145 145 * Return a pointer to the beginning of a RevlogNG record.
146 146 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		/* Inline revlogs interleave data with index records, so
		 * records are not at a fixed stride; lazily build a table of
		 * per-record offsets with one scan of the buffer. */
		if (self->offsets == NULL) {
			self->offsets = PyMem_Malloc(self->raw_length *
			                             sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			/* NOTE(review): inline_scan's return value is ignored
			 * here — confirm it cannot fail for an index that has
			 * already been parsed successfully. */
			inline_scan(self, self->offsets);
		}
		return self->offsets[pos];
	}

	/* Non-inline index (or entry 0): fixed 64-byte records. */
	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
}
162 162
163 163 /*
164 164 * Get parents of the given rev.
165 165 *
166 166 * The specified rev must be valid and must not be nullrev. A returned
167 167 * parent revision may be nullrev, but is guaranteed to be in valid range.
168 168 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
	if (rev >= self->length) {
		/* Pending entry held in self->added; parents are tuple
		 * items 5 and 6. */
		long tmp;
		PyObject *tuple =
		    PyList_GET_ITEM(self->added, rev - self->length);
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
			return -1;
		}
		ps[0] = (int)tmp;
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
			return -1;
		}
		ps[1] = (int)tmp;
	} else {
		/* On-disk entry: parents are big-endian 32-bit values at
		 * byte offsets 24 and 28 (see the RevlogNG layout comment
		 * further down). */
		const char *data = index_deref(self, rev);
		ps[0] = getbe32(data + 24);
		ps[1] = getbe32(data + 28);
	}
	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}
197 197
198 198 /*
199 199 * Get parents of the given rev.
200 200 *
201 201 * If the specified rev is out of range, IndexError will be raised. If the
202 202 * revlog entry is corrupted, ValueError may be raised.
203 203 *
204 204 * Returns 0 on success or -1 on failure.
205 205 */
206 206 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
207 207 {
208 208 int tiprev;
209 209 if (!op || !HgRevlogIndex_Check(op) || !ps) {
210 210 PyErr_BadInternalCall();
211 211 return -1;
212 212 }
213 213 tiprev = (int)index_length((indexObject *)op) - 1;
214 214 if (rev < -1 || rev > tiprev) {
215 215 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
216 216 return -1;
217 217 } else if (rev == -1) {
218 218 ps[0] = ps[1] = -1;
219 219 return 0;
220 220 } else {
221 221 return index_get_parents((indexObject *)op, rev, ps, tiprev);
222 222 }
223 223 }
224 224
/*
 * Return the byte offset at which rev's data starts, i.e. the stored
 * 48-bit offset field (offset_flags >> 16).
 * Returns -1 with a Python exception set on error.
 */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	uint64_t offset;
	if (rev == nullrev) {
		return 0;
	}
	if (rev >= self->length) {
		/* Pending entry: tuple item 0 holds the packed
		 * offset/flags value. */
		PyObject *tuple;
		PyObject *pylong;
		PY_LONG_LONG tmp;
		tuple = PyList_GET_ITEM(self->added, rev - self->length);
		pylong = PyTuple_GET_ITEM(tuple, 0);
		tmp = PyLong_AsLongLong(pylong);
		if (tmp == -1 && PyErr_Occurred()) {
			return -1;
		}
		if (tmp < 0) {
			PyErr_Format(PyExc_OverflowError,
			             "revlog entry size out of bound (%lld)",
			             (long long)tmp);
			return -1;
		}
		offset = (uint64_t)tmp;
	} else {
		const char *data = index_deref(self, rev);
		offset = getbe32(data + 4);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			/* upper 32 bits of the 6-byte offset live in the
			 * first word of the record */
			uint32_t offset_high = getbe32(data);
			offset |= ((uint64_t)offset_high) << 32;
		}
	}
	/* low 16 bits are flags; the start offset is the upper 48 bits */
	return (int64_t)(offset >> 16);
}
261 261
/*
 * Return the compressed data length of the given rev (tuple item 1 /
 * bytes 8-11 of an on-disk record), or -1 with an exception set.
 */
static inline int index_get_length(indexObject *self, Py_ssize_t rev)
{
	if (rev == nullrev) {
		return 0;
	}
	if (rev >= self->length) {
		/* Pending entry held in self->added. */
		PyObject *tuple;
		PyObject *pylong;
		long ret;
		tuple = PyList_GET_ITEM(self->added, rev - self->length);
		pylong = PyTuple_GET_ITEM(tuple, 1);
		ret = PyInt_AsLong(pylong);
		if (ret == -1 && PyErr_Occurred()) {
			return -1;
		}
		if (ret < 0 || ret > (long)INT_MAX) {
			PyErr_Format(PyExc_OverflowError,
			             "revlog entry size out of bound (%ld)",
			             ret);
			return -1;
		}
		return (int)ret;
	} else {
		const char *data = index_deref(self, rev);
		/* negative after the cast means the stored length did not
		 * fit in a signed int: corrupt entry */
		int tmp = (int)getbe32(data + 8);
		if (tmp < 0) {
			PyErr_Format(PyExc_OverflowError,
			             "revlog entry size out of bound (%d)",
			             tmp);
			return -1;
		}
		return tmp;
	}
}
296 296
297 297 /*
298 298 * RevlogNG format (all in big endian, data may be inlined):
299 299 * 6 bytes: offset
300 300 * 2 bytes: flags
301 301 * 4 bytes: compressed length
302 302 * 4 bytes: uncompressed length
303 303 * 4 bytes: base revision
304 304 * 4 bytes: link revision
305 305 * 4 bytes: parent 1 revision
306 306 * 4 bytes: parent 2 revision
307 307 * 32 bytes: nodeid (only 20 bytes used)
308 308 */
/*
 * Return rev `pos` as an 8-tuple (offset_flags, comp_len, uncomp_len,
 * base_rev, link_rev, p1, p2, nodeid). Decoded tuples for on-disk
 * entries are memoized in self->cache.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);
	PyObject *entry;

	if (pos == nullrev) {
		/* shared immutable sentinel for the null revision */
		Py_INCREF(nullentry);
		return nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	if (pos >= self->length) {
		/* pending entries are already tuples; hand one out */
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length);
		Py_INCREF(obj);
		return obj;
	}

	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		/* lazily allocate the per-rev tuple cache */
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	/* decode the fixed fields; see the RevlogNG layout comment above */
	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                      base_rev, link_rev, parent_1, parent_2, c_node_id,
	                      20);

	if (entry) {
		/* the extra incref is the cache's reference; the tuple holds
		 * only immutable values so GC tracking is unnecessary */
		PyObject_GC_UnTrack(entry);
		Py_INCREF(entry);
	}

	self->cache[pos] = entry;

	return entry;
}
379 379
380 380 /*
381 381 * Return the 20-byte SHA of the node corresponding to the given rev.
382 382 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;

	if (pos == nullrev)
		return nullid;

	/* out of range: let the caller decide how to report it */
	if (pos >= length)
		return NULL;

	if (pos >= self->length) {
		/* pending entry: node id is tuple item 7 (a bytes object) */
		PyObject *tuple, *str;
		tuple = PyList_GET_ITEM(self->added, pos - self->length);
		str = PyTuple_GetItem(tuple, 7);
		return str ? PyBytes_AS_STRING(str) : NULL;
	}

	/* on-disk entry: node id starts at byte 32 of the record */
	data = index_deref(self, pos);
	return data ? data + 32 : NULL;
}
404 404
405 405 /*
406 406 * Return the 20-byte SHA of the node corresponding to the given rev. The
407 407 * rev is assumed to be existing. If not, an exception is set.
408 408 */
409 409 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
410 410 {
411 411 const char *node = index_node(self, pos);
412 412 if (node == NULL) {
413 413 PyErr_Format(PyExc_IndexError, "could not access rev %d",
414 414 (int)pos);
415 415 }
416 416 return node;
417 417 }
418 418
419 419 static int nt_insert(nodetree *self, const char *node, int rev);
420 420
421 421 static int node_check(PyObject *obj, char **node)
422 422 {
423 423 Py_ssize_t nodelen;
424 424 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
425 425 return -1;
426 426 if (nodelen == 20)
427 427 return 0;
428 428 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
429 429 return -1;
430 430 }
431 431
/*
 * Append an 8-tuple index entry to the in-memory "added" list, keeping
 * the node trie and headrevs cache coherent.
 * Returns None, or NULL with an exception set.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	char *node;
	Py_ssize_t len;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	/* item 7 must be a 20-byte node hash */
	if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
		return NULL;

	/* current length == rev number the new entry will get */
	len = index_length(self);

	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	/* keep the node->rev trie in sync if it has been built */
	if (self->ntinitialized)
		nt_insert(&self->nt, node, (int)len);

	/* the set of heads may have changed; drop the cached answer */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
462 462
/*
 * stats() -> dict: expose internal counters (entry counts, node-trie
 * statistics) for debugging and introspection.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* Insert {__d: self->__n} into obj, jumping to bail on any failure. */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyInt_FromSsize_t(self->__n);                              \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added) {
		Py_ssize_t len = PyList_GET_SIZE(self->added);
		s = PyBytes_FromString("index entries added");
		t = PyInt_FromSsize_t(len);
		if (!s || !t)
			goto bail;
		if (PyDict_SetItem(obj, s, t) == -1)
			goto bail;
		Py_CLEAR(s);
		Py_CLEAR(t);
	}

	if (self->raw_length != self->length)
		istat(raw_length, "revs on disk");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
519 519
520 520 /*
521 521 * When we cache a list, we want to be sure the caller can't mutate
522 522 * the cached copy.
523 523 */
524 524 static PyObject *list_copy(PyObject *list)
525 525 {
526 526 Py_ssize_t len = PyList_GET_SIZE(list);
527 527 PyObject *newlist = PyList_New(len);
528 528 Py_ssize_t i;
529 529
530 530 if (newlist == NULL)
531 531 return NULL;
532 532
533 533 for (i = 0; i < len; i++) {
534 534 PyObject *obj = PyList_GET_ITEM(list, i);
535 535 Py_INCREF(obj);
536 536 PyList_SET_ITEM(newlist, i, obj);
537 537 }
538 538
539 539 return newlist;
540 540 }
541 541
542 542 static int check_filter(PyObject *filter, Py_ssize_t arg)
543 543 {
544 544 if (filter) {
545 545 PyObject *arglist, *result;
546 546 int isfiltered;
547 547
548 548 arglist = Py_BuildValue("(n)", arg);
549 549 if (!arglist) {
550 550 return -1;
551 551 }
552 552
553 553 result = PyEval_CallObject(filter, arglist);
554 554 Py_DECREF(arglist);
555 555 if (!result) {
556 556 return -1;
557 557 }
558 558
559 559 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
560 560 * same as this function, so we can just return it directly.*/
561 561 isfiltered = PyObject_IsTrue(result);
562 562 Py_DECREF(result);
563 563 return isfiltered;
564 564 } else {
565 565 return 0;
566 566 }
567 567 }
568 568
569 569 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
570 570 Py_ssize_t marker, char *phases)
571 571 {
572 572 PyObject *iter = NULL;
573 573 PyObject *iter_item = NULL;
574 574 Py_ssize_t min_idx = index_length(self) + 2;
575 575 long iter_item_long;
576 576
577 577 if (PyList_GET_SIZE(list) != 0) {
578 578 iter = PyObject_GetIter(list);
579 579 if (iter == NULL)
580 580 return -2;
581 581 while ((iter_item = PyIter_Next(iter))) {
582 582 if (!pylong_to_long(iter_item, &iter_item_long)) {
583 583 Py_DECREF(iter_item);
584 584 return -2;
585 585 }
586 586 Py_DECREF(iter_item);
587 587 if (iter_item_long < min_idx)
588 588 min_idx = iter_item_long;
589 589 phases[iter_item_long] = (char)marker;
590 590 }
591 591 Py_DECREF(iter);
592 592 }
593 593
594 594 return min_idx;
595 595 }
596 596
597 597 static inline void set_phase_from_parents(char *phases, int parent_1,
598 598 int parent_2, Py_ssize_t i)
599 599 {
600 600 if (parent_1 >= 0 && phases[parent_1] > phases[i])
601 601 phases[i] = phases[parent_1];
602 602 if (parent_2 >= 0 && phases[parent_2] > phases[i])
603 603 phases[i] = phases[parent_2];
604 604 }
605 605
/*
 * reachableroots2(minroot, heads, roots, includepath) -> list
 *
 * Starting from `heads`, walk parent edges (never descending below
 * `minroot`) and collect every member of `roots` that is reached.
 * When `includepath` is True, additionally return every rev lying on a
 * path between a reachable root and a head.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev); indexed by
	 * rev+1 so that nullrev maps to slot 0 */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark every requested root in revstates */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* Without includepath the walk can stop at a root */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			/* only descend as far as minroot */
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* forward sweep: a seen rev with a reachable parent is
		 * itself on a root->head path */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
764 764
/*
 * computephasesmapsets(roots) -> (len, [None, set, set, ...])
 *
 * `roots` is a list of lists of root revs, one inner list per
 * non-public phase. Phase values are propagated from the roots to all
 * descendants; the result pairs the index length with per-phase rev
 * sets (slot 0, public, is left as None — the caller computes it as a
 * difference).
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	PyObject *roots = Py_None;
	PyObject *ret = NULL;
	PyObject *phasessize = NULL;
	PyObject *phaseroots = NULL;
	PyObject *phaseset = NULL;
	PyObject *phasessetlist = NULL;
	PyObject *rev = NULL;
	Py_ssize_t len = index_length(self);
	Py_ssize_t numphase = 0;
	Py_ssize_t minrevallphases = 0;
	Py_ssize_t minrevphase = 0;
	Py_ssize_t i = 0;
	char *phases = NULL;
	long phase;

	if (!PyArg_ParseTuple(args, "O", &roots))
		goto done;
	if (roots == NULL || !PyList_Check(roots)) {
		PyErr_SetString(PyExc_TypeError, "roots must be a list");
		goto done;
	}

	phases = calloc(
	    len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
	if (phases == NULL) {
		PyErr_NoMemory();
		goto done;
	}
	/* Put the phase information of all the roots in phases */
	numphase = PyList_GET_SIZE(roots) + 1;
	minrevallphases = len + 1;
	phasessetlist = PyList_New(numphase);
	if (phasessetlist == NULL)
		goto done;

	/* slot 0 (public) stays None */
	PyList_SET_ITEM(phasessetlist, 0, Py_None);
	Py_INCREF(Py_None);

	for (i = 0; i < numphase - 1; i++) {
		phaseroots = PyList_GET_ITEM(roots, i);
		phaseset = PySet_New(NULL);
		if (phaseset == NULL)
			goto release;
		/* list owns phaseset from here on (reference stolen) */
		PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
		if (!PyList_Check(phaseroots)) {
			PyErr_SetString(PyExc_TypeError,
			                "roots item must be a list");
			goto release;
		}
		minrevphase =
		    add_roots_get_min(self, phaseroots, i + 1, phases);
		if (minrevphase == -2) /* Error from add_roots_get_min */
			goto release;
		minrevallphases = MIN(minrevallphases, minrevphase);
	}
	/* Propagate the phase information from the roots to the revs */
	if (minrevallphases != -1) {
		int parents[2];
		for (i = minrevallphases; i < len; i++) {
			if (index_get_parents(self, i, parents, (int)len - 1) <
			    0)
				goto release;
			set_phase_from_parents(phases, parents[0], parents[1],
			                       i);
		}
	}
	/* Transform phase list to a python list */
	phasessize = PyInt_FromSsize_t(len);
	if (phasessize == NULL)
		goto release;
	for (i = 0; i < len; i++) {
		phase = phases[i];
		/* We only store the sets of phase for non public phase, the
		 * public phase is computed as a difference */
		if (phase != 0) {
			phaseset = PyList_GET_ITEM(phasessetlist, phase);
			rev = PyInt_FromSsize_t(i);
			if (rev == NULL)
				goto release;
			PySet_Add(phaseset, rev);
			Py_XDECREF(rev);
		}
	}
	/* PyTuple_Pack takes its own references to both members */
	ret = PyTuple_Pack(2, phasessize, phasessetlist);

release:
	Py_XDECREF(phasessize);
	Py_XDECREF(phasessetlist);
done:
	free(phases);
	return ret;
}
859 859
/*
 * headrevs([filteredrevs]) -> list of head revs
 *
 * A head is a rev with no unfiltered child. The answer is cached on
 * self->headrevs and reused while the same filter object is passed;
 * callers always receive a private copy of the cached list.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cached answer is only valid for the same filter object */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		/* empty repo: nullrev is the only head */
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	/* nothead[i] set => rev i is filtered or has an unfiltered child */
	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* everything left unmarked is a head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	/* return a copy so callers cannot mutate the cache */
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
961 961
962 962 /**
963 963 * Obtain the base revision index entry.
964 964 *
965 965 * Callers must ensure that rev >= 0 or illegal memory access may occur.
966 966 */
static inline int index_baserev(indexObject *self, int rev)
{
	const char *data;
	int result;

	if (rev >= self->length) {
		/* pending entry: base revision is tuple item 3 */
		PyObject *tuple =
		    PyList_GET_ITEM(self->added, rev - self->length);
		long ret;
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
			return -2;
		}
		result = (int)ret;
	} else {
		data = index_deref(self, rev);
		if (data == NULL) {
			return -2;
		}

		/* on-disk entry: base revision at bytes 16-19 */
		result = getbe32(data + 16);
	}
	/* a delta base can never come after the revision itself */
	if (result > rev) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base above revision: %d, %d",
		    rev, result);
		return -2;
	}
	return result;
}
997 997
998 998 /**
999 999 * Find if a revision is a snapshot or not
1000 1000 *
1001 1001 * Only relevant for sparse-revlog case.
1002 1002 * Callers must ensure that rev is in a valid range.
1003 1003 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	Py_ssize_t base;
	/* Walk up the delta chain: a rev is a snapshot iff every link of
	 * the chain deltas against its base rather than a parent. */
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			/* self-based entry: normalize to the "chain ends"
			 * marker */
			base = -1;
		}
		if (base == -2) {
			/* error already reported by index_baserev */
			assert(PyErr_Occurred());
			return -1;
		}
		if (base == -1) {
			/* reached a full snapshot (or nullrev base) */
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		if (base == ps[0] || base == ps[1]) {
			/* delta against a parent: plain delta, not a
			 * snapshot */
			return 0;
		}
		rev = base;
	}
	return rev == -1;
}
1031 1031
1032 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1033 {
1034 long rev;
1035 int issnap;
1036 Py_ssize_t length = index_length(self);
1037
1038 if (!pylong_to_long(value, &rev)) {
1039 return NULL;
1040 }
1041 if (rev < -1 || rev >= length) {
1042 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1043 rev);
1044 return NULL;
1045 };
1046 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1047 if (issnap < 0) {
1048 return NULL;
1049 };
1050 return PyBool_FromLong((long)issnap);
1051 }
1052
1032 1053 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1033 1054 {
1034 1055 int rev, generaldelta;
1035 1056 PyObject *stoparg;
1036 1057 int stoprev, iterrev, baserev = -1;
1037 1058 int stopped;
1038 1059 PyObject *chain = NULL, *result = NULL;
1039 1060 const Py_ssize_t length = index_length(self);
1040 1061
1041 1062 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1042 1063 return NULL;
1043 1064 }
1044 1065
1045 1066 if (PyInt_Check(stoparg)) {
1046 1067 stoprev = (int)PyInt_AsLong(stoparg);
1047 1068 if (stoprev == -1 && PyErr_Occurred()) {
1048 1069 return NULL;
1049 1070 }
1050 1071 } else if (stoparg == Py_None) {
1051 1072 stoprev = -2;
1052 1073 } else {
1053 1074 PyErr_SetString(PyExc_ValueError,
1054 1075 "stoprev must be integer or None");
1055 1076 return NULL;
1056 1077 }
1057 1078
1058 1079 if (rev < 0 || rev >= length) {
1059 1080 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1060 1081 return NULL;
1061 1082 }
1062 1083
1063 1084 chain = PyList_New(0);
1064 1085 if (chain == NULL) {
1065 1086 return NULL;
1066 1087 }
1067 1088
1068 1089 baserev = index_baserev(self, rev);
1069 1090
1070 1091 /* This should never happen. */
1071 1092 if (baserev <= -2) {
1072 1093 /* Error should be set by index_deref() */
1073 1094 assert(PyErr_Occurred());
1074 1095 goto bail;
1075 1096 }
1076 1097
1077 1098 iterrev = rev;
1078 1099
1079 1100 while (iterrev != baserev && iterrev != stoprev) {
1080 1101 PyObject *value = PyInt_FromLong(iterrev);
1081 1102 if (value == NULL) {
1082 1103 goto bail;
1083 1104 }
1084 1105 if (PyList_Append(chain, value)) {
1085 1106 Py_DECREF(value);
1086 1107 goto bail;
1087 1108 }
1088 1109 Py_DECREF(value);
1089 1110
1090 1111 if (generaldelta) {
1091 1112 iterrev = baserev;
1092 1113 } else {
1093 1114 iterrev--;
1094 1115 }
1095 1116
1096 1117 if (iterrev < 0) {
1097 1118 break;
1098 1119 }
1099 1120
1100 1121 if (iterrev >= length) {
1101 1122 PyErr_SetString(PyExc_IndexError,
1102 1123 "revision outside index");
1103 1124 return NULL;
1104 1125 }
1105 1126
1106 1127 baserev = index_baserev(self, iterrev);
1107 1128
1108 1129 /* This should never happen. */
1109 1130 if (baserev <= -2) {
1110 1131 /* Error should be set by index_deref() */
1111 1132 assert(PyErr_Occurred());
1112 1133 goto bail;
1113 1134 }
1114 1135 }
1115 1136
1116 1137 if (iterrev == stoprev) {
1117 1138 stopped = 1;
1118 1139 } else {
1119 1140 PyObject *value = PyInt_FromLong(iterrev);
1120 1141 if (value == NULL) {
1121 1142 goto bail;
1122 1143 }
1123 1144 if (PyList_Append(chain, value)) {
1124 1145 Py_DECREF(value);
1125 1146 goto bail;
1126 1147 }
1127 1148 Py_DECREF(value);
1128 1149
1129 1150 stopped = 0;
1130 1151 }
1131 1152
1132 1153 if (PyList_Reverse(chain)) {
1133 1154 goto bail;
1134 1155 }
1135 1156
1136 1157 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1137 1158 Py_DECREF(chain);
1138 1159 return result;
1139 1160
1140 1161 bail:
1141 1162 Py_DECREF(chain);
1142 1163 return NULL;
1143 1164 }
1144 1165
1145 1166 static inline int64_t
1146 1167 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1147 1168 {
1148 1169 int64_t start_offset;
1149 1170 int64_t end_offset;
1150 1171 int end_size;
1151 1172 start_offset = index_get_start(self, start_rev);
1152 1173 if (start_offset < 0) {
1153 1174 return -1;
1154 1175 }
1155 1176 end_offset = index_get_start(self, end_rev);
1156 1177 if (end_offset < 0) {
1157 1178 return -1;
1158 1179 }
1159 1180 end_size = index_get_length(self, end_rev);
1160 1181 if (end_size < 0) {
1161 1182 return -1;
1162 1183 }
1163 1184 if (end_offset < start_offset) {
1164 1185 PyErr_Format(PyExc_ValueError,
1165 1186 "corrupted revlog index: inconsistent offset "
1166 1187 "between revisions (%zd) and (%zd)",
1167 1188 start_rev, end_rev);
1168 1189 return -1;
1169 1190 }
1170 1191 return (end_offset - start_offset) + (int64_t)end_size;
1171 1192 }
1172 1193
1173 1194 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1174 1195 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1175 1196 Py_ssize_t startidx, Py_ssize_t endidx)
1176 1197 {
1177 1198 int length;
1178 1199 while (endidx > 1 && endidx > startidx) {
1179 1200 length = index_get_length(self, revs[endidx - 1]);
1180 1201 if (length < 0) {
1181 1202 return -1;
1182 1203 }
1183 1204 if (length != 0) {
1184 1205 break;
1185 1206 }
1186 1207 endidx -= 1;
1187 1208 }
1188 1209 return endidx;
1189 1210 }
1190 1211
/* A read gap between two chunks of a delta chain: "size" is the number
 * of bytes that would be read without being used, "idx" is the position
 * (in the chain's revs array) of the revision right after the gap. */
struct Gap {
	int64_t size;
	Py_ssize_t idx;
};
1195 1216
1196 1217 static int gap_compare(const void *left, const void *right)
1197 1218 {
1198 1219 const struct Gap *l_left = ((const struct Gap *)left);
1199 1220 const struct Gap *l_right = ((const struct Gap *)right);
1200 1221 if (l_left->size < l_right->size) {
1201 1222 return -1;
1202 1223 } else if (l_left->size > l_right->size) {
1203 1224 return 1;
1204 1225 }
1205 1226 return 0;
1206 1227 }
1207 1228 static int Py_ssize_t_compare(const void *left, const void *right)
1208 1229 {
1209 1230 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1210 1231 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1211 1232 if (l_left < l_right) {
1212 1233 return -1;
1213 1234 } else if (l_left > l_right) {
1214 1235 return 1;
1215 1236 }
1216 1237 return 0;
1217 1238 }
1218 1239
/*
 * Slice a delta chain into chunks that are dense enough to be read
 * efficiently.
 *
 * Python arguments: (revs, targetdensity, mingapsize). Returns a
 * sequence of lists of revisions; each sub-list is a read that skips
 * no gap larger than needed to reach "targetdensity". Mirrors the pure
 * Python implementation in mercurial/revlogutils/deltas.py.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* a span no bigger than mingapsize cannot contain a relevant gap */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* already dense enough: read everything in one go */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		/* empty revisions occupy no bytes and create no gap */
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* restore chain order for the cut points we picked */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the loop below also emits the final chunk */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1414 1435
1415 1436 static inline int nt_level(const char *node, Py_ssize_t level)
1416 1437 {
1417 1438 int v = node[level >> 1];
1418 1439 if (!(level & 1))
1419 1440 v >>= 4;
1420 1441 return v & 0xf;
1421 1442 }
1422 1443
/*
 * Look up a full or partial node id in the tree. "nodelen" counts bytes
 * for binary lookups and hex digits when "hex" is true.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* the all-zero nullid is never stored in the tree: special-case it */
	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
		return -1;

	if (hex)
		maxlevel = nodelen > 40 ? 40 : (int)nodelen;
	else
		maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf: decode the rev, then verify the remaining
			 * query nybbles against the stored node id */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* internal node: descend one level */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1469 1490
1470 1491 static int nt_new(nodetree *self)
1471 1492 {
1472 1493 if (self->length == self->capacity) {
1473 1494 unsigned newcapacity;
1474 1495 nodetreenode *newnodes;
1475 1496 newcapacity = self->capacity * 2;
1476 1497 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1477 1498 PyErr_SetString(PyExc_MemoryError,
1478 1499 "overflow in nt_new");
1479 1500 return -1;
1480 1501 }
1481 1502 newnodes =
1482 1503 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1483 1504 if (newnodes == NULL) {
1484 1505 PyErr_SetString(PyExc_MemoryError, "out of memory");
1485 1506 return -1;
1486 1507 }
1487 1508 self->capacity = newcapacity;
1488 1509 self->nodes = newnodes;
1489 1510 memset(&self->nodes[self->length], 0,
1490 1511 sizeof(nodetreenode) * (self->capacity - self->length));
1491 1512 }
1492 1513 return self->length++;
1493 1514 }
1494 1515
/*
 * Insert (node, rev) into the tree, splitting colliding leaves into
 * internal nodes so every stored node id keeps a unique path. Returns
 * 0 on success, -1 on error with an exception set.
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the leaf here; revs are encoded
			 * as -(rev + 2) so 0 keeps meaning "empty" */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* slot already holds a leaf */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, 20)) {
				/* same node id: just replace the rev */
				n->children[k] = -rev - 2;
				return 0;
			}
			/* collision: push the old leaf one level down and
			 * continue inserting the new node from there */
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* internal node: descend */
			level += 1;
			off = v;
		}
	}

	return -1;
}
1542 1563
1543 1564 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1544 1565 {
1545 1566 Py_ssize_t rev;
1546 1567 const char *node;
1547 1568 Py_ssize_t length;
1548 1569 if (!PyArg_ParseTuple(args, "n", &rev))
1549 1570 return NULL;
1550 1571 length = index_length(self->nt.index);
1551 1572 if (rev < 0 || rev >= length) {
1552 1573 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1553 1574 return NULL;
1554 1575 }
1555 1576 node = index_node_existing(self->nt.index, rev);
1556 1577 if (nt_insert(&self->nt, node, (int)rev) == -1)
1557 1578 return NULL;
1558 1579 Py_RETURN_NONE;
1559 1580 }
1560 1581
/* Remove "node" from the tree by overwriting its leaf slot. */
static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}
1567 1588
1568 1589 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1569 1590 {
1570 1591 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1571 1592 self->nodes = NULL;
1572 1593
1573 1594 self->index = index;
1574 1595 /* The input capacity is in terms of revisions, while the field is in
1575 1596 * terms of nodetree nodes. */
1576 1597 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1577 1598 self->depth = 0;
1578 1599 self->splits = 0;
1579 1600 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1580 1601 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1581 1602 return -1;
1582 1603 }
1583 1604 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1584 1605 if (self->nodes == NULL) {
1585 1606 PyErr_NoMemory();
1586 1607 return -1;
1587 1608 }
1588 1609 self->length = 1;
1589 1610 return 0;
1590 1611 }
1591 1612
1592 1613 static int ntobj_init(nodetreeObject *self, PyObject *args)
1593 1614 {
1594 1615 PyObject *index;
1595 1616 unsigned capacity;
1596 1617 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1597 1618 &capacity))
1598 1619 return -1;
1599 1620 Py_INCREF(index);
1600 1621 return nt_init(&self->nt, (indexObject *)index, capacity);
1601 1622 }
1602 1623
/* Look up a hex node prefix; same return convention as nt_find
 * (-4 ambiguous, -2 not found, otherwise a valid rev). */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
1607 1628
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 40; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* reached a leaf: the prefix is unique from here on */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, 20) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		/* internal node: keep descending */
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
1653 1674
1654 1675 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1655 1676 {
1656 1677 PyObject *val;
1657 1678 char *node;
1658 1679 int length;
1659 1680
1660 1681 if (!PyArg_ParseTuple(args, "O", &val))
1661 1682 return NULL;
1662 1683 if (node_check(val, &node) == -1)
1663 1684 return NULL;
1664 1685
1665 1686 length = nt_shortest(&self->nt, node);
1666 1687 if (length == -3)
1667 1688 return NULL;
1668 1689 if (length == -2) {
1669 1690 raise_revlog_error();
1670 1691 return NULL;
1671 1692 }
1672 1693 return PyInt_FromLong(length);
1673 1694 }
1674 1695
/* Release the trie's node storage; safe to call multiple times. */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
1680 1701
/* Destructor for the Python-level nodetree: drop the index reference,
 * free the trie storage, then free the object itself. */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
1687 1708
/* Methods exposed on the Python-level nodetree object */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
1695 1716
/* Type object for the standalone parsers.nodetree class */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
1735 1756
1736 1757 static int index_init_nt(indexObject *self)
1737 1758 {
1738 1759 if (!self->ntinitialized) {
1739 1760 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1740 1761 nt_dealloc(&self->nt);
1741 1762 return -1;
1742 1763 }
1743 1764 if (nt_insert(&self->nt, nullid, -1) == -1) {
1744 1765 nt_dealloc(&self->nt);
1745 1766 return -1;
1746 1767 }
1747 1768 self->ntinitialized = 1;
1748 1769 self->ntrev = (int)index_length(self);
1749 1770 self->ntlookups = 1;
1750 1771 self->ntmisses = 0;
1751 1772 }
1752 1773 return 0;
1753 1774 }
1754 1775
/*
 * Resolve a full or partial binary node id to a rev, using the radix
 * tree as a cache over linear index scans.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node,
                           Py_ssize_t nodelen)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(&self->nt, n, rev) == -1) {
				/* revisions > rev were cached; record where
				 * to resume on the next lookup */
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
1815 1836
1816 1837 static PyObject *index_getitem(indexObject *self, PyObject *value)
1817 1838 {
1818 1839 char *node;
1819 1840 int rev;
1820 1841
1821 1842 if (PyInt_Check(value)) {
1822 1843 long idx;
1823 1844 if (!pylong_to_long(value, &idx)) {
1824 1845 return NULL;
1825 1846 }
1826 1847 return index_get(self, idx);
1827 1848 }
1828 1849
1829 1850 if (node_check(value, &node) == -1)
1830 1851 return NULL;
1831 1852 rev = index_find_node(self, node, 20);
1832 1853 if (rev >= -1)
1833 1854 return PyInt_FromLong(rev);
1834 1855 if (rev == -2)
1835 1856 raise_revlog_error();
1836 1857 return NULL;
1837 1858 }
1838 1859
1839 1860 /*
1840 1861 * Fully populate the radix tree.
1841 1862 */
1842 1863 static int index_populate_nt(indexObject *self)
1843 1864 {
1844 1865 int rev;
1845 1866 if (self->ntrev > 0) {
1846 1867 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1847 1868 const char *n = index_node_existing(self, rev);
1848 1869 if (n == NULL)
1849 1870 return -1;
1850 1871 if (nt_insert(&self->nt, n, rev) == -1)
1851 1872 return -1;
1852 1873 }
1853 1874 self->ntrev = -1;
1854 1875 }
1855 1876 return 0;
1856 1877 }
1857 1878
/*
 * Resolve a hex node id prefix to a full 20-byte binary node.
 *
 * Returns the matching node, None when the prefix is unknown or not
 * valid hex, and raises RevlogError when the prefix is ambiguous.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	int nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() sets an exception on the first non-hex character */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* prefix matching needs the complete tree to detect ambiguity */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, 20);
}
1908 1929
1909 1930 static PyObject *index_shortest(indexObject *self, PyObject *args)
1910 1931 {
1911 1932 PyObject *val;
1912 1933 char *node;
1913 1934 int length;
1914 1935
1915 1936 if (!PyArg_ParseTuple(args, "O", &val))
1916 1937 return NULL;
1917 1938 if (node_check(val, &node) == -1)
1918 1939 return NULL;
1919 1940
1920 1941 self->ntlookups++;
1921 1942 if (index_init_nt(self) == -1)
1922 1943 return NULL;
1923 1944 if (index_populate_nt(self) == -1)
1924 1945 return NULL;
1925 1946 length = nt_shortest(&self->nt, node);
1926 1947 if (length == -3)
1927 1948 return NULL;
1928 1949 if (length == -2) {
1929 1950 raise_revlog_error();
1930 1951 return NULL;
1931 1952 }
1932 1953 return PyInt_FromLong(length);
1933 1954 }
1934 1955
1935 1956 static PyObject *index_m_get(indexObject *self, PyObject *args)
1936 1957 {
1937 1958 PyObject *val;
1938 1959 char *node;
1939 1960 int rev;
1940 1961
1941 1962 if (!PyArg_ParseTuple(args, "O", &val))
1942 1963 return NULL;
1943 1964 if (node_check(val, &node) == -1)
1944 1965 return NULL;
1945 1966 rev = index_find_node(self, node, 20);
1946 1967 if (rev == -3)
1947 1968 return NULL;
1948 1969 if (rev == -2)
1949 1970 Py_RETURN_NONE;
1950 1971 return PyInt_FromLong(rev);
1951 1972 }
1952 1973
1953 1974 static int index_contains(indexObject *self, PyObject *value)
1954 1975 {
1955 1976 char *node;
1956 1977
1957 1978 if (PyInt_Check(value)) {
1958 1979 long rev;
1959 1980 if (!pylong_to_long(value, &rev)) {
1960 1981 return -1;
1961 1982 }
1962 1983 return rev >= -1 && rev < index_length(self);
1963 1984 }
1964 1985
1965 1986 if (node_check(value, &node) == -1)
1966 1987 return -1;
1967 1988
1968 1989 switch (index_find_node(self, node, 20)) {
1969 1990 case -3:
1970 1991 return -1;
1971 1992 case -2:
1972 1993 return 0;
1973 1994 default:
1974 1995 return 1;
1975 1996 }
1976 1997 }
1977 1998
1978 1999 typedef uint64_t bitmask;
1979 2000
1980 2001 /*
1981 2002 * Given a disjoint set of revs, return all candidates for the
1982 2003 * greatest common ancestor. In revset notation, this is the set
1983 2004 * "heads(::a and ::b and ...)"
1984 2005 */
1985 2006 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1986 2007 int revcount)
1987 2008 {
1988 2009 const bitmask allseen = (1ull << revcount) - 1;
1989 2010 const bitmask poison = 1ull << revcount;
1990 2011 PyObject *gca = PyList_New(0);
1991 2012 int i, v, interesting;
1992 2013 int maxrev = -1;
1993 2014 bitmask sp;
1994 2015 bitmask *seen;
1995 2016
1996 2017 if (gca == NULL)
1997 2018 return PyErr_NoMemory();
1998 2019
1999 2020 for (i = 0; i < revcount; i++) {
2000 2021 if (revs[i] > maxrev)
2001 2022 maxrev = revs[i];
2002 2023 }
2003 2024
2004 2025 seen = calloc(sizeof(*seen), maxrev + 1);
2005 2026 if (seen == NULL) {
2006 2027 Py_DECREF(gca);
2007 2028 return PyErr_NoMemory();
2008 2029 }
2009 2030
2010 2031 for (i = 0; i < revcount; i++)
2011 2032 seen[revs[i]] = 1ull << i;
2012 2033
2013 2034 interesting = revcount;
2014 2035
2015 2036 for (v = maxrev; v >= 0 && interesting; v--) {
2016 2037 bitmask sv = seen[v];
2017 2038 int parents[2];
2018 2039
2019 2040 if (!sv)
2020 2041 continue;
2021 2042
2022 2043 if (sv < poison) {
2023 2044 interesting -= 1;
2024 2045 if (sv == allseen) {
2025 2046 PyObject *obj = PyInt_FromLong(v);
2026 2047 if (obj == NULL)
2027 2048 goto bail;
2028 2049 if (PyList_Append(gca, obj) == -1) {
2029 2050 Py_DECREF(obj);
2030 2051 goto bail;
2031 2052 }
2032 2053 sv |= poison;
2033 2054 for (i = 0; i < revcount; i++) {
2034 2055 if (revs[i] == v)
2035 2056 goto done;
2036 2057 }
2037 2058 }
2038 2059 }
2039 2060 if (index_get_parents(self, v, parents, maxrev) < 0)
2040 2061 goto bail;
2041 2062
2042 2063 for (i = 0; i < 2; i++) {
2043 2064 int p = parents[i];
2044 2065 if (p == -1)
2045 2066 continue;
2046 2067 sp = seen[p];
2047 2068 if (sv < poison) {
2048 2069 if (sp == 0) {
2049 2070 seen[p] = sv;
2050 2071 interesting++;
2051 2072 } else if (sp != sv)
2052 2073 seen[p] |= sv;
2053 2074 } else {
2054 2075 if (sp && sp < poison)
2055 2076 interesting--;
2056 2077 seen[p] = sv;
2057 2078 }
2058 2079 }
2059 2080 }
2060 2081
2061 2082 done:
2062 2083 free(seen);
2063 2084 return gca;
2064 2085 bail:
2065 2086 free(seen);
2066 2087 Py_XDECREF(gca);
2067 2088 return NULL;
2068 2089 }
2069 2090
2070 2091 /*
2071 2092 * Given a disjoint set of revs, return the subset with the longest
2072 2093 * path to the root.
2073 2094 */
2074 2095 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2075 2096 {
2076 2097 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2077 2098 static const Py_ssize_t capacity = 24;
2078 2099 int *depth, *interesting = NULL;
2079 2100 int i, j, v, ninteresting;
2080 2101 PyObject *dict = NULL, *keys = NULL;
2081 2102 long *seen = NULL;
2082 2103 int maxrev = -1;
2083 2104 long final;
2084 2105
2085 2106 if (revcount > capacity) {
2086 2107 PyErr_Format(PyExc_OverflowError,
2087 2108 "bitset size (%ld) > capacity (%ld)",
2088 2109 (long)revcount, (long)capacity);
2089 2110 return NULL;
2090 2111 }
2091 2112
2092 2113 for (i = 0; i < revcount; i++) {
2093 2114 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2094 2115 if (n > maxrev)
2095 2116 maxrev = n;
2096 2117 }
2097 2118
2098 2119 depth = calloc(sizeof(*depth), maxrev + 1);
2099 2120 if (depth == NULL)
2100 2121 return PyErr_NoMemory();
2101 2122
2102 2123 seen = calloc(sizeof(*seen), maxrev + 1);
2103 2124 if (seen == NULL) {
2104 2125 PyErr_NoMemory();
2105 2126 goto bail;
2106 2127 }
2107 2128
2108 2129 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2109 2130 if (interesting == NULL) {
2110 2131 PyErr_NoMemory();
2111 2132 goto bail;
2112 2133 }
2113 2134
2114 2135 if (PyList_Sort(revs) == -1)
2115 2136 goto bail;
2116 2137
2117 2138 for (i = 0; i < revcount; i++) {
2118 2139 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2119 2140 long b = 1l << i;
2120 2141 depth[n] = 1;
2121 2142 seen[n] = b;
2122 2143 interesting[b] = 1;
2123 2144 }
2124 2145
2125 2146 /* invariant: ninteresting is the number of non-zero entries in
2126 2147 * interesting. */
2127 2148 ninteresting = (int)revcount;
2128 2149
2129 2150 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2130 2151 int dv = depth[v];
2131 2152 int parents[2];
2132 2153 long sv;
2133 2154
2134 2155 if (dv == 0)
2135 2156 continue;
2136 2157
2137 2158 sv = seen[v];
2138 2159 if (index_get_parents(self, v, parents, maxrev) < 0)
2139 2160 goto bail;
2140 2161
2141 2162 for (i = 0; i < 2; i++) {
2142 2163 int p = parents[i];
2143 2164 long sp;
2144 2165 int dp;
2145 2166
2146 2167 if (p == -1)
2147 2168 continue;
2148 2169
2149 2170 dp = depth[p];
2150 2171 sp = seen[p];
2151 2172 if (dp <= dv) {
2152 2173 depth[p] = dv + 1;
2153 2174 if (sp != sv) {
2154 2175 interesting[sv] += 1;
2155 2176 seen[p] = sv;
2156 2177 if (sp) {
2157 2178 interesting[sp] -= 1;
2158 2179 if (interesting[sp] == 0)
2159 2180 ninteresting -= 1;
2160 2181 }
2161 2182 }
2162 2183 } else if (dv == dp - 1) {
2163 2184 long nsp = sp | sv;
2164 2185 if (nsp == sp)
2165 2186 continue;
2166 2187 seen[p] = nsp;
2167 2188 interesting[sp] -= 1;
2168 2189 if (interesting[sp] == 0)
2169 2190 ninteresting -= 1;
2170 2191 if (interesting[nsp] == 0)
2171 2192 ninteresting += 1;
2172 2193 interesting[nsp] += 1;
2173 2194 }
2174 2195 }
2175 2196 interesting[sv] -= 1;
2176 2197 if (interesting[sv] == 0)
2177 2198 ninteresting -= 1;
2178 2199 }
2179 2200
2180 2201 final = 0;
2181 2202 j = ninteresting;
2182 2203 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2183 2204 if (interesting[i] == 0)
2184 2205 continue;
2185 2206 final |= i;
2186 2207 j -= 1;
2187 2208 }
2188 2209 if (final == 0) {
2189 2210 keys = PyList_New(0);
2190 2211 goto bail;
2191 2212 }
2192 2213
2193 2214 dict = PyDict_New();
2194 2215 if (dict == NULL)
2195 2216 goto bail;
2196 2217
2197 2218 for (i = 0; i < revcount; i++) {
2198 2219 PyObject *key;
2199 2220
2200 2221 if ((final & (1 << i)) == 0)
2201 2222 continue;
2202 2223
2203 2224 key = PyList_GET_ITEM(revs, i);
2204 2225 Py_INCREF(key);
2205 2226 Py_INCREF(Py_None);
2206 2227 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2207 2228 Py_DECREF(key);
2208 2229 Py_DECREF(Py_None);
2209 2230 goto bail;
2210 2231 }
2211 2232 }
2212 2233
2213 2234 keys = PyDict_Keys(dict);
2214 2235
2215 2236 bail:
2216 2237 free(depth);
2217 2238 free(seen);
2218 2239 free(interesting);
2219 2240 Py_XDECREF(dict);
2220 2241
2221 2242 return keys;
2222 2243 }
2223 2244
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 *
 * Returns a new list of rev numbers, or NULL with an exception set.
 * A nullrev (-1) argument short-circuits to an empty list.
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0; /* 64-bit bloom filter keyed on rev & 0x3f */
	int revcount = 0;   /* number of distinct revs collected so far */
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyInt_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyInt_AsLong(obj);
		Py_DECREF(obj);
		/* nullrev is an ancestor of everything: answer is empty */
		if (val == -1) {
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		/* find_gca_candidates() assigns one bitset bit per rev,
		 * hence the hard cap */
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	/* a single rev is trivially its own common-ancestor head */
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyInt_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}
2315 2336
2316 2337 /*
2317 2338 * Given a (possibly overlapping) set of revs, return the greatest
2318 2339 * common ancestors: those with the longest path to the root.
2319 2340 */
2320 2341 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2321 2342 {
2322 2343 PyObject *ret;
2323 2344 PyObject *gca = index_commonancestorsheads(self, args);
2324 2345 if (gca == NULL)
2325 2346 return NULL;
2326 2347
2327 2348 if (PyList_GET_SIZE(gca) <= 1) {
2328 2349 return gca;
2329 2350 }
2330 2351
2331 2352 ret = find_deepest(self, gca);
2332 2353 Py_DECREF(gca);
2333 2354 return ret;
2334 2355 }
2335 2356
2336 2357 /*
2337 2358 * Invalidate any trie entries introduced by added revs.
2338 2359 */
2339 2360 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2340 2361 {
2341 2362 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2342 2363
2343 2364 for (i = start; i < len; i++) {
2344 2365 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2345 2366 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2346 2367
2347 2368 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2348 2369 }
2349 2370
2350 2371 if (start == 0)
2351 2372 Py_CLEAR(self->added);
2352 2373 }
2353 2374
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range, but exclude the sentinel nullid entry.
 *
 * Only contiguous truncation (step 1, running to the end) is
 * supported. Returns 0 on success, -1 with an exception set.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	/* + 1 accounts for the nullid sentinel past the last real rev */
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	/* Argument changed from PySliceObject* to PyObject* in Python 3. */
#ifdef IS_PY3K
	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
#else
	if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
	                         &step, &slicelength) < 0)
#endif
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* normalize a negative-step slice to its ascending equivalent */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	/* deletion must run to the end, just before the sentinel */
	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* truncation reaches into the on-disk portion */
		if (self->ntinitialized) {
			Py_ssize_t i;

			/* NOTE(review): loop starts at start + 1, so the
			 * node of rev `start` itself is not removed from
			 * the trie here -- confirm this is intentional */
			for (i = start + 1; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->added)
				index_invalidate_added(self, 0);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		}
		self->length = start;
		if (start < self->raw_length) {
			/* drop cached entry tuples past the new end */
			if (self->cache) {
				Py_ssize_t i;
				for (i = start; i < self->raw_length; i++)
					Py_CLEAR(self->cache[i]);
			}
			self->raw_length = start;
		}
		goto done;
	}

	/* only in-memory (appended) revisions are affected */
	if (self->ntinitialized) {
		index_invalidate_added(self, start - self->length);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	}
	if (self->added)
		ret = PyList_SetSlice(self->added, start - self->length,
		                      PyList_GET_SIZE(self->added), NULL);
done:
	Py_CLEAR(self->headrevs);
	return ret;
}
2438 2459
/*
 * Supported ops:
 *
 * slice deletion
 * string assignment (extend node->rev mapping)
 * string deletion (shrink node->rev mapping)
 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
	char *node;
	long rev;

	/* del index[i:j] -> truncate the index */
	if (PySlice_Check(item) && value == NULL)
		return index_slice_del(self, item);

	/* anything else must be keyed by a binary node id */
	if (node_check(item, &node) == -1)
		return -1;

	/* del index[node] -> drop the trie entry (no-op without a trie) */
	if (value == NULL)
		return self->ntinitialized ? nt_delete_node(&self->nt, node)
		                           : 0;
	rev = PyInt_AsLong(value);
	/* rev < 0 also covers PyInt_AsLong's -1 error return */
	if (rev > INT_MAX || rev < 0) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "rev out of range");
		return -1;
	}

	if (index_init_nt(self) == -1)
		return -1;
	return nt_insert(&self->nt, node, (int)rev);
}
2472 2493
/*
 * Find all RevlogNG entries in an index that has inline data. Update
 * the optional "offsets" table with those entries.
 *
 * Returns the number of entries found, or -1 with an exception set
 * if the buffer does not partition cleanly into records.
 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
	const char *data = (const char *)self->buf.buf;
	Py_ssize_t pos = 0;
	Py_ssize_t end = self->buf.len;
	long incr = v1_hdrsize;
	Py_ssize_t len = 0;

	/* pos >= 0 guards against wrap-around from a corrupt length */
	while (pos + v1_hdrsize <= end && pos >= 0) {
		uint32_t comp_len;
		/* 3rd element of header is length of compressed inline data */
		comp_len = getbe32(data + pos + 8);
		incr = v1_hdrsize + comp_len;
		if (offsets)
			offsets[len] = data + pos;
		len++;
		pos += incr;
	}

	/* a well-formed inline index ends exactly at the buffer end */
	if (pos != end) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
		return -1;
	}

	return len;
}
2504 2525
/*
 * __init__(data, inlined): parse a revlog index from a buffer.
 *
 * `data` must support the buffer protocol; `inlined` is truthy when
 * revision data is interleaved with the index records. Returns 0 on
 * success, -1 with an exception set on failure.
 */
static int index_init(indexObject *self, PyObject *args)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	/* Initialize before argument-checking to avoid index_dealloc() crash.
	 */
	self->raw_length = 0;
	self->added = NULL;
	self->cache = NULL;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->ntinitialized = 0;
	self->offsets = NULL;

	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
		                "data does not support buffer interface");
		return -1;
	}

	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		/* entries are variable width: walk the buffer to count */
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->raw_length = len;
		self->length = len;
	} else {
		/* fixed-width records: the size must divide evenly */
		if (size % v1_hdrsize) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->raw_length = size / v1_hdrsize;
		self->length = self->raw_length;
	}

	return 0;
bail:
	return -1;
}
2561 2582
2562 2583 static PyObject *index_nodemap(indexObject *self)
2563 2584 {
2564 2585 Py_INCREF(self);
2565 2586 return (PyObject *)self;
2566 2587 }
2567 2588
2568 2589 static void _index_clearcaches(indexObject *self)
2569 2590 {
2570 2591 if (self->cache) {
2571 2592 Py_ssize_t i;
2572 2593
2573 2594 for (i = 0; i < self->raw_length; i++)
2574 2595 Py_CLEAR(self->cache[i]);
2575 2596 free(self->cache);
2576 2597 self->cache = NULL;
2577 2598 }
2578 2599 if (self->offsets) {
2579 2600 PyMem_Free((void *)self->offsets);
2580 2601 self->offsets = NULL;
2581 2602 }
2582 2603 if (self->ntinitialized) {
2583 2604 nt_dealloc(&self->nt);
2584 2605 }
2585 2606 self->ntinitialized = 0;
2586 2607 Py_CLEAR(self->headrevs);
2587 2608 }
2588 2609
2589 2610 static PyObject *index_clearcaches(indexObject *self)
2590 2611 {
2591 2612 _index_clearcaches(self);
2592 2613 self->ntrev = -1;
2593 2614 self->ntlookups = self->ntmisses = 0;
2594 2615 Py_RETURN_NONE;
2595 2616 }
2596 2617
/*
 * tp_dealloc: drop all caches and owned references before freeing
 * the object. Safe on a partially initialized index because
 * index_init() zeroes every field before it can fail.
 */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	Py_XDECREF(self->added);
	PyObject_Del(self);
}
2609 2630
/* Sequence protocol: len(index), index[rev], and `node in index`. */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
2620 2641
/* Mapping protocol: node -> rev lookup, assignment and deletion. */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
2626 2647
/* Methods exposed on parsers.index instances. */
static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {NULL} /* Sentinel */
};
2656 2679
/* Attribute descriptors: `index.nodemap` returns the index itself. */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
2661 2684
/* Type object for the revlog index: acts as both a sequence
 * (rev -> entry tuple) and a mapping (node -> rev). */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    0,                             /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
2701 2724
/*
 * returns a tuple of the form (index, index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses RevlogNG records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 * index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
PyObject *parse_index2(PyObject *self, PyObject *args)
{
	PyObject *tuple = NULL, *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
	if (idx == NULL)
		goto bail;

	ret = index_init(idx, args);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		/* hand the raw buffer back so the caller can cache it */
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	/* "N" steals the references to idx and cache on success.
	 * NOTE(review): if Py_BuildValue itself fails here, the bail
	 * path's Py_XDECREF of idx/cache may over-release references
	 * already consumed by "N" -- confirm against the CPython docs. */
	tuple = Py_BuildValue("NN", idx, cache);
	if (!tuple)
		goto bail;
	return tuple;

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	Py_XDECREF(tuple);
	return NULL;
}
2746 2769
2747 2770 #ifdef WITH_RUST
2748 2771
2749 2772 /* rustlazyancestors: iteration over ancestors implemented in Rust
2750 2773 *
2751 2774 * This class holds a reference to an index and to the Rust iterator.
2752 2775 */
2753 2776 typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;
2754 2777
2755 2778 struct rustlazyancestorsObjectStruct {
2756 2779 PyObject_HEAD
2757 2780 /* Type-specific fields go here. */
2758 2781 indexObject *index; /* Ref kept to avoid GC'ing the index */
2759 2782 void *iter; /* Rust iterator */
2760 2783 };
2761 2784
2762 2785 /* FFI exposed from Rust code */
2763 2786 rustlazyancestorsObject *rustlazyancestors_init(indexObject *index,
2764 2787 /* intrevs vector */
2765 2788 Py_ssize_t initrevslen,
2766 2789 long *initrevs, long stoprev,
2767 2790 int inclusive);
2768 2791 void rustlazyancestors_drop(rustlazyancestorsObject *self);
2769 2792 int rustlazyancestors_next(rustlazyancestorsObject *self);
2770 2793 int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2771 2794
/* CPython instance methods */
/*
 * __init__(index, initrevs, stoprev, inclusive): build the Rust-side
 * lazy-ancestors iterator. Returns 0 on success, -1 with an
 * exception set on failure.
 */
static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
{
	PyObject *initrevsarg = NULL;
	PyObject *inclusivearg = NULL;
	long stoprev = 0;
	long *initrevs = NULL;
	int inclusive = 0;
	Py_ssize_t i;

	indexObject *index;
	if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
	                      &PyList_Type, &initrevsarg, &stoprev,
	                      &PyBool_Type, &inclusivearg))
		return -1;

	/* keep the index alive as long as the Rust iterator uses it */
	Py_INCREF(index);
	self->index = index;

	if (inclusivearg == Py_True)
		inclusive = 1;

	Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);

	/* NOTE(review): calloc(0, ...) may legitimately return NULL,
	 * which would be misreported as out-of-memory for an empty
	 * initrevs list -- confirm callers never pass one. */
	initrevs = (long *)calloc(linit, sizeof(long));

	if (initrevs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = 0; i < linit; i++) {
		initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
	}
	/* a single check after the loop suffices: the error state set by
	 * a failed PyInt_AsLong persists */
	if (PyErr_Occurred())
		goto bail;

	self->iter =
	    rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
	if (self->iter == NULL) {
		/* if this is because of GraphError::ParentOutOfRange
		 * HgRevlogIndex_GetParents() has already set the proper
		 * exception */
		goto bail;
	}

	free(initrevs);
	return 0;

bail:
	free(initrevs);
	return -1;
};
2825 2848
/*
 * tp_dealloc: release the index reference and tear down the Rust
 * iterator, which may be NULL if rustla_init() failed part-way.
 */
static void rustla_dealloc(rustlazyancestorsObject *self)
{
	Py_XDECREF(self->index);
	if (self->iter != NULL) { /* can happen if rustla_init failed */
		rustlazyancestors_drop(self->iter);
	}
	PyObject_Del(self);
}
2834 2857
/*
 * tp_iternext: pull the next ancestor rev from the Rust iterator.
 * Returns a Python int, or NULL without an exception to signal
 * exhaustion (rustlazyancestors_next() returns -1 when done).
 */
static PyObject *rustla_next(rustlazyancestorsObject *self)
{
	int res = rustlazyancestors_next(self->iter);
	if (res == -1) {
		/* Setting an explicit exception seems unnecessary
		 * as examples from Python source code (Objects/rangeobjets.c
		 * and Modules/_io/stringio.c) seem to demonstrate.
		 */
		return NULL;
	}
	return PyInt_FromLong(res);
}
2847 2870
/*
 * sq_contains: implements `rev in iterator`. Non-integer arguments
 * report "not contained" instead of raising.
 */
static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
{
	long lrev;
	if (!pylong_to_long(rev, &lrev)) {
		/* not an int: swallow the conversion error, answer no */
		PyErr_Clear();
		return 0;
	}
	return rustlazyancestors_contains(self->iter, lrev);
}
2857 2880
/* Sequence protocol: only `in` (membership) is supported. */
static PySequenceMethods rustla_sequence_methods = {
    0,                           /* sq_length */
    0,                           /* sq_concat */
    0,                           /* sq_repeat */
    0,                           /* sq_item */
    0,                           /* sq_slice */
    0,                           /* sq_ass_item */
    0,                           /* sq_ass_slice */
    (objobjproc)rustla_contains, /* sq_contains */
};
2868 2891
/* Type object for the Rust-backed lazy ancestors iterator. */
static PyTypeObject rustlazyancestorsType = {
    PyVarObject_HEAD_INIT(NULL, 0)                  /* header */
    "parsers.rustlazyancestors",                    /* tp_name */
    sizeof(rustlazyancestorsObject),                /* tp_basicsize */
    0,                                              /* tp_itemsize */
    (destructor)rustla_dealloc,                     /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_compare */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    &rustla_sequence_methods,                       /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                             /* tp_flags */
    "Iterator over ancestors, implemented in Rust", /* tp_doc */
    0,                                              /* tp_traverse */
    0,                                              /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    (iternextfunc)rustla_next,                      /* tp_iternext */
    0,                                              /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)rustla_init,                          /* tp_init */
    0,                                              /* tp_alloc */
};
2908 2931 #endif /* WITH_RUST */
2909 2932
/*
 * Module initialization: register the index and nodetree types,
 * build the shared nullid sentinel entry, and export the get-parents
 * C API capsule (plus the Rust ancestor iterator when compiled in).
 */
void revlog_module_init(PyObject *mod)
{
	PyObject *caps = NULL;
	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
		return;
	Py_INCREF(&HgRevlogIndex_Type);
	PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	if (!nullentry) {
		/* entry tuple for the null revision: zero offsets/sizes,
		 * -1 for rev-valued fields, the nullid hash */
		nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0,
		                          0, -1, -1, -1, -1, nullid, 20);
	}
	if (nullentry)
		/* shared immortal singleton: keep the GC away from it */
		PyObject_GC_UnTrack(nullentry);

	caps = PyCapsule_New(HgRevlogIndex_GetParents,
	                     "mercurial.cext.parsers.index_get_parents_CAPI",
	                     NULL);
	if (caps != NULL)
		PyModule_AddObject(mod, "index_get_parents_CAPI", caps);

#ifdef WITH_RUST
	rustlazyancestorsType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&rustlazyancestorsType) < 0)
		return;
	Py_INCREF(&rustlazyancestorsType);
	PyModule_AddObject(mod, "rustlazyancestors",
	                   (PyObject *)&rustlazyancestorsType);
#endif
}
@@ -1,2603 +1,2607
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanked usage of all the name to prevent pyflakes constraints
80 80 # We need these name available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100
101 101 # Aliased for performance.
102 102 _zlibdecompress = zlib.decompress
103 103
104 104 # max size of revlog with inline data
105 105 _maxinline = 131072
106 106 _chunksize = 1048576
107 107
108 108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 109 _flagprocessors = {
110 110 REVIDX_ISCENSORED: None,
111 111 }
112 112
# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side processor for ellipsis revisions: text is passed through."""
    return (text, False)

def ellipsiswriteprocessor(rl, text):
    """Write-side processor for ellipsis revisions: text is passed through."""
    return (text, False)

def ellipsisrawprocessor(rl, text):
    """Raw processor: an ellipsis rawtext is never valid for hash checks."""
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
128 128
def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.

    Raises error.ProgrammingError when ``flag`` is unknown or missing from
    REVIDX_FLAGS_ORDER, and error.Abort when a processor is already
    registered for it (see _insertflagprocessor).
    """
    _insertflagprocessor(flag, processor, _flagprocessors)
155 155
def _insertflagprocessor(flag, processor, flagprocessors):
    """Validate ``flag`` and record ``processor`` in ``flagprocessors``.

    The flag must appear in both REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER
    and must not already have a processor registered.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        raise error.ProgrammingError(
            _("cannot register processor on unknown flag '%#x'.") % (flag))
    if flag not in REVIDX_FLAGS_ORDER:
        raise error.ProgrammingError(
            _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag))
    if flag in flagprocessors:
        raise error.Abort(
            _("cannot register multiple processors on flag '%#x'.") % (flag))
    flagprocessors[flag] = processor
167 167
def getoffset(q):
    """Extract the data-file offset from a packed offset/flags index field."""
    return int(q // 0x10000)
170 170
def gettype(q):
    """Extract the 16 flag bits from a packed offset/flags index field."""
    return int(q % 0x10000)
173 173
def offset_type(offset, type):
    """Pack ``offset`` and flag bits ``type`` into a single index field."""
    if type & ~REVIDX_KNOWN_FLAGS:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
178 178
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    # presumably the length of the revision fulltext (not covered by the
    # docstring above) -- confirm at call sites
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
197 197
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """A revision delta record (implements repository.irevisiondelta)."""
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)
210 210
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """A problem found while verifying a revlog (repository.iverifyproblem)."""
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
217 217
218 218 # index v0:
219 219 # 4 bytes: offset
220 220 # 4 bytes: compressed length
221 221 # 4 bytes: base rev
222 222 # 4 bytes: link rev
223 223 # 20 bytes: parent 1 nodeid
224 224 # 20 bytes: parent 2 nodeid
225 225 # 20 bytes: nodeid
226 226 indexformatv0 = struct.Struct(">4l20s20s20s")
227 227 indexformatv0_pack = indexformatv0.pack
228 228 indexformatv0_unpack = indexformatv0.unpack
229 229
class revlogoldindex(list):
    """List of index entries that synthesizes an entry for the null rev.

    Indexing with -1 yields a fixed tuple for the null revision instead of
    the list's last element, so callers can always look up rev -1.
    """
    def __getitem__(self, i):
        if i != -1:
            return list.__getitem__(self, i)
        return (0, 0, 0, -1, -1, -1, -1, nullid)
235 235
class revlogoldio(object):
    """Parser/packer for the original (version 0) revlog index format."""
    def __init__(self):
        # fixed v0 record size, taken from the struct definition above
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse v0 index ``data`` into (index, nodemap, chunkcache).

        Entries are converted to the v1 tuple layout so callers can treat
        both formats uniformly. v0 stores parents as nodeids, so they are
        mapped back to revs through the nodemap built along the way.
        """
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # the v0 parser never primes a chunk cache (always returns None)
        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        """Pack a v1-style ``entry`` tuple into the v0 on-disk format.

        ``node`` maps a rev back to its nodeid for the parent fields. v0
        cannot represent index flags, so flagged entries are rejected.
        """
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)
266 266
267 267 # index ng:
268 268 # 6 bytes: offset
269 269 # 2 bytes: flags
270 270 # 4 bytes: compressed length
271 271 # 4 bytes: uncompressed length
272 272 # 4 bytes: base rev
273 273 # 4 bytes: link rev
274 274 # 4 bytes: parent 1 rev
275 275 # 4 bytes: parent 2 rev
276 276 # 32 bytes: nodeid
277 277 indexformatng = struct.Struct(">Qiiiiii20s12x")
278 278 indexformatng_pack = indexformatng.pack
279 279 versionformat = struct.Struct(">I")
280 280 versionformat_pack = versionformat.pack
281 281 versionformat_unpack = versionformat.unpack
282 282
283 283 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
284 284 # signed integer)
285 285 _maxentrysize = 0x7fffffff
286 286
class revlogio(object):
    """Parser/packer for the v1/v2 ("ng") revlog index format."""
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        """Parse index ``data`` via the C extension.

        Returns (index, nodemap-or-None, chunkcache).
        """
        index, cache = parsers.parse_index2(data, inline)
        nodemap = getattr(index, 'nodemap', None)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        """Pack an index ``entry`` tuple into its on-disk byte form."""
        packed = indexformatng_pack(*entry)
        if rev == 0:
            # the first entry doubles as the revlog header: overwrite the
            # leading 4 bytes with the version/flags word
            packed = versionformat_pack(version) + packed[4:]
        return packed
301 301
302 302 class revlog(object):
303 303 """
304 304 the underlying revision storage object
305 305
306 306 A revlog consists of two parts, an index and the revision data.
307 307
308 308 The index is a file with a fixed record size containing
309 309 information on each revision, including its nodeid (hash), the
310 310 nodeids of its parents, the position and offset of its data within
311 311 the data file, and the revision it's based on. Finally, each entry
312 312 contains a linkrev entry that can serve as a pointer to external
313 313 data.
314 314
315 315 The revision data itself is a linear collection of data chunks.
316 316 Each chunk represents a revision and is usually represented as a
317 317 delta against the previous chunk. To bound lookup time, runs of
318 318 deltas are limited to about 2 times the length of the original
319 319 version data. This makes retrieval of a version proportional to
320 320 its size, or O(1) relative to the number of revisions.
321 321
322 322 Both pieces of the revlog are written to in an append-only
323 323 fashion, which means we never need to rewrite a file to insert or
324 324 remove data, and can use some simple techniques to avoid the need
325 325 for locking while reading.
326 326
327 327 If checkambig, indexfile is opened with checkambig=True at
328 328 writing, to avoid file stat ambiguity.
329 329
330 330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
331 331 index will be mmapped rather than read if it is larger than the
332 332 configured threshold.
333 333
334 334 If censorable is True, the revlog can have censored revisions.
335 335 """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        indexfile names the ".i" file; datafile defaults to the matching
        ".d" file when not supplied. checkambig, mmaplargeindex and
        censorable are described in the class docstring.
        """
        self.indexfile = indexfile
        # derive "xxx.d" from "xxx.i" when no explicit data file is given
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        # defaults above may be overridden by options carried on the opener
        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

            # revlog v0 doesn't have flag processors
            for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
                _insertflagprocessor(flag, processor, self._flagprocessors)

        # validate the user-supplied chunk cache size before it is used
        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        self._loadindex(v, mmapindexthreshold)
429 429
    def _loadindex(self, v, mmapindexthreshold):
        """Read and parse the index file, priming all index-related state.

        ``v`` is the default version/flags word used when the index file is
        missing or empty; an existing index overrides it with its first
        four bytes. ``mmapindexthreshold``, when not None, is the file size
        at or above which the index is mmapped instead of fully read.
        """
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
                if len(indexdata) > 0:
                    v = versionformat_unpack(indexdata[:4])[0]
                    self._initempty = False
        except IOError as inst:
            # a missing index file simply means an empty revlog
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        # split the version word into feature flags (high bits) and format
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        # v0 needs the pure-python parser; later formats use the C one
        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
490 490
    @util.propertycache
    def _compressor(self):
        # lazily instantiated compression engine for this revlog
        return util.compengines[self._compengine].revlogcompressor()
494 494
495 495 def _indexfp(self, mode='r'):
496 496 """file object for the revlog's index file"""
497 497 args = {r'mode': mode}
498 498 if mode != 'r':
499 499 args[r'checkambig'] = self._checkambig
500 500 if mode == 'w':
501 501 args[r'atomictemp'] = True
502 502 return self.opener(self.indexfile, **args)
503 503
    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                # inline revlogs keep revision data inside the index file
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
533 533
    def tip(self):
        """nodeid of the highest-numbered revision"""
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        """True if ``rev`` is a valid revision number in this revlog"""
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)
545 545
    @util.propertycache
    def nodemap(self):
        """nodeid -> rev mapping, filled lazily as self.rev() scans the index"""
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache
553 553
554 554 def hasnode(self, node):
555 555 try:
556 556 self.rev(node)
557 557 return True
558 558 except KeyError:
559 559 return False
560 560
561 561 def candelta(self, baserev, rev):
562 562 """whether two revisions (baserev, rev) can be delta-ed or not"""
563 563 # Disable delta if either rev requires a content-changing flag
564 564 # processor (ex. LFS). This is because such flag processor can alter
565 565 # the rawtext content that the delta will be based on, and two clients
566 566 # could have a same revlog node with different flags (i.e. different
567 567 # rawtext contents) and the delta could be incompatible.
568 568 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
569 569 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
570 570 return False
571 571 return True
572 572
    def clearcaches(self):
        """Drop all in-memory caches (revision text, chains, chunks, nodes)."""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # the C index exposes its own cache-clearing entry point
            self._nodecache.clearcaches()
        except AttributeError:
            # pure-python nodemap: reset to the minimal initial mapping
            self._nodecache = {nullid: nullrev}
            self._nodepos = None
584 584
    def rev(self, node):
        """Return the revision number bound to ``node``.

        Raises WdirUnsupported for working-directory pseudo-nodes and
        LookupError when the node is unknown.
        """
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            # scan backwards from the last unscanned rev, filling the
            # cache as we go, until the node is found
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
613 613
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        """offset of the revision's data within the data file"""
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        """storage flags of the revision (low 16 bits of index field 0)"""
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        """compressed length of the revision's data (index field 1)"""
        return self.index[rev][1]
626 626
627 627 def rawsize(self, rev):
628 628 """return the length of the uncompressed text for a given revision"""
629 629 l = self.index[rev][2]
630 630 if l >= 0:
631 631 return l
632 632
633 633 t = self.revision(rev, raw=True)
634 634 return len(t)
635 635
636 636 def size(self, rev):
637 637 """length of non-raw text (processed by a "read" flag processor)"""
638 638 # fast path: if no "read" flag processor could change the content,
639 639 # size is rawsize. note: ELLIPSIS is known to not change the content.
640 640 flags = self.flags(rev)
641 641 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
642 642 return self.rawsize(rev)
643 643
644 644 return len(self.revision(rev, raw=False))
645 645
646 646 def chainbase(self, rev):
647 647 base = self._chainbasecache.get(rev)
648 648 if base is not None:
649 649 return base
650 650
651 651 index = self.index
652 652 iterrev = rev
653 653 base = index[iterrev][3]
654 654 while base != iterrev:
655 655 iterrev = base
656 656 base = index[iterrev][3]
657 657
658 658 self._chainbasecache[rev] = base
659 659 return base
660 660
    def linkrev(self, rev):
        """linked revision of ``rev`` (index field 4, the "link rev")"""
        return self.index[rev][4]

    def parentrevs(self, rev):
        """parent revisions of ``rev``; raises WdirUnsupported for the
        working-directory pseudo-rev"""
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        """nodeid of ``rev``; raises WdirUnsupported for the
        working-directory pseudo-rev"""
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        """end offset of the revision's data (start + compressed length)"""
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        """parent nodeids of ``node``"""
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
694 694
    def chainlen(self, rev):
        """length of the delta chain for ``rev``"""
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        """Return (chain length, summed compressed delta size) for ``rev``,
        memoized in self._chaininfocache."""
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                # the delta base is stored explicitly in the index
                iterrev = e[3]
            else:
                # pre-generaldelta: each rev deltas against its predecessor
                iterrev -= 1
            if iterrev in chaininfocache:
                # splice in the cached tail of the chain
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
728 728
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            # the walk ended at the chain base; it is part of the chain
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
769 769
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well
        if util.safehasattr(parsers, 'rustlazyancestors'):
            # prefer the Rust-backed iterator when the parsers module has it
            return ancestor.rustlazyancestors(
                self.index, revs,
                stoprev=stoprev, inclusive=inclusive)
        return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
                                      stoprev=stoprev, inclusive=inclusive)
788 788
    def descendants(self, revs):
        """Generate descendant revisions of ``revs`` (delegates to dagop)."""
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            # set-like view over a lazily-consumed iterable plus values
            # added explicitly after construction
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
855 855
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        # same computation as findmissingrevs, but on nodes instead of revs
        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
919 919
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            # This walk collects every ancestor of 'heads' down to lowestrev.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        # NOTE: iteritems() is Python 2 only; this file predates the py3 port.
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
1074 1074
    def headrevs(self):
        """Return the revision numbers of all heads.

        Prefers the C index implementation when present; a pure Python
        index has no 'headrevs' attribute, so we fall back to _headrevs().
        """
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()
1080 1080
    def computephases(self, roots):
        # Delegate to the index's computephasesmapsets.
        # NOTE(review): no fallback here — presumably this is only reachable
        # when the C index (parsers.c) is in use; confirm against callers.
        return self.index.computephasesmapsets(roots)
1083 1083
1084 1084 def _headrevs(self):
1085 1085 count = len(self)
1086 1086 if not count:
1087 1087 return [nullrev]
1088 1088 # we won't iter over filtered rev so nobody is a head at start
1089 1089 ishead = [0] * (count + 1)
1090 1090 index = self.index
1091 1091 for r in self:
1092 1092 ishead[r] = 1 # I may be an head
1093 1093 e = index[r]
1094 1094 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1095 1095 return [r for r, val in enumerate(ishead) if val]
1096 1096
1097 1097 def heads(self, start=None, stop=None):
1098 1098 """return the list of all nodes that have no children
1099 1099
1100 1100 if start is specified, only heads that are descendants of
1101 1101 start will be returned
1102 1102 if stop is specified, it will consider all the revs from stop
1103 1103 as if they had no children
1104 1104 """
1105 1105 if start is None and stop is None:
1106 1106 if not len(self):
1107 1107 return [nullid]
1108 1108 return [self.node(r) for r in self.headrevs()]
1109 1109
1110 1110 if start is None:
1111 1111 start = nullrev
1112 1112 else:
1113 1113 start = self.rev(start)
1114 1114
1115 1115 stoprevs = set(self.rev(n) for n in stop or [])
1116 1116
1117 1117 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1118 1118 stoprevs=stoprevs)
1119 1119
1120 1120 return [self.node(rev) for rev in revs]
1121 1121
1122 1122 def children(self, node):
1123 1123 """find the children of a given node"""
1124 1124 c = []
1125 1125 p = self.rev(node)
1126 1126 for r in self.revs(start=p + 1):
1127 1127 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1128 1128 if prevs:
1129 1129 for pr in prevs:
1130 1130 if pr == p:
1131 1131 c.append(self.node(r))
1132 1132 elif p == nullrev:
1133 1133 c.append(self.node(r))
1134 1134 return c
1135 1135
1136 1136 def commonancestorsheads(self, a, b):
1137 1137 """calculate all the heads of the common ancestors of nodes a and b"""
1138 1138 a, b = self.rev(a), self.rev(b)
1139 1139 ancs = self._commonancestorsheads(a, b)
1140 1140 return pycompat.maplist(self.node, ancs)
1141 1141
1142 1142 def _commonancestorsheads(self, *revs):
1143 1143 """calculate all the heads of the common ancestors of revs"""
1144 1144 try:
1145 1145 ancs = self.index.commonancestorsheads(*revs)
1146 1146 except (AttributeError, OverflowError): # C implementation failed
1147 1147 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1148 1148 return ancs
1149 1149
1150 1150 def isancestor(self, a, b):
1151 1151 """return True if node a is an ancestor of node b
1152 1152
1153 1153 A revision is considered an ancestor of itself."""
1154 1154 a, b = self.rev(a), self.rev(b)
1155 1155 return self.isancestorrev(a, b)
1156 1156
1157 1157 def isancestorrev(self, a, b):
1158 1158 """return True if revision a is an ancestor of revision b
1159 1159
1160 1160 A revision is considered an ancestor of itself.
1161 1161
1162 1162 The implementation of this is trivial but the use of
1163 1163 commonancestorsheads is not."""
1164 1164 if a == nullrev:
1165 1165 return True
1166 1166 elif a == b:
1167 1167 return True
1168 1168 elif a > b:
1169 1169 return False
1170 1170 return a in self._commonancestorsheads(a, b)
1171 1171
1172 1172 def ancestor(self, a, b):
1173 1173 """calculate the "best" common ancestor of nodes a and b"""
1174 1174
1175 1175 a, b = self.rev(a), self.rev(b)
1176 1176 try:
1177 1177 ancs = self.index.ancestors(a, b)
1178 1178 except (AttributeError, OverflowError):
1179 1179 ancs = ancestor.ancestors(self.parentrevs, a, b)
1180 1180 if ancs:
1181 1181 # choose a consistent winner when there's a tie
1182 1182 return min(map(self.node, ancs))
1183 1183 return nullid
1184 1184
    def _match(self, id):
        """Resolve 'id' (int rev, binary node, str(rev) or full hex node)
        to a node, or fall off the end and implicitly return None.

        The checks are ordered from cheapest to most expensive and must
        stay in this order.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                # negative revs count from the end, like Python indexing
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
1218 1218
    def _partialmatch(self, id):
        """Resolve a hex-prefix 'id' to a unique node.

        Returns the node, or None when nothing matches.  Raises
        AmbiguousPrefixLookupError on multiple matches and WdirUnsupported
        when the prefix identifies the virtual working-directory node.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = wdirhex.startswith(id)
        try:
            # Fast path: the C radix tree in parsers.c.
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                # Scan the whole index for nodes starting with the prefix.
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1268 1268
1269 1269 def lookup(self, id):
1270 1270 """locate a node based on:
1271 1271 - revision number or str(revision number)
1272 1272 - nodeid or subset of hex nodeid
1273 1273 """
1274 1274 n = self._match(id)
1275 1275 if n is not None:
1276 1276 return n
1277 1277 n = self._partialmatch(id)
1278 1278 if n:
1279 1279 return n
1280 1280
1281 1281 raise error.LookupError(id, self.indexfile, _('no match found'))
1282 1282
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            # A prefix is valid when it resolves to exactly one node.
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            # all-'f' prefixes could also denote the working directory node
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                # Fast path: let the C index compute the shortest prefix.
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
1330 1330
1331 1331 def cmp(self, node, text):
1332 1332 """compare text with a given file revision
1333 1333
1334 1334 returns True if text is different than what is stored.
1335 1335 """
1336 1336 p1, p2 = self.parents(node)
1337 1337 return storageutil.hashrevisionsha1(text, p1, p2) != node
1338 1338
1339 1339 def _cachesegment(self, offset, data):
1340 1340 """Add a segment to the revlog cache.
1341 1341
1342 1342 Accepts an absolute offset and the data that is at that location.
1343 1343 """
1344 1344 o, d = self._chunkcache
1345 1345 # try to add to existing cache
1346 1346 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1347 1347 self._chunkcache = o, d + data
1348 1348 else:
1349 1349 self._chunkcache = offset, data
1350 1350
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        # cachesize is a power of two, so these masks round the window down
        # (realoffset) and its end up to cache-size boundaries.
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # The window is larger than the request: slice out the requested
            # part, after verifying we actually read enough bytes.
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d
1395 1395
1396 1396 def _getsegment(self, offset, length, df=None):
1397 1397 """Obtain a segment of raw data from the revlog.
1398 1398
1399 1399 Accepts an absolute offset, length of bytes to obtain, and an
1400 1400 optional file handle to the already-opened revlog. If the file
1401 1401 handle is used, it's original seek position will not be preserved.
1402 1402
1403 1403 Requests for data may be returned from a cache.
1404 1404
1405 1405 Returns a str or a buffer instance of raw byte data.
1406 1406 """
1407 1407 o, d = self._chunkcache
1408 1408 l = len(d)
1409 1409
1410 1410 # is it in the cache?
1411 1411 cachestart = offset - o
1412 1412 cacheend = cachestart + length
1413 1413 if cachestart >= 0 and cacheend <= l:
1414 1414 if cachestart == 0 and cacheend == l:
1415 1415 return d # avoid a copy
1416 1416 return util.buffer(d, cachestart, cacheend - cachestart)
1417 1417
1418 1418 return self._readsegment(offset, length, df=df)
1419 1419
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        # entry[0] packs offset_flags; the upper bits are the data offset.
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            # Inline revlogs interleave index entries with data, so skip over
            # one index-entry-sized header per revision.
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
1453 1453
1454 1454 def _chunk(self, rev, df=None):
1455 1455 """Obtain a single decompressed chunk for a revision.
1456 1456
1457 1457 Accepts an integer revision and an optional already-open file handle
1458 1458 to be used for reading. If used, the seek position of the file will not
1459 1459 be preserved.
1460 1460
1461 1461 Returns a str holding uncompressed data for the requested revision.
1462 1462 """
1463 1463 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1464 1464
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # Bind hot attributes to locals for speed in the loops below.
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            # Split the revisions into dense slices to avoid reading large
            # unused gaps from disk.
            slicedchunks = deltautil.slicechunk(self, revs,
                                               targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                # One big read covering the whole slice.
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    # account for the index entries interleaved with the data
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1518 1518
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        # Reset to an empty segment at offset 0 (see _cachesegment).
        self._chunkcache = (0, '')
1522 1522
1523 1523 def deltaparent(self, rev):
1524 1524 """return deltaparent of the given revision"""
1525 1525 base = self.index[rev][3]
1526 1526 if base == rev:
1527 1527 return nullrev
1528 1528 elif self._generaldelta:
1529 1529 return base
1530 1530 else:
1531 1531 return rev - 1
1532 1532
1533 1533 def issnapshot(self, rev):
1534 1534 """tells whether rev is a snapshot
1535 1535 """
1536 if not self._sparserevlog:
1537 return self.deltaparent(rev) == nullrev
1538 elif util.safehasattr(self.index, 'issnapshot'):
1539 # directly assign the method to cache the testing and access
1540 self.issnapshot = self.index.issnapshot
1541 return self.issnapshot(rev)
1536 1542 if rev == nullrev:
1537 1543 return True
1538 1544 entry = self.index[rev]
1539 1545 base = entry[3]
1540 1546 if base == rev:
1541 1547 return True
1542 elif not self._sparserevlog:
1543 return False
1544 1548 if base == nullrev:
1545 1549 return True
1546 1550 p1 = entry[5]
1547 1551 p2 = entry[6]
1548 1552 if base == p1 or base == p2:
1549 1553 return False
1550 1554 return self.issnapshot(base)
1551 1555
1552 1556 def snapshotdepth(self, rev):
1553 1557 """number of snapshot in the chain before this one"""
1554 1558 if not self.issnapshot(rev):
1555 1559 raise error.ProgrammingError('revision %d not a snapshot')
1556 1560 return len(self._deltachain(rev)[0]) - 1
1557 1561
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            # The stored delta for rev2 is already against rev1: reuse it
            # instead of recomputing a diff.
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))
1569 1573
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            # Even on a cache miss, a cached neighboring revision can stop
            # the delta-chain walk early (see stoprev below).
            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                # the chain walk stopped at cachedrev: start patching from
                # the cached raw text instead of a full snapshot
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                # heuristic bound used by sparse reads to size disk requests
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
1644 1648
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.

        ``text`` is the raw revision text; ``p1`` and ``p2`` are the
        parent nodes.  Delegates to storageutil.hashrevisionsha1.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
1652 1656
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and applies transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if not operation in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                # A None processor means the flag is known but needs no
                # transform (hash stays valid).
                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                # hash validation is required only if every processor agrees
                validatehash = validatehash and vhash

        return text, validatehash
1715 1719
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.

        Raises RevlogError on mismatch, or CensoredNodeError when the
        mismatching text is recognized as censored content.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            # Censored texts legitimately fail the hash check; report them
            # with a dedicated error so callers can handle them.
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise
1744 1748
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        # First copy every revision's data out of the inline file into the
        # new standalone data file...
        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        # ...then rewrite the index without the inline flag.
        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
1796 1800
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.

        Default implementation is a no-op; subclasses may override to react
        to duplicate insertion attempts (e.g. for bookkeeping).
        """
1800 1804
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls

        Returns the node of the added (or already present) revision.
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            # flag processors may rewrite the stored text below, so the node
            # must be derived from the original (non-raw) text first
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            # duplicate revision; nothing to store
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
1846 1850
1847 1851 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1848 1852 cachedelta=None, deltacomputer=None):
1849 1853 """add a raw revision with known flags, node and parents
1850 1854 useful when reusing a revision not stored in this revlog (ex: received
1851 1855 over wire, or read from an external bundle).
1852 1856 """
1853 1857 dfh = None
1854 1858 if not self._inline:
1855 1859 dfh = self._datafp("a+")
1856 1860 ifh = self._indexfp("a+")
1857 1861 try:
1858 1862 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1859 1863 flags, cachedelta, ifh, dfh,
1860 1864 deltacomputer=deltacomputer)
1861 1865 finally:
1862 1866 if dfh:
1863 1867 dfh.close()
1864 1868 ifh.close()
1865 1869
1866 1870 def compress(self, data):
1867 1871 """Generate a possibly-compressed representation of data."""
1868 1872 if not data:
1869 1873 return '', data
1870 1874
1871 1875 compressed = self._compressor.compress(data)
1872 1876
1873 1877 if compressed:
1874 1878 # The revlog compressor added the header in the returned data.
1875 1879 return '', compressed
1876 1880
1877 1881 if data[0:1] == '\0':
1878 1882 return '', data
1879 1883 return 'u', data
1880 1884
1881 1885 def decompress(self, data):
1882 1886 """Decompress a revlog chunk.
1883 1887
1884 1888 The chunk is expected to begin with a header identifying the
1885 1889 format type so it can be routed to an appropriate decompressor.
1886 1890 """
1887 1891 if not data:
1888 1892 return data
1889 1893
1890 1894 # Revlogs are read much more frequently than they are written and many
1891 1895 # chunks only take microseconds to decompress, so performance is
1892 1896 # important here.
1893 1897 #
1894 1898 # We can make a few assumptions about revlogs:
1895 1899 #
1896 1900 # 1) the majority of chunks will be compressed (as opposed to inline
1897 1901 # raw data).
1898 1902 # 2) decompressing *any* data will likely by at least 10x slower than
1899 1903 # returning raw inline data.
1900 1904 # 3) we want to prioritize common and officially supported compression
1901 1905 # engines
1902 1906 #
1903 1907 # It follows that we want to optimize for "decompress compressed data
1904 1908 # when encoded with common and officially supported compression engines"
1905 1909 # case over "raw data" and "data encoded by less common or non-official
1906 1910 # compression engines." That is why we have the inline lookup first
1907 1911 # followed by the compengines lookup.
1908 1912 #
1909 1913 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1910 1914 # compressed chunks. And this matters for changelog and manifest reads.
1911 1915 t = data[0:1]
1912 1916
1913 1917 if t == 'x':
1914 1918 try:
1915 1919 return _zlibdecompress(data)
1916 1920 except zlib.error as e:
1917 1921 raise error.RevlogError(_('revlog decompress error: %s') %
1918 1922 stringutil.forcebytestr(e))
1919 1923 # '\0' is more common than 'u' so it goes first.
1920 1924 elif t == '\0':
1921 1925 return data
1922 1926 elif t == 'u':
1923 1927 return util.buffer(data, 1)
1924 1928
1925 1929 try:
1926 1930 compressor = self._decompressors[t]
1927 1931 except KeyError:
1928 1932 try:
1929 1933 engine = util.compengines.forrevlogheader(t)
1930 1934 compressor = engine.revlogcompressor()
1931 1935 self._decompressors[t] = compressor
1932 1936 except KeyError:
1933 1937 raise error.RevlogError(_('unknown compression type %r') % t)
1934 1938
1935 1939 return compressor.decompress(data)
1936 1940
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        # handle revision chunks are read from while computing deltas: the
        # index file itself for inline revlogs, the data file otherwise
        if self._inline:
            fh = ifh
        else:
            fh = dfh

        # one-element list so deltacomputer can communicate resolved text back
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        # build and register the new index entry before writing to disk
        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            # resolve the full text now so a forthcoming revision() call by
            # the caller's callback hits the cache
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
2013 2017
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        """Write a packed index entry and its revision chunk to disk.

        ``entry`` is the packed index record, ``data`` a (header, body) pair
        of chunk bytes, ``offset`` the position the revision data starts at.
        """
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            # separate data file: chunk goes to dfh, entry to the index
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            # inline: entry and chunk are interleaved in the index file
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            # may split the revlog out of inline mode if it grew too large
            self._enforceinlinesize(transaction, ifh)
2046 2050
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.

        Returns the list of nodes described by ``deltas`` (including those
        already present in the revlog).
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            # flush both handles so _peek_iscensored can read written data
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes
2154 2158
2155 2159 def iscensored(self, rev):
2156 2160 """Check if a file revision is censored."""
2157 2161 if not self._censorable:
2158 2162 return False
2159 2163
2160 2164 return self.flags(rev) & REVIDX_ISCENSORED
2161 2165
2162 2166 def _peek_iscensored(self, baserev, delta, flush):
2163 2167 """Quickly check if a delta produces a censored revision."""
2164 2168 if not self._censorable:
2165 2169 return False
2166 2170
2167 2171 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2168 2172
2169 2173 def getstrippoint(self, minlink):
2170 2174 """find the minimum rev that must be stripped to strip the linkrev
2171 2175
2172 2176 Returns a tuple containing the minimum rev and a set of all revs that
2173 2177 have linkrevs that will be broken by this strip.
2174 2178 """
2175 2179 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2176 2180 self.headrevs(),
2177 2181 self.linkrev, self.parentrevs)
2178 2182
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            # empty revlog; nothing to strip
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            # no revision has a linkrev >= minlink; keep everything
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        # NOTE(review): the slice stops at -1, presumably to preserve a
        # trailing sentinel entry in the index -- confirm before changing.
        del self.index[rev:-1]
        self._nodepos = None
2219 2223
2220 2224 def checksize(self):
2221 2225 expected = 0
2222 2226 if len(self):
2223 2227 expected = max(0, self.end(len(self) - 1))
2224 2228
2225 2229 try:
2226 2230 with self._datafp() as f:
2227 2231 f.seek(0, 2)
2228 2232 actual = f.tell()
2229 2233 dd = actual - expected
2230 2234 except IOError as inst:
2231 2235 if inst.errno != errno.ENOENT:
2232 2236 raise
2233 2237 dd = 0
2234 2238
2235 2239 try:
2236 2240 f = self.opener(self.indexfile)
2237 2241 f.seek(0, 2)
2238 2242 actual = f.tell()
2239 2243 f.close()
2240 2244 s = self._io.size
2241 2245 i = max(0, actual // s)
2242 2246 di = actual - (i * s)
2243 2247 if self._inline:
2244 2248 databytes = 0
2245 2249 for r in self:
2246 2250 databytes += max(0, self.length(r))
2247 2251 dd = 0
2248 2252 di = actual - len(self) * s - databytes
2249 2253 except IOError as inst:
2250 2254 if inst.errno != errno.ENOENT:
2251 2255 raise
2252 2256 di = 0
2253 2257
2254 2258 return (dd, di)
2255 2259
2256 2260 def files(self):
2257 2261 res = [self.indexfile]
2258 2262 if not self._inline:
2259 2263 res.append(self.datafile)
2260 2264 return res
2261 2265
2262 2266 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2263 2267 assumehaveparentrevisions=False,
2264 2268 deltamode=repository.CG_DELTAMODE_STD):
2265 2269 if nodesorder not in ('nodes', 'storage', 'linear', None):
2266 2270 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2267 2271 nodesorder)
2268 2272
2269 2273 if nodesorder is None and not self._generaldelta:
2270 2274 nodesorder = 'storage'
2271 2275
2272 2276 if (not self._storedeltachains and
2273 2277 deltamode != repository.CG_DELTAMODE_PREV):
2274 2278 deltamode = repository.CG_DELTAMODE_FULL
2275 2279
2276 2280 return storageutil.emitrevisions(
2277 2281 self, nodes, nodesorder, revlogrevisiondelta,
2278 2282 deltaparentfn=self.deltaparent,
2279 2283 candeltafn=self.candelta,
2280 2284 rawsizefn=self.rawsize,
2281 2285 revdifffn=self.revdiff,
2282 2286 flagsfn=self.flags,
2283 2287 deltamode=deltamode,
2284 2288 revisiondata=revisiondata,
2285 2289 assumehaveparentrevisions=assumehaveparentrevisions)
2286 2290
    # Delta-reuse policies accepted by clone()'s ``deltareuse`` argument;
    # see the clone() docstring for detailed semantics.
    DELTAREUSEALWAYS = 'always'      # reuse source deltas whenever possible
    DELTAREUSESAMEREVS = 'samerevs'  # reuse only when the destination would
                                     # pick the same delta base
    DELTAREUSENEVER = 'never'        # always recompute deltas

    DELTAREUSEFULLADD = 'fulladd'    # re-add revisions via addrevision()

    # set of all valid ``deltareuse`` values, used for validation
    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2294 2298
    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument control how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    # no reusable delta; fall back to the full raw text
                    rawtext = self.revision(rev, raw=True)


                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            # always restore the destination revlog's delta settings
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
2411 2415
    def censorrevision(self, tr, censornode, tombstone=b''):
        """Replace the content of ``censornode`` with a censor tombstone.

        The revlog is rewritten into temporary files which replace the
        originals when the transaction ``tr`` closes.
        """
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                # store the tombstone in place of the censored content
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                # previously-censored revisions are copied verbatim
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        # keep backups so the transaction can restore the originals on abort
        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex(self.version, None)
2475 2479
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.

        ``state`` is a mutable dict shared with the verifier; this method
        reads ``expectedversion``, ``skipflags`` and ``erroroncensored``,
        and fills ``skipread`` with nodes whose content was not validated.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    # some flagged content was requested to be skipped
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)
2579 2583
2580 2584 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2581 2585 revisionscount=False, trackedsize=False,
2582 2586 storedsize=False):
2583 2587 d = {}
2584 2588
2585 2589 if exclusivefiles:
2586 2590 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2587 2591 if not self._inline:
2588 2592 d['exclusivefiles'].append((self.opener, self.datafile))
2589 2593
2590 2594 if sharedfiles:
2591 2595 d['sharedfiles'] = []
2592 2596
2593 2597 if revisionscount:
2594 2598 d['revisionscount'] = len(self)
2595 2599
2596 2600 if trackedsize:
2597 2601 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2598 2602
2599 2603 if storedsize:
2600 2604 d['storedsize'] = sum(self.opener.stat(path).st_size
2601 2605 for path in self.files())
2602 2606
2603 2607 return d
General Comments 0
You need to be logged in to leave comments. Login now