##// END OF EJS Templates
revlog: add a `entry_binary` method on index...
marmoute -
r47808:0d8ff1f4 default
parent child Browse files
Show More
@@ -1,2984 +1,3018 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves. */
/* Map the removed Python 2 PyInt_* API onto PyLong_* so the rest of
 * this file can use one spelling for both interpreter versions. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#endif
35 35
typedef struct indexObjectStruct indexObject;

/* One node of the base-16 trie: sixteen child slots, one per hex
 * nybble of a node hash (see the nodetree comment below for the
 * encoding of the slot values). */
typedef struct {
	int children[16];
} nodetreenode;

/* Function table exposed to other C extensions as a capsule;
 * abi_version guards against mismatched consumers. */
typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;
48 48
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index;   /* back-pointer to the owning index */
	nodetreenode *nodes;  /* contiguous pool of trie nodes */
	Py_ssize_t nodelen;   /* digest size being indexed */
	size_t length;        /* # nodes in use */
	size_t capacity;      /* # nodes allocated */
	int depth;            /* maximum depth of tree */
	int splits;           /* # splits performed */
} nodetree;

/* Python object wrapper so a nodetree can be exposed on its own. */
typedef struct {
	PyObject_HEAD /* ; */
	    nodetree nt;
} nodetreeObject;
70 70
/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * end.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	    /* Type-specific fields go here. */
	    PyObject *data;     /* raw bytes of index */
	Py_ssize_t nodelen;     /* digest size of the hash, 20 for SHA-1 */
	PyObject *nullentry;    /* fast path for references to null */
	Py_buffer buf;          /* buffer of data */
	const char **offsets;   /* populated on demand */
	Py_ssize_t length;      /* current on-disk number of elements */
	unsigned new_length;    /* number of added elements */
	unsigned added_length;  /* space reserved for added elements */
	char *added;            /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;            /* 1 when index and data share one file */
	long hdrsize; /* size of index headers. Differs in v1 v.s. v2 format */
};
104 104
105 105 static Py_ssize_t index_length(const indexObject *self)
106 106 {
107 107 return self->length + self->new_length;
108 108 }
109 109
/* 32 bytes of zeros: wide enough for the null id of either digest. */
static const char nullid[32] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node);

/* The format code for the 48-bit offset + 16-bit flags field depends
 * on the platform's long width: "K" (unsigned long long) on 32-bit
 * longs, "k" (unsigned long) otherwise; "s#"/"y#" is the node id. */
#if LONG_MAX == 0x7fffffffL
static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
#else
static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;

/* A Revlogv2 index entry is 96 bytes long. */
static const long v2_hdrsize = 96;
130 130
131 131 static void raise_revlog_error(void)
132 132 {
133 133 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
134 134
135 135 mod = PyImport_ImportModule("mercurial.error");
136 136 if (mod == NULL) {
137 137 goto cleanup;
138 138 }
139 139
140 140 dict = PyModule_GetDict(mod);
141 141 if (dict == NULL) {
142 142 goto cleanup;
143 143 }
144 144 Py_INCREF(dict);
145 145
146 146 errclass = PyDict_GetItemString(dict, "RevlogError");
147 147 if (errclass == NULL) {
148 148 PyErr_SetString(PyExc_SystemError,
149 149 "could not find RevlogError");
150 150 goto cleanup;
151 151 }
152 152
153 153 /* value of exception is ignored by callers */
154 154 PyErr_SetString(errclass, "RevlogError");
155 155
156 156 cleanup:
157 157 Py_XDECREF(dict);
158 158 Py_XDECREF(mod);
159 159 }
160 160
/*
 * Return a pointer to the beginning of a RevlogNG record.
 *
 * Entries at or past the on-disk length live in the in-memory `added`
 * buffer.  For inlined revlogs, on-disk entries are not evenly spaced
 * (data is interleaved), so a per-entry offset table is built lazily
 * by inline_scan() on first access.  Returns NULL with a Python
 * error set on failure.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (pos >= self->length)
		return self->added + (pos - self->length) * self->hdrsize;

	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			Py_ssize_t ret;
			self->offsets =
			    PyMem_Malloc(self->length * sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			ret = inline_scan(self, self->offsets);
			if (ret == -1) {
				return NULL;
			};
		}
		return self->offsets[pos];
	}

	/* non-inlined: entries are fixed-size, simple arithmetic */
	return (const char *)(self->buf.buf) + pos * self->hdrsize;
}
186 186
187 187 /*
188 188 * Get parents of the given rev.
189 189 *
190 190 * The specified rev must be valid and must not be nullrev. A returned
191 191 * parent revision may be nullrev, but is guaranteed to be in valid range.
192 192 */
193 193 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
194 194 int maxrev)
195 195 {
196 196 const char *data = index_deref(self, rev);
197 197
198 198 ps[0] = getbe32(data + 24);
199 199 ps[1] = getbe32(data + 28);
200 200
201 201 /* If index file is corrupted, ps[] may point to invalid revisions. So
202 202 * there is a risk of buffer overflow to trust them unconditionally. */
203 203 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
204 204 PyErr_SetString(PyExc_ValueError, "parent out of range");
205 205 return -1;
206 206 }
207 207 return 0;
208 208 }
209 209
210 210 /*
211 211 * Get parents of the given rev.
212 212 *
213 213 * If the specified rev is out of range, IndexError will be raised. If the
214 214 * revlog entry is corrupted, ValueError may be raised.
215 215 *
216 216 * Returns 0 on success or -1 on failure.
217 217 */
218 218 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
219 219 {
220 220 int tiprev;
221 221 if (!op || !HgRevlogIndex_Check(op) || !ps) {
222 222 PyErr_BadInternalCall();
223 223 return -1;
224 224 }
225 225 tiprev = (int)index_length((indexObject *)op) - 1;
226 226 if (rev < -1 || rev > tiprev) {
227 227 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
228 228 return -1;
229 229 } else if (rev == -1) {
230 230 ps[0] = ps[1] = -1;
231 231 return 0;
232 232 } else {
233 233 return index_get_parents((indexObject *)op, rev, ps, tiprev);
234 234 }
235 235 }
236 236
/*
 * Return the byte offset in the data file at which `rev` starts.
 * The on-disk field packs 48 bits of offset with 16 bits of flags
 * (see the RevlogNG format comment below), hence the final shift.
 */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	uint64_t offset;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);
	offset = getbe32(data + 4);
	if (rev == 0) {
		/* mask out version number for the first entry */
		offset &= 0xFFFF;
	} else {
		uint32_t offset_high = getbe32(data);
		offset |= ((uint64_t)offset_high) << 32;
	}
	return (int64_t)(offset >> 16);
}
256 256
257 257 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
258 258 {
259 259 const char *data;
260 260 int tmp;
261 261
262 262 if (rev == nullrev)
263 263 return 0;
264 264
265 265 data = index_deref(self, rev);
266 266
267 267 tmp = (int)getbe32(data + 8);
268 268 if (tmp < 0) {
269 269 PyErr_Format(PyExc_OverflowError,
270 270 "revlog entry size out of bound (%d)", tmp);
271 271 return -1;
272 272 }
273 273 return tmp;
274 274 }
275 275
/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 *
 * Decode the entry at `pos` into a Python tuple: an 8-tuple for the
 * v1 format, or a 10-tuple (sidedata offset and compressed length
 * appended) for v2.  nullrev returns the cached nullentry; an
 * out-of-range pos raises IndexError.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	/*
	 * The first entry on-disk needs the version number masked out,
	 * but this doesn't apply if entries are added to an empty index.
	 */
	if (self->length && pos == 0)
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	if (self->hdrsize == v1_hdrsize) {
		return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
		                     uncomp_len, base_rev, link_rev, parent_1,
		                     parent_2, c_node_id, self->nodelen);
	} else {
		/* v2 carries the sidedata location after the v1 fields */
		sidedata_offset = getbe64(data + 64);
		sidedata_comp_len = getbe32(data + 72);

		return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
		                     uncomp_len, base_rev, link_rev, parent_1,
		                     parent_2, c_node_id, self->nodelen,
		                     sidedata_offset, sidedata_comp_len);
	}
}
345 /*
346 * Return the raw binary string representing a revision
347 */
348 static PyObject *index_entry_binary(indexObject *self, PyObject *args)
349 {
350 long rev;
351 int header;
352 const char *data;
353 char entry[v2_hdrsize];
354
355 Py_ssize_t length = index_length(self);
356
357 if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
358 return NULL;
359 }
360 if (rev < 0 || rev >= length) {
361 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
362 rev);
363 return NULL;
364 };
365
366 data = index_deref(self, rev);
367 if (data == NULL)
368 return NULL;
369 if (rev == 0) {
370 // put the header at the start of the first entry
371 memcpy(entry, data, self->hdrsize);
372 putbe32(header, entry);
373 return PyBytes_FromStringAndSize(entry, self->hdrsize);
374 }
375 return PyBytes_FromStringAndSize(data, self->hdrsize);
376 }
345 377
346 378 /*
347 379 * Return the hash of node corresponding to the given rev.
348 380 */
349 381 static const char *index_node(indexObject *self, Py_ssize_t pos)
350 382 {
351 383 Py_ssize_t length = index_length(self);
352 384 const char *data;
353 385
354 386 if (pos == nullrev)
355 387 return nullid;
356 388
357 389 if (pos >= length)
358 390 return NULL;
359 391
360 392 data = index_deref(self, pos);
361 393 return data ? data + 32 : NULL;
362 394 }
363 395
364 396 /*
365 397 * Return the hash of the node corresponding to the given rev. The
366 398 * rev is assumed to be existing. If not, an exception is set.
367 399 */
368 400 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
369 401 {
370 402 const char *node = index_node(self, pos);
371 403 if (node == NULL) {
372 404 PyErr_Format(PyExc_IndexError, "could not access rev %d",
373 405 (int)pos);
374 406 }
375 407 return node;
376 408 }
377 409
378 410 static int nt_insert(nodetree *self, const char *node, int rev);
379 411
380 412 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
381 413 {
382 414 Py_ssize_t thisnodelen;
383 415 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
384 416 return -1;
385 417 if (nodelen == thisnodelen)
386 418 return 0;
387 419 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
388 420 thisnodelen, nodelen);
389 421 return -1;
390 422 }
391 423
/*
 * Append one entry to the index.
 *
 * `obj` is a v1 8-tuple or v2 10-tuple matching the tuple formats
 * declared above; it is serialized into the in-memory `added` buffer
 * (grown geometrically when full).  The node trie is updated when
 * initialized, and the headrevs cache is invalidated.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	uint64_t offset_flags, sidedata_offset;
	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	Py_ssize_t c_node_id_len, sidedata_comp_len;
	const char *c_node_id;
	char *data;

	if (self->hdrsize == v1_hdrsize) {
		if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
		                      &comp_len, &uncomp_len, &base_rev,
		                      &link_rev, &parent_1, &parent_2,
		                      &c_node_id, &c_node_id_len)) {
			PyErr_SetString(PyExc_TypeError, "8-tuple required");
			return NULL;
		}
	} else {
		if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
		                      &comp_len, &uncomp_len, &base_rev,
		                      &link_rev, &parent_1, &parent_2,
		                      &c_node_id, &c_node_id_len,
		                      &sidedata_offset, &sidedata_comp_len)) {
			PyErr_SetString(PyExc_TypeError, "10-tuple required");
			return NULL;
		}
	}

	if (c_node_id_len != self->nodelen) {
		PyErr_SetString(PyExc_TypeError, "invalid node");
		return NULL;
	}

	/* grow the in-memory buffer: double, starting at 4096 entries */
	if (self->new_length == self->added_length) {
		size_t new_added_length =
		    self->added_length ? self->added_length * 2 : 4096;
		void *new_added = PyMem_Realloc(self->added, new_added_length *
		                                                 self->hdrsize);
		if (!new_added)
			return PyErr_NoMemory();
		self->added = new_added;
		self->added_length = new_added_length;
	}
	rev = self->length + self->new_length;
	data = self->added + self->hdrsize * self->new_length++;
	/* serialize in the on-disk layout (see the RevlogNG comment) */
	putbe32(offset_flags >> 32, data);
	putbe32(offset_flags & 0xffffffffU, data + 4);
	putbe32(comp_len, data + 8);
	putbe32(uncomp_len, data + 12);
	putbe32(base_rev, data + 16);
	putbe32(link_rev, data + 20);
	putbe32(parent_1, data + 24);
	putbe32(parent_2, data + 28);
	memcpy(data + 32, c_node_id, c_node_id_len);
	/* Padding since SHA-1 is only 20 bytes for now */
	memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
	if (self->hdrsize != v1_hdrsize) {
		putbe64(sidedata_offset, data + 64);
		putbe32(sidedata_comp_len, data + 72);
		/* Padding for 96 bytes alignment */
		memset(data + 76, 0, self->hdrsize - 76);
	}

	if (self->ntinitialized)
		nt_insert(&self->nt, c_node_id, rev);

	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
460 492
/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision.

   Python args: (rev, sidedata_offset, sidedata_comp_len).  Only
   entries appended during the current transaction (rev >= the
   on-disk length) may be rewritten. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
	uint64_t sidedata_offset;
	int rev;
	Py_ssize_t sidedata_comp_len;
	char *data;
#if LONG_MAX == 0x7fffffffL
	const char *const sidedata_format = PY23("nKi", "nKi");
#else
	const char *const sidedata_format = PY23("nki", "nki");
#endif

	if (self->hdrsize == v1_hdrsize || self->inlined) {
		/*
		 There is a bug in the transaction handling when going from an
		 inline revlog to a separate index and data file. Turn it off until
		 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
		 See issue6485.
		*/
		raise_revlog_error();
		return NULL;
	}

	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
	                      &sidedata_comp_len))
		return NULL;

	if (rev < 0 || rev >= index_length(self)) {
		PyErr_SetString(PyExc_IndexError, "revision outside index");
		return NULL;
	}
	if (rev < self->length) {
		PyErr_SetString(
		    PyExc_IndexError,
		    "cannot rewrite entries outside of this transaction");
		return NULL;
	}

	/* Find the newly added node, offset from the "already on-disk" length
	 */
	data = self->added + self->hdrsize * (rev - self->length);
	putbe64(sidedata_offset, data + 64);
	putbe32(sidedata_comp_len, data + 72);

	Py_RETURN_NONE;
}
510 542
/*
 * Return a dict of diagnostic counters (entry counts and node-trie
 * statistics) for debugging/introspection.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* store self-><field> under the byte-string key <desc>; jumps to
 * bail on any allocation or insertion failure */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyInt_FromSsize_t(self->__n);                              \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added_length)
		istat(new_length, "index entries added");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
555 587
556 588 /*
557 589 * When we cache a list, we want to be sure the caller can't mutate
558 590 * the cached copy.
559 591 */
560 592 static PyObject *list_copy(PyObject *list)
561 593 {
562 594 Py_ssize_t len = PyList_GET_SIZE(list);
563 595 PyObject *newlist = PyList_New(len);
564 596 Py_ssize_t i;
565 597
566 598 if (newlist == NULL)
567 599 return NULL;
568 600
569 601 for (i = 0; i < len; i++) {
570 602 PyObject *obj = PyList_GET_ITEM(list, i);
571 603 Py_INCREF(obj);
572 604 PyList_SET_ITEM(newlist, i, obj);
573 605 }
574 606
575 607 return newlist;
576 608 }
577 609
578 610 static int check_filter(PyObject *filter, Py_ssize_t arg)
579 611 {
580 612 if (filter) {
581 613 PyObject *arglist, *result;
582 614 int isfiltered;
583 615
584 616 arglist = Py_BuildValue("(n)", arg);
585 617 if (!arglist) {
586 618 return -1;
587 619 }
588 620
589 621 result = PyObject_Call(filter, arglist, NULL);
590 622 Py_DECREF(arglist);
591 623 if (!result) {
592 624 return -1;
593 625 }
594 626
595 627 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
596 628 * same as this function, so we can just return it directly.*/
597 629 isfiltered = PyObject_IsTrue(result);
598 630 Py_DECREF(result);
599 631 return isfiltered;
600 632 } else {
601 633 return 0;
602 634 }
603 635 }
604 636
605 637 static inline void set_phase_from_parents(char *phases, int parent_1,
606 638 int parent_2, Py_ssize_t i)
607 639 {
608 640 if (parent_1 >= 0 && phases[parent_1] > phases[i])
609 641 phases[i] = phases[parent_1];
610 642 if (parent_2 >= 0 && phases[parent_2] > phases[i])
611 643 phases[i] = phases[parent_2];
612 644 }
613 645
/*
 * reachableroots2(minroot, heads, roots, includepath) -> list of revs
 *
 * Walk the DAG from `heads` toward ancestors (never descending below
 * `minroot`) and collect every member of `roots` that is reached.
 * When `includepath` is True, additionally include every revision
 * lying on a path between a reachable root and a head.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark all roots; indices into revstates are shifted by one so
	 * nullrev (-1) maps to slot 0. */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
772 804
/*
 * Mark every node in `roots` (a set of binary node ids) with `phase`
 * in the `phases` array.
 *
 * Returns the smallest affected revision, -1 when no root was found,
 * or -2 on error (bad input type, unknown node, or iteration
 * failure).
 */
static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
                             char phase)
{
	Py_ssize_t len = index_length(self);
	PyObject *item;
	PyObject *iterator;
	int rev, minrev = -1;
	char *node;

	if (!PySet_Check(roots)) {
		PyErr_SetString(PyExc_TypeError,
		                "roots must be a set of nodes");
		return -2;
	}
	iterator = PyObject_GetIter(roots);
	if (iterator == NULL)
		return -2;
	while ((item = PyIter_Next(iterator))) {
		if (node_check(self->nodelen, item, &node) == -1)
			goto failed;
		rev = index_find_node(self, node);
		/* null is implicitly public, so negative is invalid */
		if (rev < 0 || rev >= len)
			goto failed;
		phases[rev] = phase;
		if (minrev == -1 || minrev > rev)
			minrev = rev;
		Py_DECREF(item);
	}
	Py_DECREF(iterator);
	return minrev;
failed:
	Py_DECREF(iterator);
	Py_DECREF(item);
	return -2;
}
809 841
810 842 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
811 843 {
812 844 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
813 845 96: internal */
814 846 static const char trackedphases[] = {1, 2, 32, 96};
815 847 PyObject *roots = Py_None;
816 848 PyObject *phasesetsdict = NULL;
817 849 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
818 850 Py_ssize_t len = index_length(self);
819 851 char *phases = NULL;
820 852 int minphaserev = -1, rev, i;
821 853 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
822 854
823 855 if (!PyArg_ParseTuple(args, "O", &roots))
824 856 return NULL;
825 857 if (roots == NULL || !PyDict_Check(roots)) {
826 858 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
827 859 return NULL;
828 860 }
829 861
830 862 phases = calloc(len, 1);
831 863 if (phases == NULL) {
832 864 PyErr_NoMemory();
833 865 return NULL;
834 866 }
835 867
836 868 for (i = 0; i < numphases; ++i) {
837 869 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
838 870 PyObject *phaseroots = NULL;
839 871 if (pyphase == NULL)
840 872 goto release;
841 873 phaseroots = PyDict_GetItem(roots, pyphase);
842 874 Py_DECREF(pyphase);
843 875 if (phaseroots == NULL)
844 876 continue;
845 877 rev = add_roots_get_min(self, phaseroots, phases,
846 878 trackedphases[i]);
847 879 if (rev == -2)
848 880 goto release;
849 881 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
850 882 minphaserev = rev;
851 883 }
852 884
853 885 for (i = 0; i < numphases; ++i) {
854 886 phasesets[i] = PySet_New(NULL);
855 887 if (phasesets[i] == NULL)
856 888 goto release;
857 889 }
858 890
859 891 if (minphaserev == -1)
860 892 minphaserev = len;
861 893 for (rev = minphaserev; rev < len; ++rev) {
862 894 PyObject *pyphase = NULL;
863 895 PyObject *pyrev = NULL;
864 896 int parents[2];
865 897 /*
866 898 * The parent lookup could be skipped for phaseroots, but
867 899 * phase --force would historically not recompute them
868 900 * correctly, leaving descendents with a lower phase around.
869 901 * As such, unconditionally recompute the phase.
870 902 */
871 903 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
872 904 goto release;
873 905 set_phase_from_parents(phases, parents[0], parents[1], rev);
874 906 switch (phases[rev]) {
875 907 case 0:
876 908 continue;
877 909 case 1:
878 910 pyphase = phasesets[0];
879 911 break;
880 912 case 2:
881 913 pyphase = phasesets[1];
882 914 break;
883 915 case 32:
884 916 pyphase = phasesets[2];
885 917 break;
886 918 case 96:
887 919 pyphase = phasesets[3];
888 920 break;
889 921 default:
890 922 /* this should never happen since the phase number is
891 923 * specified by this function. */
892 924 PyErr_SetString(PyExc_SystemError,
893 925 "bad phase number in internal list");
894 926 goto release;
895 927 }
896 928 pyrev = PyInt_FromLong(rev);
897 929 if (pyrev == NULL)
898 930 goto release;
899 931 if (PySet_Add(pyphase, pyrev) == -1) {
900 932 Py_DECREF(pyrev);
901 933 goto release;
902 934 }
903 935 Py_DECREF(pyrev);
904 936 }
905 937
906 938 phasesetsdict = _dict_new_presized(numphases);
907 939 if (phasesetsdict == NULL)
908 940 goto release;
909 941 for (i = 0; i < numphases; ++i) {
910 942 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
911 943 if (pyphase == NULL)
912 944 goto release;
913 945 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
914 946 -1) {
915 947 Py_DECREF(pyphase);
916 948 goto release;
917 949 }
918 950 Py_DECREF(phasesets[i]);
919 951 phasesets[i] = NULL;
920 952 }
921 953
922 954 return Py_BuildValue("nN", len, phasesetsdict);
923 955
924 956 release:
925 957 for (i = 0; i < numphases; ++i)
926 958 Py_XDECREF(phasesets[i]);
927 959 Py_XDECREF(phasesetsdict);
928 960
929 961 free(phases);
930 962 return NULL;
931 963 }
932 964
/*
 * index_headrevs([filteredrevs]) -> list of head revisions
 *
 * A head is a revision with no unfiltered child.  The result is
 * cached on the index object (invalidated by index_append and by a
 * change of filter) and callers always receive a fresh copy of the
 * cached list so the cache cannot be mutated from Python.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit: same filter object as last time */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		/* empty repo: nullrev is the only head */
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* every rev never marked as "has a child" is a head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
1034 1066
1035 1067 /**
1036 1068 * Obtain the base revision index entry.
1037 1069 *
1038 1070 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1039 1071 */
1040 1072 static inline int index_baserev(indexObject *self, int rev)
1041 1073 {
1042 1074 const char *data;
1043 1075 int result;
1044 1076
1045 1077 data = index_deref(self, rev);
1046 1078 if (data == NULL)
1047 1079 return -2;
1048 1080 result = getbe32(data + 16);
1049 1081
1050 1082 if (result > rev) {
1051 1083 PyErr_Format(
1052 1084 PyExc_ValueError,
1053 1085 "corrupted revlog, revision base above revision: %d, %d",
1054 1086 rev, result);
1055 1087 return -2;
1056 1088 }
1057 1089 if (result < -1) {
1058 1090 PyErr_Format(
1059 1091 PyExc_ValueError,
1060 1092 "corrupted revlog, revision base out of range: %d, %d", rev,
1061 1093 result);
1062 1094 return -2;
1063 1095 }
1064 1096 return result;
1065 1097 }
1066 1098
1067 1099 /**
1068 1100 * Find if a revision is a snapshot or not
1069 1101 *
1070 1102 * Only relevant for sparse-revlog case.
1071 1103 * Callers must ensure that rev is in a valid range.
1072 1104 */
1073 1105 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1074 1106 {
1075 1107 int ps[2];
1076 1108 Py_ssize_t base;
1077 1109 while (rev >= 0) {
1078 1110 base = (Py_ssize_t)index_baserev(self, rev);
1079 1111 if (base == rev) {
1080 1112 base = -1;
1081 1113 }
1082 1114 if (base == -2) {
1083 1115 assert(PyErr_Occurred());
1084 1116 return -1;
1085 1117 }
1086 1118 if (base == -1) {
1087 1119 return 1;
1088 1120 }
1089 1121 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1090 1122 assert(PyErr_Occurred());
1091 1123 return -1;
1092 1124 };
1093 1125 if (base == ps[0] || base == ps[1]) {
1094 1126 return 0;
1095 1127 }
1096 1128 rev = base;
1097 1129 }
1098 1130 return rev == -1;
1099 1131 }
1100 1132
1101 1133 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1102 1134 {
1103 1135 long rev;
1104 1136 int issnap;
1105 1137 Py_ssize_t length = index_length(self);
1106 1138
1107 1139 if (!pylong_to_long(value, &rev)) {
1108 1140 return NULL;
1109 1141 }
1110 1142 if (rev < -1 || rev >= length) {
1111 1143 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1112 1144 rev);
1113 1145 return NULL;
1114 1146 };
1115 1147 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1116 1148 if (issnap < 0) {
1117 1149 return NULL;
1118 1150 };
1119 1151 return PyBool_FromLong((long)issnap);
1120 1152 }
1121 1153
1122 1154 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1123 1155 {
1124 1156 Py_ssize_t start_rev;
1125 1157 PyObject *cache;
1126 1158 Py_ssize_t base;
1127 1159 Py_ssize_t rev;
1128 1160 PyObject *key = NULL;
1129 1161 PyObject *value = NULL;
1130 1162 const Py_ssize_t length = index_length(self);
1131 1163 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1132 1164 return NULL;
1133 1165 }
1134 1166 for (rev = start_rev; rev < length; rev++) {
1135 1167 int issnap;
1136 1168 PyObject *allvalues = NULL;
1137 1169 issnap = index_issnapshotrev(self, rev);
1138 1170 if (issnap < 0) {
1139 1171 goto bail;
1140 1172 }
1141 1173 if (issnap == 0) {
1142 1174 continue;
1143 1175 }
1144 1176 base = (Py_ssize_t)index_baserev(self, rev);
1145 1177 if (base == rev) {
1146 1178 base = -1;
1147 1179 }
1148 1180 if (base == -2) {
1149 1181 assert(PyErr_Occurred());
1150 1182 goto bail;
1151 1183 }
1152 1184 key = PyInt_FromSsize_t(base);
1153 1185 allvalues = PyDict_GetItem(cache, key);
1154 1186 if (allvalues == NULL && PyErr_Occurred()) {
1155 1187 goto bail;
1156 1188 }
1157 1189 if (allvalues == NULL) {
1158 1190 int r;
1159 1191 allvalues = PyList_New(0);
1160 1192 if (!allvalues) {
1161 1193 goto bail;
1162 1194 }
1163 1195 r = PyDict_SetItem(cache, key, allvalues);
1164 1196 Py_DECREF(allvalues);
1165 1197 if (r < 0) {
1166 1198 goto bail;
1167 1199 }
1168 1200 }
1169 1201 value = PyInt_FromSsize_t(rev);
1170 1202 if (PyList_Append(allvalues, value)) {
1171 1203 goto bail;
1172 1204 }
1173 1205 Py_CLEAR(key);
1174 1206 Py_CLEAR(value);
1175 1207 }
1176 1208 Py_RETURN_NONE;
1177 1209 bail:
1178 1210 Py_XDECREF(key);
1179 1211 Py_XDECREF(value);
1180 1212 return NULL;
1181 1213 }
1182 1214
1183 1215 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1184 1216 {
1185 1217 int rev, generaldelta;
1186 1218 PyObject *stoparg;
1187 1219 int stoprev, iterrev, baserev = -1;
1188 1220 int stopped;
1189 1221 PyObject *chain = NULL, *result = NULL;
1190 1222 const Py_ssize_t length = index_length(self);
1191 1223
1192 1224 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1193 1225 return NULL;
1194 1226 }
1195 1227
1196 1228 if (PyInt_Check(stoparg)) {
1197 1229 stoprev = (int)PyInt_AsLong(stoparg);
1198 1230 if (stoprev == -1 && PyErr_Occurred()) {
1199 1231 return NULL;
1200 1232 }
1201 1233 } else if (stoparg == Py_None) {
1202 1234 stoprev = -2;
1203 1235 } else {
1204 1236 PyErr_SetString(PyExc_ValueError,
1205 1237 "stoprev must be integer or None");
1206 1238 return NULL;
1207 1239 }
1208 1240
1209 1241 if (rev < 0 || rev >= length) {
1210 1242 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1211 1243 return NULL;
1212 1244 }
1213 1245
1214 1246 chain = PyList_New(0);
1215 1247 if (chain == NULL) {
1216 1248 return NULL;
1217 1249 }
1218 1250
1219 1251 baserev = index_baserev(self, rev);
1220 1252
1221 1253 /* This should never happen. */
1222 1254 if (baserev <= -2) {
1223 1255 /* Error should be set by index_deref() */
1224 1256 assert(PyErr_Occurred());
1225 1257 goto bail;
1226 1258 }
1227 1259
1228 1260 iterrev = rev;
1229 1261
1230 1262 while (iterrev != baserev && iterrev != stoprev) {
1231 1263 PyObject *value = PyInt_FromLong(iterrev);
1232 1264 if (value == NULL) {
1233 1265 goto bail;
1234 1266 }
1235 1267 if (PyList_Append(chain, value)) {
1236 1268 Py_DECREF(value);
1237 1269 goto bail;
1238 1270 }
1239 1271 Py_DECREF(value);
1240 1272
1241 1273 if (generaldelta) {
1242 1274 iterrev = baserev;
1243 1275 } else {
1244 1276 iterrev--;
1245 1277 }
1246 1278
1247 1279 if (iterrev < 0) {
1248 1280 break;
1249 1281 }
1250 1282
1251 1283 if (iterrev >= length) {
1252 1284 PyErr_SetString(PyExc_IndexError,
1253 1285 "revision outside index");
1254 1286 return NULL;
1255 1287 }
1256 1288
1257 1289 baserev = index_baserev(self, iterrev);
1258 1290
1259 1291 /* This should never happen. */
1260 1292 if (baserev <= -2) {
1261 1293 /* Error should be set by index_deref() */
1262 1294 assert(PyErr_Occurred());
1263 1295 goto bail;
1264 1296 }
1265 1297 }
1266 1298
1267 1299 if (iterrev == stoprev) {
1268 1300 stopped = 1;
1269 1301 } else {
1270 1302 PyObject *value = PyInt_FromLong(iterrev);
1271 1303 if (value == NULL) {
1272 1304 goto bail;
1273 1305 }
1274 1306 if (PyList_Append(chain, value)) {
1275 1307 Py_DECREF(value);
1276 1308 goto bail;
1277 1309 }
1278 1310 Py_DECREF(value);
1279 1311
1280 1312 stopped = 0;
1281 1313 }
1282 1314
1283 1315 if (PyList_Reverse(chain)) {
1284 1316 goto bail;
1285 1317 }
1286 1318
1287 1319 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1288 1320 Py_DECREF(chain);
1289 1321 return result;
1290 1322
1291 1323 bail:
1292 1324 Py_DECREF(chain);
1293 1325 return NULL;
1294 1326 }
1295 1327
1296 1328 static inline int64_t
1297 1329 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1298 1330 {
1299 1331 int64_t start_offset;
1300 1332 int64_t end_offset;
1301 1333 int end_size;
1302 1334 start_offset = index_get_start(self, start_rev);
1303 1335 if (start_offset < 0) {
1304 1336 return -1;
1305 1337 }
1306 1338 end_offset = index_get_start(self, end_rev);
1307 1339 if (end_offset < 0) {
1308 1340 return -1;
1309 1341 }
1310 1342 end_size = index_get_length(self, end_rev);
1311 1343 if (end_size < 0) {
1312 1344 return -1;
1313 1345 }
1314 1346 if (end_offset < start_offset) {
1315 1347 PyErr_Format(PyExc_ValueError,
1316 1348 "corrupted revlog index: inconsistent offset "
1317 1349 "between revisions (%zd) and (%zd)",
1318 1350 start_rev, end_rev);
1319 1351 return -1;
1320 1352 }
1321 1353 return (end_offset - start_offset) + (int64_t)end_size;
1322 1354 }
1323 1355
1324 1356 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1325 1357 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1326 1358 Py_ssize_t startidx, Py_ssize_t endidx)
1327 1359 {
1328 1360 int length;
1329 1361 while (endidx > 1 && endidx > startidx) {
1330 1362 length = index_get_length(self, revs[endidx - 1]);
1331 1363 if (length < 0) {
1332 1364 return -1;
1333 1365 }
1334 1366 if (length != 0) {
1335 1367 break;
1336 1368 }
1337 1369 endidx -= 1;
1338 1370 }
1339 1371 return endidx;
1340 1372 }
1341 1373
/* A gap of unread bytes between two revisions of a delta chain, used
 * when slicing a chain by density (see index_slicechunktodensity). */
struct Gap {
	int64_t size;   /* gap size in bytes */
	Py_ssize_t idx; /* position in the rev array right after the gap */
};
1346 1378
1347 1379 static int gap_compare(const void *left, const void *right)
1348 1380 {
1349 1381 const struct Gap *l_left = ((const struct Gap *)left);
1350 1382 const struct Gap *l_right = ((const struct Gap *)right);
1351 1383 if (l_left->size < l_right->size) {
1352 1384 return -1;
1353 1385 } else if (l_left->size > l_right->size) {
1354 1386 return 1;
1355 1387 }
1356 1388 return 0;
1357 1389 }
1358 1390 static int Py_ssize_t_compare(const void *left, const void *right)
1359 1391 {
1360 1392 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1361 1393 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1362 1394 if (l_left < l_right) {
1363 1395 return -1;
1364 1396 } else if (l_left > l_right) {
1365 1397 return 1;
1366 1398 }
1367 1399 return 0;
1368 1400 }
1369 1401
/*
 * Slice a delta chain into read chunks that meet a density target.
 *
 * args: (revs list, targetdensity, mingapsize). Returns either a
 * 1-tuple containing the original list (no slicing needed) or a list
 * of list slices, where gaps larger than mingapsize have been skipped
 * until the payload/read-size ratio reaches targetdensity.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* Chains smaller than the gap threshold are never worth slicing. */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* Already dense enough: return the chain unsliced. */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		/* empty revisions contribute no data and no gap boundary */
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			/* only gaps above the threshold are worth skipping */
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* re-sort cut points into chain order before building slices */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the final slice runs to the end of the chain */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1565 1597
1566 1598 static inline int nt_level(const char *node, Py_ssize_t level)
1567 1599 {
1568 1600 int v = node[level >> 1];
1569 1601 if (!(level & 1))
1570 1602 v >>= 4;
1571 1603 return v & 0xf;
1572 1604 }
1573 1605
/*
 * Return values:
 *
 *   -4: match is ambiguous (multiple candidates)
 *   -2: not found
 * rest: valid rev
 *
 * `node` may be binary (hex == 0) or hexadecimal (hex == 1); a partial
 * prefix is accepted and resolved if unambiguous.
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	/* maxlevel counts nybbles: one per hex digit, two per binary byte. */
	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* negative child = leaf encoding -(rev + 2); verify
			 * the remaining nybbles against the stored node */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* positive child = internal node: descend */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1624 1656
1625 1657 static int nt_new(nodetree *self)
1626 1658 {
1627 1659 if (self->length == self->capacity) {
1628 1660 size_t newcapacity;
1629 1661 nodetreenode *newnodes;
1630 1662 newcapacity = self->capacity * 2;
1631 1663 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1632 1664 PyErr_SetString(PyExc_MemoryError,
1633 1665 "overflow in nt_new");
1634 1666 return -1;
1635 1667 }
1636 1668 newnodes =
1637 1669 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1638 1670 if (newnodes == NULL) {
1639 1671 PyErr_SetString(PyExc_MemoryError, "out of memory");
1640 1672 return -1;
1641 1673 }
1642 1674 self->capacity = newcapacity;
1643 1675 self->nodes = newnodes;
1644 1676 memset(&self->nodes[self->length], 0,
1645 1677 sizeof(nodetreenode) * (self->capacity - self->length));
1646 1678 }
1647 1679 return self->length++;
1648 1680 }
1649 1681
/*
 * Insert binary `node` into the trie, mapping it to `rev`.
 *
 * Descends one nybble per level. An empty slot stores the leaf directly
 * (encoded as -rev - 2); a colliding leaf for a different node is pushed
 * one level down into a newly allocated internal node. Returns 0 on
 * success, -1 on error (exception set).
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the leaf here */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* slot already holds a leaf */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				/* same node: just update the rev */
				n->children[k] = -rev - 2;
				return 0;
			}
			/* different node: split by pushing the old leaf one
			 * level down into a new internal node */
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* internal node: keep descending */
			level += 1;
			off = v;
		}
	}

	return -1;
}
1697 1729
1698 1730 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1699 1731 {
1700 1732 Py_ssize_t rev;
1701 1733 const char *node;
1702 1734 Py_ssize_t length;
1703 1735 if (!PyArg_ParseTuple(args, "n", &rev))
1704 1736 return NULL;
1705 1737 length = index_length(self->nt.index);
1706 1738 if (rev < 0 || rev >= length) {
1707 1739 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1708 1740 return NULL;
1709 1741 }
1710 1742 node = index_node_existing(self->nt.index, rev);
1711 1743 if (nt_insert(&self->nt, node, (int)rev) == -1)
1712 1744 return NULL;
1713 1745 Py_RETURN_NONE;
1714 1746 }
1715 1747
/* Remove `node` from the tree. Returns 0 on success, -1 on error. */
static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}
1722 1754
/*
 * Initialize a nodetree over `index`, pre-sizing the node pool for
 * roughly `capacity` revisions. Returns 0 on success, -1 with a Python
 * exception set on overflow or allocation failure.
 */
static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	/* node 0 is the (empty) root */
	self->length = 1;
	return 0;
}
1747 1779
1748 1780 static int ntobj_init(nodetreeObject *self, PyObject *args)
1749 1781 {
1750 1782 PyObject *index;
1751 1783 unsigned capacity;
1752 1784 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1753 1785 &capacity))
1754 1786 return -1;
1755 1787 Py_INCREF(index);
1756 1788 return nt_init(&self->nt, (indexObject *)index, capacity);
1757 1789 }
1758 1790
/* Resolve a hex node prefix through the tree. Return values follow
 * nt_find(): -4 ambiguous, -2 not found, otherwise a valid rev. */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
1763 1795
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 *   -3: error (exception set)
 *   -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* leaf reached: the prefix is unique up to here,
			 * but confirm it belongs to the requested node */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		/* internal node: one more nybble is needed, descend */
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
1809 1841
1810 1842 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1811 1843 {
1812 1844 PyObject *val;
1813 1845 char *node;
1814 1846 int length;
1815 1847
1816 1848 if (!PyArg_ParseTuple(args, "O", &val))
1817 1849 return NULL;
1818 1850 if (node_check(self->nt.nodelen, val, &node) == -1)
1819 1851 return NULL;
1820 1852
1821 1853 length = nt_shortest(&self->nt, node);
1822 1854 if (length == -3)
1823 1855 return NULL;
1824 1856 if (length == -2) {
1825 1857 raise_revlog_error();
1826 1858 return NULL;
1827 1859 }
1828 1860 return PyInt_FromLong(length);
1829 1861 }
1830 1862
/* Release the trie's node pool. Safe to call more than once. */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL; /* guard against double free */
}
1836 1868
/* tp_dealloc for the Python nodetree wrapper: drop the reference to the
 * underlying index, free the trie, then free the object itself. */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
1843 1875
/* Method table for the Python-level nodetree type. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
1851 1883
/* Python type object exposing the radix tree as `parsers.nodetree`. */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
1891 1923
1892 1924 static int index_init_nt(indexObject *self)
1893 1925 {
1894 1926 if (!self->ntinitialized) {
1895 1927 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1896 1928 nt_dealloc(&self->nt);
1897 1929 return -1;
1898 1930 }
1899 1931 if (nt_insert(&self->nt, nullid, -1) == -1) {
1900 1932 nt_dealloc(&self->nt);
1901 1933 return -1;
1902 1934 }
1903 1935 self->ntinitialized = 1;
1904 1936 self->ntrev = (int)index_length(self);
1905 1937 self->ntlookups = 1;
1906 1938 self->ntmisses = 0;
1907 1939 }
1908 1940 return 0;
1909 1941 }
1910 1942
/*
 * Return values:
 *
 *   -3: error (exception set)
 *   -2: not found (no exception set)
 * rest: valid rev
 *
 * Looks the binary node up in the radix tree first; on a miss, scans the
 * index backwards while warming the tree (see strategy comment below).
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				/* cache only the node we were asked for */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			/* cache every node visited during the scan */
			if (nt_insert(&self->nt, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		/* remember how far down the cache now reaches */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
1970 2002
1971 2003 static PyObject *index_getitem(indexObject *self, PyObject *value)
1972 2004 {
1973 2005 char *node;
1974 2006 int rev;
1975 2007
1976 2008 if (PyInt_Check(value)) {
1977 2009 long idx;
1978 2010 if (!pylong_to_long(value, &idx)) {
1979 2011 return NULL;
1980 2012 }
1981 2013 return index_get(self, idx);
1982 2014 }
1983 2015
1984 2016 if (node_check(self->nodelen, value, &node) == -1)
1985 2017 return NULL;
1986 2018 rev = index_find_node(self, node);
1987 2019 if (rev >= -1)
1988 2020 return PyInt_FromLong(rev);
1989 2021 if (rev == -2)
1990 2022 raise_revlog_error();
1991 2023 return NULL;
1992 2024 }
1993 2025
1994 2026 /*
1995 2027 * Fully populate the radix tree.
1996 2028 */
1997 2029 static int index_populate_nt(indexObject *self)
1998 2030 {
1999 2031 int rev;
2000 2032 if (self->ntrev > 0) {
2001 2033 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2002 2034 const char *n = index_node_existing(self, rev);
2003 2035 if (n == NULL)
2004 2036 return -1;
2005 2037 if (nt_insert(&self->nt, n, rev) == -1)
2006 2038 return -1;
2007 2039 }
2008 2040 self->ntrev = -1;
2009 2041 }
2010 2042 return 0;
2011 2043 }
2012 2044
/*
 * Resolve a hex node prefix to the full binary node.
 *
 * Returns None for a prefix containing non-hex characters or matching
 * nothing, raises ValueError on bad length, raises a revlog error on an
 * ambiguous prefix, and otherwise returns the full node as bytes.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* validate the prefix is hexadecimal without raising */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		/* ambiguous prefix */
		raise_revlog_error();
		return NULL;
	case -2:
		/* no match */
		Py_RETURN_NONE;
	case -1:
		/* the null node */
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2063 2095
2064 2096 static PyObject *index_shortest(indexObject *self, PyObject *args)
2065 2097 {
2066 2098 PyObject *val;
2067 2099 char *node;
2068 2100 int length;
2069 2101
2070 2102 if (!PyArg_ParseTuple(args, "O", &val))
2071 2103 return NULL;
2072 2104 if (node_check(self->nodelen, val, &node) == -1)
2073 2105 return NULL;
2074 2106
2075 2107 self->ntlookups++;
2076 2108 if (index_init_nt(self) == -1)
2077 2109 return NULL;
2078 2110 if (index_populate_nt(self) == -1)
2079 2111 return NULL;
2080 2112 length = nt_shortest(&self->nt, node);
2081 2113 if (length == -3)
2082 2114 return NULL;
2083 2115 if (length == -2) {
2084 2116 raise_revlog_error();
2085 2117 return NULL;
2086 2118 }
2087 2119 return PyInt_FromLong(length);
2088 2120 }
2089 2121
2090 2122 static PyObject *index_m_get(indexObject *self, PyObject *args)
2091 2123 {
2092 2124 PyObject *val;
2093 2125 char *node;
2094 2126 int rev;
2095 2127
2096 2128 if (!PyArg_ParseTuple(args, "O", &val))
2097 2129 return NULL;
2098 2130 if (node_check(self->nodelen, val, &node) == -1)
2099 2131 return NULL;
2100 2132 rev = index_find_node(self, node);
2101 2133 if (rev == -3)
2102 2134 return NULL;
2103 2135 if (rev == -2)
2104 2136 Py_RETURN_NONE;
2105 2137 return PyInt_FromLong(rev);
2106 2138 }
2107 2139
2108 2140 static int index_contains(indexObject *self, PyObject *value)
2109 2141 {
2110 2142 char *node;
2111 2143
2112 2144 if (PyInt_Check(value)) {
2113 2145 long rev;
2114 2146 if (!pylong_to_long(value, &rev)) {
2115 2147 return -1;
2116 2148 }
2117 2149 return rev >= -1 && rev < index_length(self);
2118 2150 }
2119 2151
2120 2152 if (node_check(self->nodelen, value, &node) == -1)
2121 2153 return -1;
2122 2154
2123 2155 switch (index_find_node(self, node)) {
2124 2156 case -3:
2125 2157 return -1;
2126 2158 case -2:
2127 2159 return 0;
2128 2160 default:
2129 2161 return 1;
2130 2162 }
2131 2163 }
2132 2164
2133 2165 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2134 2166 {
2135 2167 int ret = index_contains(self, args);
2136 2168 if (ret < 0)
2137 2169 return NULL;
2138 2170 return PyBool_FromLong((long)ret);
2139 2171 }
2140 2172
2141 2173 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2142 2174 {
2143 2175 char *node;
2144 2176 int rev;
2145 2177
2146 2178 if (node_check(self->nodelen, val, &node) == -1)
2147 2179 return NULL;
2148 2180 rev = index_find_node(self, node);
2149 2181 if (rev >= -1)
2150 2182 return PyInt_FromLong(rev);
2151 2183 if (rev == -2)
2152 2184 raise_revlog_error();
2153 2185 return NULL;
2154 2186 }
2155 2187
2156 2188 typedef uint64_t bitmask;
2157 2189
2158 2190 /*
2159 2191 * Given a disjoint set of revs, return all candidates for the
2160 2192 * greatest common ancestor. In revset notation, this is the set
2161 2193 * "heads(::a and ::b and ...)"
2162 2194 */
2163 2195 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2164 2196 int revcount)
2165 2197 {
2166 2198 const bitmask allseen = (1ull << revcount) - 1;
2167 2199 const bitmask poison = 1ull << revcount;
2168 2200 PyObject *gca = PyList_New(0);
2169 2201 int i, v, interesting;
2170 2202 int maxrev = -1;
2171 2203 bitmask sp;
2172 2204 bitmask *seen;
2173 2205
2174 2206 if (gca == NULL)
2175 2207 return PyErr_NoMemory();
2176 2208
2177 2209 for (i = 0; i < revcount; i++) {
2178 2210 if (revs[i] > maxrev)
2179 2211 maxrev = revs[i];
2180 2212 }
2181 2213
2182 2214 seen = calloc(sizeof(*seen), maxrev + 1);
2183 2215 if (seen == NULL) {
2184 2216 Py_DECREF(gca);
2185 2217 return PyErr_NoMemory();
2186 2218 }
2187 2219
2188 2220 for (i = 0; i < revcount; i++)
2189 2221 seen[revs[i]] = 1ull << i;
2190 2222
2191 2223 interesting = revcount;
2192 2224
2193 2225 for (v = maxrev; v >= 0 && interesting; v--) {
2194 2226 bitmask sv = seen[v];
2195 2227 int parents[2];
2196 2228
2197 2229 if (!sv)
2198 2230 continue;
2199 2231
2200 2232 if (sv < poison) {
2201 2233 interesting -= 1;
2202 2234 if (sv == allseen) {
2203 2235 PyObject *obj = PyInt_FromLong(v);
2204 2236 if (obj == NULL)
2205 2237 goto bail;
2206 2238 if (PyList_Append(gca, obj) == -1) {
2207 2239 Py_DECREF(obj);
2208 2240 goto bail;
2209 2241 }
2210 2242 sv |= poison;
2211 2243 for (i = 0; i < revcount; i++) {
2212 2244 if (revs[i] == v)
2213 2245 goto done;
2214 2246 }
2215 2247 }
2216 2248 }
2217 2249 if (index_get_parents(self, v, parents, maxrev) < 0)
2218 2250 goto bail;
2219 2251
2220 2252 for (i = 0; i < 2; i++) {
2221 2253 int p = parents[i];
2222 2254 if (p == -1)
2223 2255 continue;
2224 2256 sp = seen[p];
2225 2257 if (sv < poison) {
2226 2258 if (sp == 0) {
2227 2259 seen[p] = sv;
2228 2260 interesting++;
2229 2261 } else if (sp != sv)
2230 2262 seen[p] |= sv;
2231 2263 } else {
2232 2264 if (sp && sp < poison)
2233 2265 interesting--;
2234 2266 seen[p] = sv;
2235 2267 }
2236 2268 }
2237 2269 }
2238 2270
2239 2271 done:
2240 2272 free(seen);
2241 2273 return gca;
2242 2274 bail:
2243 2275 free(seen);
2244 2276 Py_XDECREF(gca);
2245 2277 return NULL;
2246 2278 }
2247 2279
2248 2280 /*
2249 2281 * Given a disjoint set of revs, return the subset with the longest
2250 2282 * path to the root.
2251 2283 */
2252 2284 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2253 2285 {
2254 2286 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2255 2287 static const Py_ssize_t capacity = 24;
2256 2288 int *depth, *interesting = NULL;
2257 2289 int i, j, v, ninteresting;
2258 2290 PyObject *dict = NULL, *keys = NULL;
2259 2291 long *seen = NULL;
2260 2292 int maxrev = -1;
2261 2293 long final;
2262 2294
2263 2295 if (revcount > capacity) {
2264 2296 PyErr_Format(PyExc_OverflowError,
2265 2297 "bitset size (%ld) > capacity (%ld)",
2266 2298 (long)revcount, (long)capacity);
2267 2299 return NULL;
2268 2300 }
2269 2301
2270 2302 for (i = 0; i < revcount; i++) {
2271 2303 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2272 2304 if (n > maxrev)
2273 2305 maxrev = n;
2274 2306 }
2275 2307
2276 2308 depth = calloc(sizeof(*depth), maxrev + 1);
2277 2309 if (depth == NULL)
2278 2310 return PyErr_NoMemory();
2279 2311
2280 2312 seen = calloc(sizeof(*seen), maxrev + 1);
2281 2313 if (seen == NULL) {
2282 2314 PyErr_NoMemory();
2283 2315 goto bail;
2284 2316 }
2285 2317
2286 2318 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2287 2319 if (interesting == NULL) {
2288 2320 PyErr_NoMemory();
2289 2321 goto bail;
2290 2322 }
2291 2323
2292 2324 if (PyList_Sort(revs) == -1)
2293 2325 goto bail;
2294 2326
2295 2327 for (i = 0; i < revcount; i++) {
2296 2328 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2297 2329 long b = 1l << i;
2298 2330 depth[n] = 1;
2299 2331 seen[n] = b;
2300 2332 interesting[b] = 1;
2301 2333 }
2302 2334
2303 2335 /* invariant: ninteresting is the number of non-zero entries in
2304 2336 * interesting. */
2305 2337 ninteresting = (int)revcount;
2306 2338
2307 2339 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2308 2340 int dv = depth[v];
2309 2341 int parents[2];
2310 2342 long sv;
2311 2343
2312 2344 if (dv == 0)
2313 2345 continue;
2314 2346
2315 2347 sv = seen[v];
2316 2348 if (index_get_parents(self, v, parents, maxrev) < 0)
2317 2349 goto bail;
2318 2350
2319 2351 for (i = 0; i < 2; i++) {
2320 2352 int p = parents[i];
2321 2353 long sp;
2322 2354 int dp;
2323 2355
2324 2356 if (p == -1)
2325 2357 continue;
2326 2358
2327 2359 dp = depth[p];
2328 2360 sp = seen[p];
2329 2361 if (dp <= dv) {
2330 2362 depth[p] = dv + 1;
2331 2363 if (sp != sv) {
2332 2364 interesting[sv] += 1;
2333 2365 seen[p] = sv;
2334 2366 if (sp) {
2335 2367 interesting[sp] -= 1;
2336 2368 if (interesting[sp] == 0)
2337 2369 ninteresting -= 1;
2338 2370 }
2339 2371 }
2340 2372 } else if (dv == dp - 1) {
2341 2373 long nsp = sp | sv;
2342 2374 if (nsp == sp)
2343 2375 continue;
2344 2376 seen[p] = nsp;
2345 2377 interesting[sp] -= 1;
2346 2378 if (interesting[sp] == 0)
2347 2379 ninteresting -= 1;
2348 2380 if (interesting[nsp] == 0)
2349 2381 ninteresting += 1;
2350 2382 interesting[nsp] += 1;
2351 2383 }
2352 2384 }
2353 2385 interesting[sv] -= 1;
2354 2386 if (interesting[sv] == 0)
2355 2387 ninteresting -= 1;
2356 2388 }
2357 2389
2358 2390 final = 0;
2359 2391 j = ninteresting;
2360 2392 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2361 2393 if (interesting[i] == 0)
2362 2394 continue;
2363 2395 final |= i;
2364 2396 j -= 1;
2365 2397 }
2366 2398 if (final == 0) {
2367 2399 keys = PyList_New(0);
2368 2400 goto bail;
2369 2401 }
2370 2402
2371 2403 dict = PyDict_New();
2372 2404 if (dict == NULL)
2373 2405 goto bail;
2374 2406
2375 2407 for (i = 0; i < revcount; i++) {
2376 2408 PyObject *key;
2377 2409
2378 2410 if ((final & (1 << i)) == 0)
2379 2411 continue;
2380 2412
2381 2413 key = PyList_GET_ITEM(revs, i);
2382 2414 Py_INCREF(key);
2383 2415 Py_INCREF(Py_None);
2384 2416 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2385 2417 Py_DECREF(key);
2386 2418 Py_DECREF(Py_None);
2387 2419 goto bail;
2388 2420 }
2389 2421 }
2390 2422
2391 2423 keys = PyDict_Keys(dict);
2392 2424
2393 2425 bail:
2394 2426 free(depth);
2395 2427 free(seen);
2396 2428 free(interesting);
2397 2429 Py_XDECREF(dict);
2398 2430
2399 2431 return keys;
2400 2432 }
2401 2433
2402 2434 /*
2403 2435 * Given a (possibly overlapping) set of revs, return all the
2404 2436 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2405 2437 */
2406 2438 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2407 2439 {
2408 2440 PyObject *ret = NULL;
2409 2441 Py_ssize_t argcount, i, len;
2410 2442 bitmask repeat = 0;
2411 2443 int revcount = 0;
2412 2444 int *revs;
2413 2445
2414 2446 argcount = PySequence_Length(args);
2415 2447 revs = PyMem_Malloc(argcount * sizeof(*revs));
2416 2448 if (argcount > 0 && revs == NULL)
2417 2449 return PyErr_NoMemory();
2418 2450 len = index_length(self);
2419 2451
2420 2452 for (i = 0; i < argcount; i++) {
2421 2453 static const int capacity = 24;
2422 2454 PyObject *obj = PySequence_GetItem(args, i);
2423 2455 bitmask x;
2424 2456 long val;
2425 2457
2426 2458 if (!PyInt_Check(obj)) {
2427 2459 PyErr_SetString(PyExc_TypeError,
2428 2460 "arguments must all be ints");
2429 2461 Py_DECREF(obj);
2430 2462 goto bail;
2431 2463 }
2432 2464 val = PyInt_AsLong(obj);
2433 2465 Py_DECREF(obj);
2434 2466 if (val == -1) {
2435 2467 ret = PyList_New(0);
2436 2468 goto done;
2437 2469 }
2438 2470 if (val < 0 || val >= len) {
2439 2471 PyErr_SetString(PyExc_IndexError, "index out of range");
2440 2472 goto bail;
2441 2473 }
2442 2474 /* this cheesy bloom filter lets us avoid some more
2443 2475 * expensive duplicate checks in the common set-is-disjoint
2444 2476 * case */
2445 2477 x = 1ull << (val & 0x3f);
2446 2478 if (repeat & x) {
2447 2479 int k;
2448 2480 for (k = 0; k < revcount; k++) {
2449 2481 if (val == revs[k])
2450 2482 goto duplicate;
2451 2483 }
2452 2484 } else
2453 2485 repeat |= x;
2454 2486 if (revcount >= capacity) {
2455 2487 PyErr_Format(PyExc_OverflowError,
2456 2488 "bitset size (%d) > capacity (%d)",
2457 2489 revcount, capacity);
2458 2490 goto bail;
2459 2491 }
2460 2492 revs[revcount++] = (int)val;
2461 2493 duplicate:;
2462 2494 }
2463 2495
2464 2496 if (revcount == 0) {
2465 2497 ret = PyList_New(0);
2466 2498 goto done;
2467 2499 }
2468 2500 if (revcount == 1) {
2469 2501 PyObject *obj;
2470 2502 ret = PyList_New(1);
2471 2503 if (ret == NULL)
2472 2504 goto bail;
2473 2505 obj = PyInt_FromLong(revs[0]);
2474 2506 if (obj == NULL)
2475 2507 goto bail;
2476 2508 PyList_SET_ITEM(ret, 0, obj);
2477 2509 goto done;
2478 2510 }
2479 2511
2480 2512 ret = find_gca_candidates(self, revs, revcount);
2481 2513 if (ret == NULL)
2482 2514 goto bail;
2483 2515
2484 2516 done:
2485 2517 PyMem_Free(revs);
2486 2518 return ret;
2487 2519
2488 2520 bail:
2489 2521 PyMem_Free(revs);
2490 2522 Py_XDECREF(ret);
2491 2523 return NULL;
2492 2524 }
2493 2525
2494 2526 /*
2495 2527 * Given a (possibly overlapping) set of revs, return the greatest
2496 2528 * common ancestors: those with the longest path to the root.
2497 2529 */
2498 2530 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2499 2531 {
2500 2532 PyObject *ret;
2501 2533 PyObject *gca = index_commonancestorsheads(self, args);
2502 2534 if (gca == NULL)
2503 2535 return NULL;
2504 2536
2505 2537 if (PyList_GET_SIZE(gca) <= 1) {
2506 2538 return gca;
2507 2539 }
2508 2540
2509 2541 ret = find_deepest(self, gca);
2510 2542 Py_DECREF(gca);
2511 2543 return ret;
2512 2544 }
2513 2545
2514 2546 /*
2515 2547 * Invalidate any trie entries introduced by added revs.
2516 2548 */
2517 2549 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2518 2550 {
2519 2551 Py_ssize_t i, len;
2520 2552
2521 2553 len = self->length + self->new_length;
2522 2554 i = start - self->length;
2523 2555 if (i < 0)
2524 2556 return;
2525 2557
2526 2558 for (i = start; i < len; i++)
2527 2559 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2528 2560
2529 2561 self->new_length = start - self->length;
2530 2562 }
2531 2563
2532 2564 /*
2533 2565 * Delete a numeric range of revs, which must be at the end of the
2534 2566 * range.
2535 2567 */
2536 2568 static int index_slice_del(indexObject *self, PyObject *item)
2537 2569 {
2538 2570 Py_ssize_t start, stop, step, slicelength;
2539 2571 Py_ssize_t length = index_length(self) + 1;
2540 2572 int ret = 0;
2541 2573
2542 2574 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2543 2575 #ifdef IS_PY3K
2544 2576 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2545 2577 &slicelength) < 0)
2546 2578 #else
2547 2579 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2548 2580 &step, &slicelength) < 0)
2549 2581 #endif
2550 2582 return -1;
2551 2583
2552 2584 if (slicelength <= 0)
2553 2585 return 0;
2554 2586
2555 2587 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2556 2588 stop = start;
2557 2589
2558 2590 if (step < 0) {
2559 2591 stop = start + 1;
2560 2592 start = stop + step * (slicelength - 1) - 1;
2561 2593 step = -step;
2562 2594 }
2563 2595
2564 2596 if (step != 1) {
2565 2597 PyErr_SetString(PyExc_ValueError,
2566 2598 "revlog index delete requires step size of 1");
2567 2599 return -1;
2568 2600 }
2569 2601
2570 2602 if (stop != length - 1) {
2571 2603 PyErr_SetString(PyExc_IndexError,
2572 2604 "revlog index deletion indices are invalid");
2573 2605 return -1;
2574 2606 }
2575 2607
2576 2608 if (start < self->length) {
2577 2609 if (self->ntinitialized) {
2578 2610 Py_ssize_t i;
2579 2611
2580 2612 for (i = start; i < self->length; i++) {
2581 2613 const char *node = index_node_existing(self, i);
2582 2614 if (node == NULL)
2583 2615 return -1;
2584 2616
2585 2617 nt_delete_node(&self->nt, node);
2586 2618 }
2587 2619 if (self->new_length)
2588 2620 index_invalidate_added(self, self->length);
2589 2621 if (self->ntrev > start)
2590 2622 self->ntrev = (int)start;
2591 2623 } else if (self->new_length) {
2592 2624 self->new_length = 0;
2593 2625 }
2594 2626
2595 2627 self->length = start;
2596 2628 goto done;
2597 2629 }
2598 2630
2599 2631 if (self->ntinitialized) {
2600 2632 index_invalidate_added(self, start);
2601 2633 if (self->ntrev > start)
2602 2634 self->ntrev = (int)start;
2603 2635 } else {
2604 2636 self->new_length = start - self->length;
2605 2637 }
2606 2638 done:
2607 2639 Py_CLEAR(self->headrevs);
2608 2640 return ret;
2609 2641 }
2610 2642
2611 2643 /*
2612 2644 * Supported ops:
2613 2645 *
2614 2646 * slice deletion
2615 2647 * string assignment (extend node->rev mapping)
2616 2648 * string deletion (shrink node->rev mapping)
2617 2649 */
2618 2650 static int index_assign_subscript(indexObject *self, PyObject *item,
2619 2651 PyObject *value)
2620 2652 {
2621 2653 char *node;
2622 2654 long rev;
2623 2655
2624 2656 if (PySlice_Check(item) && value == NULL)
2625 2657 return index_slice_del(self, item);
2626 2658
2627 2659 if (node_check(self->nodelen, item, &node) == -1)
2628 2660 return -1;
2629 2661
2630 2662 if (value == NULL)
2631 2663 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2632 2664 : 0;
2633 2665 rev = PyInt_AsLong(value);
2634 2666 if (rev > INT_MAX || rev < 0) {
2635 2667 if (!PyErr_Occurred())
2636 2668 PyErr_SetString(PyExc_ValueError, "rev out of range");
2637 2669 return -1;
2638 2670 }
2639 2671
2640 2672 if (index_init_nt(self) == -1)
2641 2673 return -1;
2642 2674 return nt_insert(&self->nt, node, (int)rev);
2643 2675 }
2644 2676
2645 2677 /*
2646 2678 * Find all RevlogNG entries in an index that has inline data. Update
2647 2679 * the optional "offsets" table with those entries.
2648 2680 */
2649 2681 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2650 2682 {
2651 2683 const char *data = (const char *)self->buf.buf;
2652 2684 Py_ssize_t pos = 0;
2653 2685 Py_ssize_t end = self->buf.len;
2654 2686 long incr = self->hdrsize;
2655 2687 Py_ssize_t len = 0;
2656 2688
2657 2689 while (pos + self->hdrsize <= end && pos >= 0) {
2658 2690 uint32_t comp_len, sidedata_comp_len = 0;
2659 2691 /* 3rd element of header is length of compressed inline data */
2660 2692 comp_len = getbe32(data + pos + 8);
2661 2693 if (self->hdrsize == v2_hdrsize) {
2662 2694 sidedata_comp_len = getbe32(data + pos + 72);
2663 2695 }
2664 2696 incr = self->hdrsize + comp_len + sidedata_comp_len;
2665 2697 if (offsets)
2666 2698 offsets[len] = data + pos;
2667 2699 len++;
2668 2700 pos += incr;
2669 2701 }
2670 2702
2671 2703 if (pos != end) {
2672 2704 if (!PyErr_Occurred())
2673 2705 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2674 2706 return -1;
2675 2707 }
2676 2708
2677 2709 return len;
2678 2710 }
2679 2711
2680 2712 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2681 2713 {
2682 2714 PyObject *data_obj, *inlined_obj, *revlogv2;
2683 2715 Py_ssize_t size;
2684 2716
2685 2717 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2686 2718
2687 2719 /* Initialize before argument-checking to avoid index_dealloc() crash.
2688 2720 */
2689 2721 self->added = NULL;
2690 2722 self->new_length = 0;
2691 2723 self->added_length = 0;
2692 2724 self->data = NULL;
2693 2725 memset(&self->buf, 0, sizeof(self->buf));
2694 2726 self->headrevs = NULL;
2695 2727 self->filteredrevs = Py_None;
2696 2728 Py_INCREF(Py_None);
2697 2729 self->ntinitialized = 0;
2698 2730 self->offsets = NULL;
2699 2731 self->nodelen = 20;
2700 2732 self->nullentry = NULL;
2701 2733
2702 2734 revlogv2 = NULL;
2703 2735 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2704 2736 &data_obj, &inlined_obj, &revlogv2))
2705 2737 return -1;
2706 2738 if (!PyObject_CheckBuffer(data_obj)) {
2707 2739 PyErr_SetString(PyExc_TypeError,
2708 2740 "data does not support buffer interface");
2709 2741 return -1;
2710 2742 }
2711 2743 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2712 2744 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2713 2745 return -1;
2714 2746 }
2715 2747
2716 2748 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2717 2749 self->hdrsize = v2_hdrsize;
2718 2750 } else {
2719 2751 self->hdrsize = v1_hdrsize;
2720 2752 }
2721 2753
2722 2754 if (self->hdrsize == v1_hdrsize) {
2723 2755 self->nullentry =
2724 2756 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2725 2757 -1, -1, -1, nullid, self->nodelen);
2726 2758 } else {
2727 2759 self->nullentry =
2728 2760 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2729 2761 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2730 2762 }
2731 2763
2732 2764 if (!self->nullentry)
2733 2765 return -1;
2734 2766 PyObject_GC_UnTrack(self->nullentry);
2735 2767
2736 2768 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2737 2769 return -1;
2738 2770 size = self->buf.len;
2739 2771
2740 2772 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2741 2773 self->data = data_obj;
2742 2774
2743 2775 self->ntlookups = self->ntmisses = 0;
2744 2776 self->ntrev = -1;
2745 2777 Py_INCREF(self->data);
2746 2778
2747 2779 if (self->inlined) {
2748 2780 Py_ssize_t len = inline_scan(self, NULL);
2749 2781 if (len == -1)
2750 2782 goto bail;
2751 2783 self->length = len;
2752 2784 } else {
2753 2785 if (size % self->hdrsize) {
2754 2786 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2755 2787 goto bail;
2756 2788 }
2757 2789 self->length = size / self->hdrsize;
2758 2790 }
2759 2791
2760 2792 return 0;
2761 2793 bail:
2762 2794 return -1;
2763 2795 }
2764 2796
2765 2797 static PyObject *index_nodemap(indexObject *self)
2766 2798 {
2767 2799 Py_INCREF(self);
2768 2800 return (PyObject *)self;
2769 2801 }
2770 2802
2771 2803 static void _index_clearcaches(indexObject *self)
2772 2804 {
2773 2805 if (self->offsets) {
2774 2806 PyMem_Free((void *)self->offsets);
2775 2807 self->offsets = NULL;
2776 2808 }
2777 2809 if (self->ntinitialized) {
2778 2810 nt_dealloc(&self->nt);
2779 2811 }
2780 2812 self->ntinitialized = 0;
2781 2813 Py_CLEAR(self->headrevs);
2782 2814 }
2783 2815
2784 2816 static PyObject *index_clearcaches(indexObject *self)
2785 2817 {
2786 2818 _index_clearcaches(self);
2787 2819 self->ntrev = -1;
2788 2820 self->ntlookups = self->ntmisses = 0;
2789 2821 Py_RETURN_NONE;
2790 2822 }
2791 2823
2792 2824 static void index_dealloc(indexObject *self)
2793 2825 {
2794 2826 _index_clearcaches(self);
2795 2827 Py_XDECREF(self->filteredrevs);
2796 2828 if (self->buf.buf) {
2797 2829 PyBuffer_Release(&self->buf);
2798 2830 memset(&self->buf, 0, sizeof(self->buf));
2799 2831 }
2800 2832 Py_XDECREF(self->data);
2801 2833 PyMem_Free(self->added);
2802 2834 Py_XDECREF(self->nullentry);
2803 2835 PyObject_Del(self);
2804 2836 }
2805 2837
2806 2838 static PySequenceMethods index_sequence_methods = {
2807 2839 (lenfunc)index_length, /* sq_length */
2808 2840 0, /* sq_concat */
2809 2841 0, /* sq_repeat */
2810 2842 (ssizeargfunc)index_get, /* sq_item */
2811 2843 0, /* sq_slice */
2812 2844 0, /* sq_ass_item */
2813 2845 0, /* sq_ass_slice */
2814 2846 (objobjproc)index_contains, /* sq_contains */
2815 2847 };
2816 2848
2817 2849 static PyMappingMethods index_mapping_methods = {
2818 2850 (lenfunc)index_length, /* mp_length */
2819 2851 (binaryfunc)index_getitem, /* mp_subscript */
2820 2852 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2821 2853 };
2822 2854
2823 2855 static PyMethodDef index_methods[] = {
2824 2856 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2825 2857 "return the gca set of the given revs"},
2826 2858 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2827 2859 METH_VARARGS,
2828 2860 "return the heads of the common ancestors of the given revs"},
2829 2861 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2830 2862 "clear the index caches"},
2831 2863 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2832 2864 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2833 2865 "return `rev` associated with a node or None"},
2834 2866 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2835 2867 "return True if the node exist in the index"},
2836 2868 {"rev", (PyCFunction)index_m_rev, METH_O,
2837 2869 "return `rev` associated with a node or raise RevlogError"},
2838 2870 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2839 2871 "compute phases"},
2840 2872 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2841 2873 "reachableroots"},
2842 2874 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2843 2875 METH_VARARGS, "replace an existing index entry with a new value"},
2844 2876 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2845 2877 "get head revisions"}, /* Can do filtering since 3.2 */
2846 2878 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2847 2879 "get filtered head revisions"}, /* Can always do filtering */
2848 2880 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2849 2881 "True if the object is a snapshot"},
2850 2882 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2851 2883 "Gather snapshot data in a cache dict"},
2852 2884 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2853 2885 "determine revisions with deltas to reconstruct fulltext"},
2854 2886 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2855 2887 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2856 2888 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2857 2889 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2858 2890 "match a potentially ambiguous node ID"},
2859 2891 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2860 2892 "find length of shortest hex nodeid of a binary ID"},
2861 2893 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2894 {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
2895 "return an entry in binary form"},
2862 2896 {NULL} /* Sentinel */
2863 2897 };
2864 2898
2865 2899 static PyGetSetDef index_getset[] = {
2866 2900 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2867 2901 {NULL} /* Sentinel */
2868 2902 };
2869 2903
2870 2904 static PyMemberDef index_members[] = {
2871 2905 {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
2872 2906 "size of an index entry"},
2873 2907 {NULL} /* Sentinel */
2874 2908 };
2875 2909
2876 2910 PyTypeObject HgRevlogIndex_Type = {
2877 2911 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2878 2912 "parsers.index", /* tp_name */
2879 2913 sizeof(indexObject), /* tp_basicsize */
2880 2914 0, /* tp_itemsize */
2881 2915 (destructor)index_dealloc, /* tp_dealloc */
2882 2916 0, /* tp_print */
2883 2917 0, /* tp_getattr */
2884 2918 0, /* tp_setattr */
2885 2919 0, /* tp_compare */
2886 2920 0, /* tp_repr */
2887 2921 0, /* tp_as_number */
2888 2922 &index_sequence_methods, /* tp_as_sequence */
2889 2923 &index_mapping_methods, /* tp_as_mapping */
2890 2924 0, /* tp_hash */
2891 2925 0, /* tp_call */
2892 2926 0, /* tp_str */
2893 2927 0, /* tp_getattro */
2894 2928 0, /* tp_setattro */
2895 2929 0, /* tp_as_buffer */
2896 2930 Py_TPFLAGS_DEFAULT, /* tp_flags */
2897 2931 "revlog index", /* tp_doc */
2898 2932 0, /* tp_traverse */
2899 2933 0, /* tp_clear */
2900 2934 0, /* tp_richcompare */
2901 2935 0, /* tp_weaklistoffset */
2902 2936 0, /* tp_iter */
2903 2937 0, /* tp_iternext */
2904 2938 index_methods, /* tp_methods */
2905 2939 index_members, /* tp_members */
2906 2940 index_getset, /* tp_getset */
2907 2941 0, /* tp_base */
2908 2942 0, /* tp_dict */
2909 2943 0, /* tp_descr_get */
2910 2944 0, /* tp_descr_set */
2911 2945 0, /* tp_dictoffset */
2912 2946 (initproc)index_init, /* tp_init */
2913 2947 0, /* tp_alloc */
2914 2948 };
2915 2949
2916 2950 /*
2917 2951 * returns a tuple of the form (index, cache) with elements as
2918 2952 * follows:
2919 2953 *
2920 2954 * index: an index object that lazily parses Revlog (v1 or v2) records
2921 2955 * cache: if data is inlined, a tuple (0, index_file_content), else None
2922 2956 * index_file_content could be a string, or a buffer
2923 2957 *
2924 2958 * added complications are for backwards compatibility
2925 2959 */
2926 2960 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2927 2961 {
2928 2962 PyObject *cache = NULL;
2929 2963 indexObject *idx;
2930 2964 int ret;
2931 2965
2932 2966 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2933 2967 if (idx == NULL)
2934 2968 goto bail;
2935 2969
2936 2970 ret = index_init(idx, args, kwargs);
2937 2971 if (ret == -1)
2938 2972 goto bail;
2939 2973
2940 2974 if (idx->inlined) {
2941 2975 cache = Py_BuildValue("iO", 0, idx->data);
2942 2976 if (cache == NULL)
2943 2977 goto bail;
2944 2978 } else {
2945 2979 cache = Py_None;
2946 2980 Py_INCREF(cache);
2947 2981 }
2948 2982
2949 2983 return Py_BuildValue("NN", idx, cache);
2950 2984
2951 2985 bail:
2952 2986 Py_XDECREF(idx);
2953 2987 Py_XDECREF(cache);
2954 2988 return NULL;
2955 2989 }
2956 2990
2957 2991 static Revlog_CAPI CAPI = {
2958 2992 /* increment the abi_version field upon each change in the Revlog_CAPI
2959 2993 struct or in the ABI of the listed functions */
2960 2994 2,
2961 2995 index_length,
2962 2996 index_node,
2963 2997 HgRevlogIndex_GetParents,
2964 2998 };
2965 2999
2966 3000 void revlog_module_init(PyObject *mod)
2967 3001 {
2968 3002 PyObject *caps = NULL;
2969 3003 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
2970 3004 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
2971 3005 return;
2972 3006 Py_INCREF(&HgRevlogIndex_Type);
2973 3007 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
2974 3008
2975 3009 nodetreeType.tp_new = PyType_GenericNew;
2976 3010 if (PyType_Ready(&nodetreeType) < 0)
2977 3011 return;
2978 3012 Py_INCREF(&nodetreeType);
2979 3013 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
2980 3014
2981 3015 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
2982 3016 if (caps != NULL)
2983 3017 PyModule_AddObject(mod, "revlog_CAPI", caps);
2984 3018 }
@@ -1,356 +1,380 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 sha1nodeconstants,
16 16 )
17 17 from .. import (
18 18 pycompat,
19 19 util,
20 20 )
21 21
22 22 from ..revlogutils import nodemap as nodemaputil
23 23 from ..revlogutils import constants as revlog_constants
24 24
25 25 stringio = pycompat.bytesio
26 26
27 27
28 28 _pack = struct.pack
29 29 _unpack = struct.unpack
30 30 _compress = zlib.compress
31 31 _decompress = zlib.decompress
32 32
33 33 # Some code below makes tuples directly because it's more convenient. However,
34 34 # code outside this module should always use dirstatetuple.
35 35 def dirstatetuple(*x):
36 36 # x is a tuple
37 37 return x
38 38
39 39
40 40 def gettype(q):
41 41 return int(q & 0xFFFF)
42 42
43 43
44 44 def offset_type(offset, type):
45 45 return int(int(offset) << 16 | type)
46 46
47 47
48 48 class BaseIndexObject(object):
49 49 # Format of an index entry according to Python's `struct` language
50 50 index_format = revlog_constants.INDEX_ENTRY_V1
51 51 # Size of a C unsigned long long int, platform independent
52 52 big_int_size = struct.calcsize(b'>Q')
53 53 # Size of a C long int, platform independent
54 54 int_size = struct.calcsize(b'>i')
55 55 # An empty index entry, used as a default value to be overridden, or nullrev
56 56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
57 57
58 58 @util.propertycache
59 59 def entry_size(self):
60 60 return self.index_format.size
61 61
62 62 @property
63 63 def nodemap(self):
64 64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
65 65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
66 66 return self._nodemap
67 67
68 68 @util.propertycache
69 69 def _nodemap(self):
70 70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
71 71 for r in range(0, len(self)):
72 72 n = self[r][7]
73 73 nodemap[n] = r
74 74 return nodemap
75 75
76 76 def has_node(self, node):
77 77 """return True if the node exist in the index"""
78 78 return node in self._nodemap
79 79
80 80 def rev(self, node):
81 81 """return a revision for a node
82 82
83 83 If the node is unknown, raise a RevlogError"""
84 84 return self._nodemap[node]
85 85
86 86 def get_rev(self, node):
87 87 """return a revision for a node
88 88
89 89 If the node is unknown, return None"""
90 90 return self._nodemap.get(node)
91 91
92 92 def _stripnodes(self, start):
93 93 if '_nodemap' in vars(self):
94 94 for r in range(start, len(self)):
95 95 n = self[r][7]
96 96 del self._nodemap[n]
97 97
98 98 def clearcaches(self):
99 99 self.__dict__.pop('_nodemap', None)
100 100
101 101 def __len__(self):
102 102 return self._lgt + len(self._extra)
103 103
104 104 def append(self, tup):
105 105 if '_nodemap' in vars(self):
106 106 self._nodemap[tup[7]] = len(self)
107 107 data = self.index_format.pack(*tup)
108 108 self._extra.append(data)
109 109
110 110 def _check_index(self, i):
111 111 if not isinstance(i, int):
112 112 raise TypeError(b"expecting int indexes")
113 113 if i < 0 or i >= len(self):
114 114 raise IndexError
115 115
116 116 def __getitem__(self, i):
117 117 if i == -1:
118 118 return self.null_item
119 119 self._check_index(i)
120 120 if i >= self._lgt:
121 121 data = self._extra[i - self._lgt]
122 122 else:
123 123 index = self._calculate_index(i)
124 124 data = self._data[index : index + self.entry_size]
125 125 r = self.index_format.unpack(data)
126 126 if self._lgt and i == 0:
127 127 r = (offset_type(0, gettype(r[0])),) + r[1:]
128 128 return r
129 129
130 def entry_binary(self, rev, header):
131 """return the raw binary string representing a revision"""
132 entry = self[rev]
133 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
134 if rev == 0:
135 v_fmt = revlog_constants.INDEX_HEADER
136 v_bin = v_fmt.pack(header)
137 p = v_bin + p[v_fmt.size :]
138 return p
139
130 140
131 141 class IndexObject(BaseIndexObject):
132 142 def __init__(self, data):
133 assert len(data) % self.entry_size == 0
143 assert len(data) % self.entry_size == 0, (
144 len(data),
145 self.entry_size,
146 len(data) % self.entry_size,
147 )
134 148 self._data = data
135 149 self._lgt = len(data) // self.entry_size
136 150 self._extra = []
137 151
138 152 def _calculate_index(self, i):
139 153 return i * self.entry_size
140 154
141 155 def __delitem__(self, i):
142 156 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
143 157 raise ValueError(b"deleting slices only supports a:-1 with step 1")
144 158 i = i.start
145 159 self._check_index(i)
146 160 self._stripnodes(i)
147 161 if i < self._lgt:
148 162 self._data = self._data[: i * self.entry_size]
149 163 self._lgt = i
150 164 self._extra = []
151 165 else:
152 166 self._extra = self._extra[: i - self._lgt]
153 167
154 168
155 169 class PersistentNodeMapIndexObject(IndexObject):
156 170 """a Debug oriented class to test persistent nodemap
157 171
158 172 We need a simple python object to test API and higher level behavior. See
159 173 the Rust implementation for more serious usage. This should be used only
160 174 through the dedicated `devel.persistent-nodemap` config.
161 175 """
162 176
163 177 def nodemap_data_all(self):
164 178 """Return bytes containing a full serialization of a nodemap
165 179
166 180 The nodemap should be valid for the full set of revisions in the
167 181 index."""
168 182 return nodemaputil.persistent_data(self)
169 183
170 184 def nodemap_data_incremental(self):
171 185 """Return bytes containing a incremental update to persistent nodemap
172 186
173 187 This containst the data for an append-only update of the data provided
174 188 in the last call to `update_nodemap_data`.
175 189 """
176 190 if self._nm_root is None:
177 191 return None
178 192 docket = self._nm_docket
179 193 changed, data = nodemaputil.update_persistent_data(
180 194 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
181 195 )
182 196
183 197 self._nm_root = self._nm_max_idx = self._nm_docket = None
184 198 return docket, changed, data
185 199
186 200 def update_nodemap_data(self, docket, nm_data):
187 201 """provide full block of persisted binary data for a nodemap
188 202
189 203 The data are expected to come from disk. See `nodemap_data_all` for a
190 204 produceur of such data."""
191 205 if nm_data is not None:
192 206 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
193 207 if self._nm_root:
194 208 self._nm_docket = docket
195 209 else:
196 210 self._nm_root = self._nm_max_idx = self._nm_docket = None
197 211
198 212
199 213 class InlinedIndexObject(BaseIndexObject):
200 214 def __init__(self, data, inline=0):
201 215 self._data = data
202 216 self._lgt = self._inline_scan(None)
203 217 self._inline_scan(self._lgt)
204 218 self._extra = []
205 219
206 220 def _inline_scan(self, lgt):
207 221 off = 0
208 222 if lgt is not None:
209 223 self._offsets = [0] * lgt
210 224 count = 0
211 225 while off <= len(self._data) - self.entry_size:
212 226 start = off + self.big_int_size
213 227 (s,) = struct.unpack(
214 228 b'>i',
215 229 self._data[start : start + self.int_size],
216 230 )
217 231 if lgt is not None:
218 232 self._offsets[count] = off
219 233 count += 1
220 234 off += self.entry_size + s
221 235 if off != len(self._data):
222 236 raise ValueError(b"corrupted data")
223 237 return count
224 238
225 239 def __delitem__(self, i):
226 240 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
227 241 raise ValueError(b"deleting slices only supports a:-1 with step 1")
228 242 i = i.start
229 243 self._check_index(i)
230 244 self._stripnodes(i)
231 245 if i < self._lgt:
232 246 self._offsets = self._offsets[:i]
233 247 self._lgt = i
234 248 self._extra = []
235 249 else:
236 250 self._extra = self._extra[: i - self._lgt]
237 251
238 252 def _calculate_index(self, i):
239 253 return self._offsets[i]
240 254
241 255
242 256 def parse_index2(data, inline, revlogv2=False):
243 257 if not inline:
244 258 cls = IndexObject2 if revlogv2 else IndexObject
245 259 return cls(data), None
246 260 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
247 261 return cls(data, inline), (0, data)
248 262
249 263
250 264 class Index2Mixin(object):
251 265 index_format = revlog_constants.INDEX_ENTRY_V2
252 266 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
253 267
254 268 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
255 269 """
256 270 Replace an existing index entry's sidedata offset and length with new
257 271 ones.
258 272 This cannot be used outside of the context of sidedata rewriting,
259 273 inside the transaction that creates the revision `i`.
260 274 """
261 275 if i < 0:
262 276 raise KeyError
263 277 self._check_index(i)
264 278 sidedata_format = b">Qi"
265 279 packed_size = struct.calcsize(sidedata_format)
266 280 if i >= self._lgt:
267 281 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
268 282 old = self._extra[i - self._lgt]
269 283 new = old[:64] + packed + old[64 + packed_size :]
270 284 self._extra[i - self._lgt] = new
271 285 else:
272 286 msg = b"cannot rewrite entries outside of this transaction"
273 287 raise KeyError(msg)
274 288
289 def entry_binary(self, rev, header):
290 """return the raw binary string representing a revision"""
291 entry = self[rev]
292 p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
293 if rev == 0:
294 v_fmt = revlog_constants.INDEX_HEADER
295 v_bin = v_fmt.pack(header)
296 p = v_bin + p[v_fmt.size :]
297 return p
298
275 299
276 300 class IndexObject2(Index2Mixin, IndexObject):
277 301 pass
278 302
279 303
280 304 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
281 305 def _inline_scan(self, lgt):
282 306 sidedata_length_pos = 72
283 307 off = 0
284 308 if lgt is not None:
285 309 self._offsets = [0] * lgt
286 310 count = 0
287 311 while off <= len(self._data) - self.entry_size:
288 312 start = off + self.big_int_size
289 313 (data_size,) = struct.unpack(
290 314 b'>i',
291 315 self._data[start : start + self.int_size],
292 316 )
293 317 start = off + sidedata_length_pos
294 318 (side_data_size,) = struct.unpack(
295 319 b'>i', self._data[start : start + self.int_size]
296 320 )
297 321 if lgt is not None:
298 322 self._offsets[count] = off
299 323 count += 1
300 324 off += self.entry_size + data_size + side_data_size
301 325 if off != len(self._data):
302 326 raise ValueError(b"corrupted data")
303 327 return count
304 328
305 329
306 330 def parse_index_devel_nodemap(data, inline):
307 331 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
308 332 return PersistentNodeMapIndexObject(data), None
309 333
310 334
311 335 def parse_dirstate(dmap, copymap, st):
312 336 parents = [st[:20], st[20:40]]
313 337 # dereference fields so they will be local in loop
314 338 format = b">cllll"
315 339 e_size = struct.calcsize(format)
316 340 pos1 = 40
317 341 l = len(st)
318 342
319 343 # the inner loop
320 344 while pos1 < l:
321 345 pos2 = pos1 + e_size
322 346 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
323 347 pos1 = pos2 + e[4]
324 348 f = st[pos2:pos1]
325 349 if b'\0' in f:
326 350 f, c = f.split(b'\0')
327 351 copymap[f] = c
328 352 dmap[f] = e[:4]
329 353 return parents
330 354
331 355
332 356 def pack_dirstate(dmap, copymap, pl, now):
333 357 now = int(now)
334 358 cs = stringio()
335 359 write = cs.write
336 360 write(b"".join(pl))
337 361 for f, e in pycompat.iteritems(dmap):
338 362 if e[0] == b'n' and e[3] == now:
339 363 # The file was last modified "simultaneously" with the current
340 364 # write to dirstate (i.e. within the same second for file-
341 365 # systems with a granularity of 1 sec). This commonly happens
342 366 # for at least a couple of files on 'update'.
343 367 # The user could change the file without changing its size
344 368 # within the same second. Invalidate the file's mtime in
345 369 # dirstate, forcing future 'status' calls to compare the
346 370 # contents of the file if the size is the same. This prevents
347 371 # mistakenly treating such files as clean.
348 372 e = dirstatetuple(e[0], e[1], e[2], -1)
349 373 dmap[f] = e
350 374
351 375 if f in copymap:
352 376 f = b"%s\0%s" % (f, copymap[f])
353 377 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
354 378 write(e)
355 379 write(f)
356 380 return cs.getvalue()
@@ -1,3247 +1,3230 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 FLAG_GENERALDELTA,
38 38 FLAG_INLINE_DATA,
39 39 INDEX_ENTRY_V0,
40 40 INDEX_ENTRY_V1,
41 41 INDEX_ENTRY_V2,
42 42 INDEX_HEADER,
43 43 REVLOGV0,
44 44 REVLOGV1,
45 45 REVLOGV1_FLAGS,
46 46 REVLOGV2,
47 47 REVLOGV2_FLAGS,
48 48 REVLOG_DEFAULT_FLAGS,
49 49 REVLOG_DEFAULT_FORMAT,
50 50 REVLOG_DEFAULT_VERSION,
51 51 )
52 52 from .revlogutils.flagutil import (
53 53 REVIDX_DEFAULT_FLAGS,
54 54 REVIDX_ELLIPSIS,
55 55 REVIDX_EXTSTORED,
56 56 REVIDX_FLAGS_ORDER,
57 57 REVIDX_HASCOPIESINFO,
58 58 REVIDX_ISCENSORED,
59 59 REVIDX_RAWTEXT_CHANGING_FLAGS,
60 60 REVIDX_SIDEDATA,
61 61 )
62 62 from .thirdparty import attr
63 63 from . import (
64 64 ancestor,
65 65 dagop,
66 66 error,
67 67 mdiff,
68 68 policy,
69 69 pycompat,
70 70 templatefilters,
71 71 util,
72 72 )
73 73 from .interfaces import (
74 74 repository,
75 75 util as interfaceutil,
76 76 )
77 77 from .revlogutils import (
78 78 deltas as deltautil,
79 79 flagutil,
80 80 nodemap as nodemaputil,
81 81 sidedata as sidedatautil,
82 82 )
83 83 from .utils import (
84 84 storageutil,
85 85 stringutil,
86 86 )
87 87
88 88 # blanked usage of all the name to prevent pyflakes constraints
89 89 # We need these name available in the module for extensions.
90 90 REVLOGV0
91 91 REVLOGV1
92 92 REVLOGV2
93 93 FLAG_INLINE_DATA
94 94 FLAG_GENERALDELTA
95 95 REVLOG_DEFAULT_FLAGS
96 96 REVLOG_DEFAULT_FORMAT
97 97 REVLOG_DEFAULT_VERSION
98 98 REVLOGV1_FLAGS
99 99 REVLOGV2_FLAGS
100 100 REVIDX_ISCENSORED
101 101 REVIDX_ELLIPSIS
102 102 REVIDX_SIDEDATA
103 103 REVIDX_HASCOPIESINFO
104 104 REVIDX_EXTSTORED
105 105 REVIDX_DEFAULT_FLAGS
106 106 REVIDX_FLAGS_ORDER
107 107 REVIDX_RAWTEXT_CHANGING_FLAGS
108 108
109 109 parsers = policy.importmod('parsers')
110 110 rustancestor = policy.importrust('ancestor')
111 111 rustdagop = policy.importrust('dagop')
112 112 rustrevlog = policy.importrust('revlog')
113 113
114 114 # Aliased for performance.
115 115 _zlibdecompress = zlib.decompress
116 116
117 117 # max size of revlog with inline data
118 118 _maxinline = 131072
119 119 _chunksize = 1048576
120 120
121 121 # Flag processors for REVIDX_ELLIPSIS.
122 122 def ellipsisreadprocessor(rl, text):
123 123 return text, False
124 124
125 125
126 126 def ellipsiswriteprocessor(rl, text):
127 127 return text, False
128 128
129 129
130 130 def ellipsisrawprocessor(rl, text):
131 131 return False
132 132
133 133
134 134 ellipsisprocessor = (
135 135 ellipsisreadprocessor,
136 136 ellipsiswriteprocessor,
137 137 ellipsisrawprocessor,
138 138 )
139 139
140 140
141 141 def getoffset(q):
142 142 return int(q >> 16)
143 143
144 144
145 145 def gettype(q):
146 146 return int(q & 0xFFFF)
147 147
148 148
149 149 def offset_type(offset, type):
150 150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
151 151 raise ValueError(b'unknown revlog index flags')
152 152 return int(int(offset) << 16 | type)
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider we have a "fast" implementation in "pure" python because
168 168 # people using pure don't really have performance consideration (and a
169 169 # wheelbarrow of other slowness source)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @attr.s(slots=True, frozen=True)
176 176 class _revisioninfo(object):
177 177 """Information about a revision that allows building its fulltext
178 178 node: expected hash of the revision
179 179 p1, p2: parent revs of the revision
180 180 btext: built text cache consisting of a one-element list
181 181 cachedelta: (baserev, uncompressed_delta) or None
182 182 flags: flags associated to the revision storage
183 183
184 184 One of btext[0] or cachedelta must be set.
185 185 """
186 186
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195
196 196 @interfaceutil.implementer(repository.irevisiondelta)
197 197 @attr.s(slots=True)
198 198 class revlogrevisiondelta(object):
199 199 node = attr.ib()
200 200 p1node = attr.ib()
201 201 p2node = attr.ib()
202 202 basenode = attr.ib()
203 203 flags = attr.ib()
204 204 baserevisionsize = attr.ib()
205 205 revision = attr.ib()
206 206 delta = attr.ib()
207 207 sidedata = attr.ib()
208 208 linknode = attr.ib(default=None)
209 209
210 210
211 211 @interfaceutil.implementer(repository.iverifyproblem)
212 212 @attr.s(frozen=True)
213 213 class revlogproblem(object):
214 214 warning = attr.ib(default=None)
215 215 error = attr.ib(default=None)
216 216 node = attr.ib(default=None)
217 217
218 218
219 219 class revlogoldindex(list):
220 220 entry_size = INDEX_ENTRY_V0.size
221 221
222 222 @property
223 223 def nodemap(self):
224 224 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
225 225 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
226 226 return self._nodemap
227 227
228 228 @util.propertycache
229 229 def _nodemap(self):
230 230 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
231 231 for r in range(0, len(self)):
232 232 n = self[r][7]
233 233 nodemap[n] = r
234 234 return nodemap
235 235
236 236 def has_node(self, node):
237 237 """return True if the node exist in the index"""
238 238 return node in self._nodemap
239 239
240 240 def rev(self, node):
241 241 """return a revision for a node
242 242
243 243 If the node is unknown, raise a RevlogError"""
244 244 return self._nodemap[node]
245 245
246 246 def get_rev(self, node):
247 247 """return a revision for a node
248 248
249 249 If the node is unknown, return None"""
250 250 return self._nodemap.get(node)
251 251
252 252 def append(self, tup):
253 253 self._nodemap[tup[7]] = len(self)
254 254 super(revlogoldindex, self).append(tup)
255 255
256 256 def __delitem__(self, i):
257 257 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
258 258 raise ValueError(b"deleting slices only supports a:-1 with step 1")
259 259 for r in pycompat.xrange(i.start, len(self)):
260 260 del self._nodemap[self[r][7]]
261 261 super(revlogoldindex, self).__delitem__(i)
262 262
263 263 def clearcaches(self):
264 264 self.__dict__.pop('_nodemap', None)
265 265
266 266 def __getitem__(self, i):
267 267 if i == -1:
268 268 return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
269 269 return list.__getitem__(self, i)
270 270
271 def entry_binary(self, rev, header):
272 """return the raw binary string representing a revision"""
273 entry = self[rev]
274 if gettype(entry[0]):
275 raise error.RevlogError(
276 _(b'index entry flags need revlog version 1')
277 )
278 e2 = (
279 getoffset(entry[0]),
280 entry[1],
281 entry[3],
282 entry[4],
283 self[entry[5]][7],
284 self[entry[6]][7],
285 entry[7],
286 )
287 return INDEX_ENTRY_V0.pack(*e2)
288
271 289
272 290 class revlogoldio(object):
273 291 def parseindex(self, data, inline):
274 292 s = INDEX_ENTRY_V0.size
275 293 index = []
276 294 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
277 295 n = off = 0
278 296 l = len(data)
279 297 while off + s <= l:
280 298 cur = data[off : off + s]
281 299 off += s
282 300 e = INDEX_ENTRY_V0.unpack(cur)
283 301 # transform to revlogv1 format
284 302 e2 = (
285 303 offset_type(e[0], 0),
286 304 e[1],
287 305 -1,
288 306 e[2],
289 307 e[3],
290 308 nodemap.get(e[4], nullrev),
291 309 nodemap.get(e[5], nullrev),
292 310 e[6],
293 311 )
294 312 index.append(e2)
295 313 nodemap[e[6]] = n
296 314 n += 1
297 315
298 316 index = revlogoldindex(index)
299 317 return index, None
300 318
301 def packentry(self, entry, node, version, rev):
302 """return the binary representation of an entry
303
304 entry: a tuple containing all the values (see index.__getitem__)
305 node: a callback to convert a revision to nodeid
306 version: the changelog version
307 rev: the revision number
308 """
309 if gettype(entry[0]):
310 raise error.RevlogError(
311 _(b'index entry flags need revlog version 1')
312 )
313 e2 = (
314 getoffset(entry[0]),
315 entry[1],
316 entry[3],
317 entry[4],
318 node(entry[5]),
319 node(entry[6]),
320 entry[7],
321 )
322 return INDEX_ENTRY_V0.pack(*e2)
323
324 319
325 320 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
326 321 # signed integer)
327 322 _maxentrysize = 0x7FFFFFFF
328 323
329 324
330 325 class revlogio(object):
331 326 def parseindex(self, data, inline):
332 327 # call the C implementation to parse the index data
333 328 index, cache = parsers.parse_index2(data, inline)
334 329 return index, cache
335 330
336 def packentry(self, entry, node, version, rev):
337 p = INDEX_ENTRY_V1.pack(*entry)
338 if rev == 0:
339 p = INDEX_HEADER.pack(version) + p[4:]
340 return p
341
342 331
343 332 class revlogv2io(object):
344 333 def parseindex(self, data, inline):
345 334 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
346 335 return index, cache
347 336
348 def packentry(self, entry, node, version, rev):
349 p = INDEX_ENTRY_V2.pack(*entry)
350 if rev == 0:
351 p = INDEX_HEADER.pack(version) + p[4:]
352 return p
353
354 337
355 338 NodemapRevlogIO = None
356 339
357 340 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
358 341
359 342 class NodemapRevlogIO(revlogio):
360 343 """A debug oriented IO class that return a PersistentNodeMapIndexObject
361 344
362 345 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
363 346 """
364 347
365 348 def parseindex(self, data, inline):
366 349 index, cache = parsers.parse_index_devel_nodemap(data, inline)
367 350 return index, cache
368 351
369 352
370 353 class rustrevlogio(revlogio):
371 354 def parseindex(self, data, inline):
372 355 index, cache = super(rustrevlogio, self).parseindex(data, inline)
373 356 return rustrevlog.MixedIndex(index), cache
374 357
375 358
376 359 class revlog(object):
377 360 """
378 361 the underlying revision storage object
379 362
380 363 A revlog consists of two parts, an index and the revision data.
381 364
382 365 The index is a file with a fixed record size containing
383 366 information on each revision, including its nodeid (hash), the
384 367 nodeids of its parents, the position and offset of its data within
385 368 the data file, and the revision it's based on. Finally, each entry
386 369 contains a linkrev entry that can serve as a pointer to external
387 370 data.
388 371
389 372 The revision data itself is a linear collection of data chunks.
390 373 Each chunk represents a revision and is usually represented as a
391 374 delta against the previous chunk. To bound lookup time, runs of
392 375 deltas are limited to about 2 times the length of the original
393 376 version data. This makes retrieval of a version proportional to
394 377 its size, or O(1) relative to the number of revisions.
395 378
396 379 Both pieces of the revlog are written to in an append-only
397 380 fashion, which means we never need to rewrite a file to insert or
398 381 remove data, and can use some simple techniques to avoid the need
399 382 for locking while reading.
400 383
401 384 If checkambig, indexfile is opened with checkambig=True at
402 385 writing, to avoid file stat ambiguity.
403 386
404 387 If mmaplargeindex is True, and an mmapindexthreshold is set, the
405 388 index will be mmapped rather than read if it is larger than the
406 389 configured threshold.
407 390
408 391 If censorable is True, the revlog can have censored revisions.
409 392
410 393 If `upperboundcomp` is not None, this is the expected maximal gain from
411 394 compression for the data content.
412 395
413 396 `concurrencychecker` is an optional function that receives 3 arguments: a
414 397 file handle, a filename, and an expected position. It should check whether
415 398 the current position in the file handle is valid, and log/warn/fail (by
416 399 raising).
417 400 """
418 401
419 402 _flagserrorclass = error.RevlogError
420 403
421 404 def __init__(
422 405 self,
423 406 opener,
424 407 indexfile,
425 408 datafile=None,
426 409 checkambig=False,
427 410 mmaplargeindex=False,
428 411 censorable=False,
429 412 upperboundcomp=None,
430 413 persistentnodemap=False,
431 414 concurrencychecker=None,
432 415 ):
433 416 """
434 417 create a revlog object
435 418
436 419 opener is a function that abstracts the file opening operation
437 420 and can be used to implement COW semantics or the like.
438 421
439 422 """
440 423 self.upperboundcomp = upperboundcomp
441 424 self.indexfile = indexfile
442 425 self.datafile = datafile or (indexfile[:-2] + b".d")
443 426 self.nodemap_file = None
444 427 if persistentnodemap:
445 428 self.nodemap_file = nodemaputil.get_nodemap_file(
446 429 opener, self.indexfile
447 430 )
448 431
449 432 self.opener = opener
450 433 # When True, indexfile is opened with checkambig=True at writing, to
451 434 # avoid file stat ambiguity.
452 435 self._checkambig = checkambig
453 436 self._mmaplargeindex = mmaplargeindex
454 437 self._censorable = censorable
455 438 # 3-tuple of (node, rev, text) for a raw revision.
456 439 self._revisioncache = None
457 440 # Maps rev to chain base rev.
458 441 self._chainbasecache = util.lrucachedict(100)
459 442 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
460 443 self._chunkcache = (0, b'')
461 444 # How much data to read and cache into the raw revlog data cache.
462 445 self._chunkcachesize = 65536
463 446 self._maxchainlen = None
464 447 self._deltabothparents = True
465 448 self.index = None
466 449 self._nodemap_docket = None
467 450 # Mapping of partial identifiers to full nodes.
468 451 self._pcache = {}
469 452 # Mapping of revision integer to full node.
470 453 self._compengine = b'zlib'
471 454 self._compengineopts = {}
472 455 self._maxdeltachainspan = -1
473 456 self._withsparseread = False
474 457 self._sparserevlog = False
475 458 self._srdensitythreshold = 0.50
476 459 self._srmingapsize = 262144
477 460
478 461 # Make copy of flag processors so each revlog instance can support
479 462 # custom flags.
480 463 self._flagprocessors = dict(flagutil.flagprocessors)
481 464
482 465 # 2-tuple of file handles being used for active writing.
483 466 self._writinghandles = None
484 467
485 468 self._loadindex()
486 469
487 470 self._concurrencychecker = concurrencychecker
488 471
489 472 def _loadindex(self):
490 473 mmapindexthreshold = None
491 474 opts = self.opener.options
492 475
493 476 if b'revlogv2' in opts:
494 477 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
495 478 elif b'revlogv1' in opts:
496 479 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
497 480 if b'generaldelta' in opts:
498 481 newversionflags |= FLAG_GENERALDELTA
499 482 elif b'revlogv0' in self.opener.options:
500 483 newversionflags = REVLOGV0
501 484 else:
502 485 newversionflags = REVLOG_DEFAULT_VERSION
503 486
504 487 if b'chunkcachesize' in opts:
505 488 self._chunkcachesize = opts[b'chunkcachesize']
506 489 if b'maxchainlen' in opts:
507 490 self._maxchainlen = opts[b'maxchainlen']
508 491 if b'deltabothparents' in opts:
509 492 self._deltabothparents = opts[b'deltabothparents']
510 493 self._lazydelta = bool(opts.get(b'lazydelta', True))
511 494 self._lazydeltabase = False
512 495 if self._lazydelta:
513 496 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
514 497 if b'compengine' in opts:
515 498 self._compengine = opts[b'compengine']
516 499 if b'zlib.level' in opts:
517 500 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
518 501 if b'zstd.level' in opts:
519 502 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
520 503 if b'maxdeltachainspan' in opts:
521 504 self._maxdeltachainspan = opts[b'maxdeltachainspan']
522 505 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
523 506 mmapindexthreshold = opts[b'mmapindexthreshold']
524 507 self.hassidedata = bool(opts.get(b'side-data', False))
525 508 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
526 509 withsparseread = bool(opts.get(b'with-sparse-read', False))
527 510 # sparse-revlog forces sparse-read
528 511 self._withsparseread = self._sparserevlog or withsparseread
529 512 if b'sparse-read-density-threshold' in opts:
530 513 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
531 514 if b'sparse-read-min-gap-size' in opts:
532 515 self._srmingapsize = opts[b'sparse-read-min-gap-size']
533 516 if opts.get(b'enableellipsis'):
534 517 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
535 518
536 519 # revlog v0 doesn't have flag processors
537 520 for flag, processor in pycompat.iteritems(
538 521 opts.get(b'flagprocessors', {})
539 522 ):
540 523 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
541 524
542 525 if self._chunkcachesize <= 0:
543 526 raise error.RevlogError(
544 527 _(b'revlog chunk cache size %r is not greater than 0')
545 528 % self._chunkcachesize
546 529 )
547 530 elif self._chunkcachesize & (self._chunkcachesize - 1):
548 531 raise error.RevlogError(
549 532 _(b'revlog chunk cache size %r is not a power of 2')
550 533 % self._chunkcachesize
551 534 )
552 535
553 536 indexdata = b''
554 537 self._initempty = True
555 538 try:
556 539 with self._indexfp() as f:
557 540 if (
558 541 mmapindexthreshold is not None
559 542 and self.opener.fstat(f).st_size >= mmapindexthreshold
560 543 ):
561 544 # TODO: should .close() to release resources without
562 545 # relying on Python GC
563 546 indexdata = util.buffer(util.mmapread(f))
564 547 else:
565 548 indexdata = f.read()
566 549 if len(indexdata) > 0:
567 550 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
568 551 self._initempty = False
569 552 else:
570 553 versionflags = newversionflags
571 554 except IOError as inst:
572 555 if inst.errno != errno.ENOENT:
573 556 raise
574 557
575 558 versionflags = newversionflags
576 559
577 560 self.version = versionflags
578 561
579 562 flags = versionflags & ~0xFFFF
580 563 fmt = versionflags & 0xFFFF
581 564
582 565 if fmt == REVLOGV0:
583 566 if flags:
584 567 raise error.RevlogError(
585 568 _(b'unknown flags (%#04x) in version %d revlog %s')
586 569 % (flags >> 16, fmt, self.indexfile)
587 570 )
588 571
589 572 self._inline = False
590 573 self._generaldelta = False
591 574
592 575 elif fmt == REVLOGV1:
593 576 if flags & ~REVLOGV1_FLAGS:
594 577 raise error.RevlogError(
595 578 _(b'unknown flags (%#04x) in version %d revlog %s')
596 579 % (flags >> 16, fmt, self.indexfile)
597 580 )
598 581
599 582 self._inline = versionflags & FLAG_INLINE_DATA
600 583 self._generaldelta = versionflags & FLAG_GENERALDELTA
601 584
602 585 elif fmt == REVLOGV2:
603 586 if flags & ~REVLOGV2_FLAGS:
604 587 raise error.RevlogError(
605 588 _(b'unknown flags (%#04x) in version %d revlog %s')
606 589 % (flags >> 16, fmt, self.indexfile)
607 590 )
608 591
609 592 # There is a bug in the transaction handling when going from an
610 593 # inline revlog to a separate index and data file. Turn it off until
611 594 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
612 595 # See issue6485
613 596 self._inline = False
614 597 # generaldelta implied by version 2 revlogs.
615 598 self._generaldelta = True
616 599
617 600 else:
618 601 raise error.RevlogError(
619 602 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
620 603 )
621 604
622 605 self.nodeconstants = sha1nodeconstants
623 606 self.nullid = self.nodeconstants.nullid
624 607
625 608 # sparse-revlog can't be on without general-delta (issue6056)
626 609 if not self._generaldelta:
627 610 self._sparserevlog = False
628 611
629 612 self._storedeltachains = True
630 613
631 614 devel_nodemap = (
632 615 self.nodemap_file
633 616 and opts.get(b'devel-force-nodemap', False)
634 617 and NodemapRevlogIO is not None
635 618 )
636 619
637 620 use_rust_index = False
638 621 if rustrevlog is not None:
639 622 if self.nodemap_file is not None:
640 623 use_rust_index = True
641 624 else:
642 625 use_rust_index = self.opener.options.get(b'rust.index')
643 626
644 627 self._io = revlogio()
645 628 if self.version == REVLOGV0:
646 629 self._io = revlogoldio()
647 630 elif fmt == REVLOGV2:
648 631 self._io = revlogv2io()
649 632 elif devel_nodemap:
650 633 self._io = NodemapRevlogIO()
651 634 elif use_rust_index:
652 635 self._io = rustrevlogio()
653 636 try:
654 637 d = self._io.parseindex(indexdata, self._inline)
655 638 index, _chunkcache = d
656 639 use_nodemap = (
657 640 not self._inline
658 641 and self.nodemap_file is not None
659 642 and util.safehasattr(index, 'update_nodemap_data')
660 643 )
661 644 if use_nodemap:
662 645 nodemap_data = nodemaputil.persisted_data(self)
663 646 if nodemap_data is not None:
664 647 docket = nodemap_data[0]
665 648 if (
666 649 len(d[0]) > docket.tip_rev
667 650 and d[0][docket.tip_rev][7] == docket.tip_node
668 651 ):
669 652 # no changelog tampering
670 653 self._nodemap_docket = docket
671 654 index.update_nodemap_data(*nodemap_data)
672 655 except (ValueError, IndexError):
673 656 raise error.RevlogError(
674 657 _(b"index %s is corrupted") % self.indexfile
675 658 )
676 659 self.index, self._chunkcache = d
677 660 if not self._chunkcache:
678 661 self._chunkclear()
679 662 # revnum -> (chain-length, sum-delta-length)
680 663 self._chaininfocache = util.lrucachedict(500)
681 664 # revlog header -> revlog compressor
682 665 self._decompressors = {}
683 666
684 667 @util.propertycache
685 668 def _compressor(self):
686 669 engine = util.compengines[self._compengine]
687 670 return engine.revlogcompressor(self._compengineopts)
688 671
689 672 def _indexfp(self, mode=b'r'):
690 673 """file object for the revlog's index file"""
691 674 args = {'mode': mode}
692 675 if mode != b'r':
693 676 args['checkambig'] = self._checkambig
694 677 if mode == b'w':
695 678 args['atomictemp'] = True
696 679 return self.opener(self.indexfile, **args)
697 680
698 681 def _datafp(self, mode=b'r'):
699 682 """file object for the revlog's data file"""
700 683 return self.opener(self.datafile, mode=mode)
701 684
    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data

        Context manager yielding a readable file object for revision data.
        Prefers, in order: the caller-supplied ``existingfp``, a handle
        already open for writing, or a freshly opened one.  For inline
        revlogs the data lives in the index file.
        """
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
727 710
728 711 def tiprev(self):
729 712 return len(self.index) - 1
730 713
731 714 def tip(self):
732 715 return self.node(self.tiprev())
733 716
734 717 def __contains__(self, rev):
735 718 return 0 <= rev < len(self)
736 719
737 720 def __len__(self):
738 721 return len(self.index)
739 722
740 723 def __iter__(self):
741 724 return iter(pycompat.xrange(len(self)))
742 725
743 726 def revs(self, start=0, stop=None):
744 727 """iterate over all rev in this revlog (from start to stop)"""
745 728 return storageutil.iterrevs(len(self), start=start, stop=stop)
746 729
747 730 @property
748 731 def nodemap(self):
749 732 msg = (
750 733 b"revlog.nodemap is deprecated, "
751 734 b"use revlog.index.[has_node|rev|get_rev]"
752 735 )
753 736 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
754 737 return self.index.nodemap
755 738
756 739 @property
757 740 def _nodecache(self):
758 741 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
759 742 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
760 743 return self.index.nodemap
761 744
762 745 def hasnode(self, node):
763 746 try:
764 747 self.rev(node)
765 748 return True
766 749 except KeyError:
767 750 return False
768 751
769 752 def candelta(self, baserev, rev):
770 753 """whether two revisions (baserev, rev) can be delta-ed or not"""
771 754 # Disable delta if either rev requires a content-changing flag
772 755 # processor (ex. LFS). This is because such flag processor can alter
773 756 # the rawtext content that the delta will be based on, and two clients
774 757 # could have a same revlog node with different flags (i.e. different
775 758 # rawtext contents) and the delta could be incompatible.
776 759 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
777 760 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
778 761 ):
779 762 return False
780 763 return True
781 764
782 765 def update_caches(self, transaction):
783 766 if self.nodemap_file is not None:
784 767 if transaction is None:
785 768 nodemaputil.update_persistent_nodemap(self)
786 769 else:
787 770 nodemaputil.setup_persistent_nodemap(transaction, self)
788 771
789 772 def clearcaches(self):
790 773 self._revisioncache = None
791 774 self._chainbasecache.clear()
792 775 self._chunkcache = (0, b'')
793 776 self._pcache = {}
794 777 self._nodemap_docket = None
795 778 self.index.clearcaches()
796 779 # The python code is the one responsible for validating the docket, we
797 780 # end up having to refresh it here.
798 781 use_nodemap = (
799 782 not self._inline
800 783 and self.nodemap_file is not None
801 784 and util.safehasattr(self.index, 'update_nodemap_data')
802 785 )
803 786 if use_nodemap:
804 787 nodemap_data = nodemaputil.persisted_data(self)
805 788 if nodemap_data is not None:
806 789 self._nodemap_docket = nodemap_data[0]
807 790 self.index.update_nodemap_data(*nodemap_data)
808 791
809 792 def rev(self, node):
810 793 try:
811 794 return self.index.rev(node)
812 795 except TypeError:
813 796 raise
814 797 except error.RevlogError:
815 798 # parsers.c radix tree lookup failed
816 799 if (
817 800 node == self.nodeconstants.wdirid
818 801 or node in self.nodeconstants.wdirfilenodeids
819 802 ):
820 803 raise error.WdirUnsupported
821 804 raise error.LookupError(node, self.indexfile, _(b'no node'))
822 805
823 806 # Accessors for index entries.
824 807
825 808 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
826 809 # are flags.
827 810 def start(self, rev):
828 811 return int(self.index[rev][0] >> 16)
829 812
830 813 def flags(self, rev):
831 814 return self.index[rev][0] & 0xFFFF
832 815
833 816 def length(self, rev):
834 817 return self.index[rev][1]
835 818
836 819 def sidedata_length(self, rev):
837 820 if self.version & 0xFFFF != REVLOGV2:
838 821 return 0
839 822 return self.index[rev][9]
840 823
841 824 def rawsize(self, rev):
842 825 """return the length of the uncompressed text for a given revision"""
843 826 l = self.index[rev][2]
844 827 if l >= 0:
845 828 return l
846 829
847 830 t = self.rawdata(rev)
848 831 return len(t)
849 832
850 833 def size(self, rev):
851 834 """length of non-raw text (processed by a "read" flag processor)"""
852 835 # fast path: if no "read" flag processor could change the content,
853 836 # size is rawsize. note: ELLIPSIS is known to not change the content.
854 837 flags = self.flags(rev)
855 838 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
856 839 return self.rawsize(rev)
857 840
858 841 return len(self.revision(rev, raw=False))
859 842
860 843 def chainbase(self, rev):
861 844 base = self._chainbasecache.get(rev)
862 845 if base is not None:
863 846 return base
864 847
865 848 index = self.index
866 849 iterrev = rev
867 850 base = index[iterrev][3]
868 851 while base != iterrev:
869 852 iterrev = base
870 853 base = index[iterrev][3]
871 854
872 855 self._chainbasecache[rev] = base
873 856 return base
874 857
875 858 def linkrev(self, rev):
876 859 return self.index[rev][4]
877 860
878 861 def parentrevs(self, rev):
879 862 try:
880 863 entry = self.index[rev]
881 864 except IndexError:
882 865 if rev == wdirrev:
883 866 raise error.WdirUnsupported
884 867 raise
885 868 if entry[5] == nullrev:
886 869 return entry[6], entry[5]
887 870 else:
888 871 return entry[5], entry[6]
889 872
890 873 # fast parentrevs(rev) where rev isn't filtered
891 874 _uncheckedparentrevs = parentrevs
892 875
893 876 def node(self, rev):
894 877 try:
895 878 return self.index[rev][7]
896 879 except IndexError:
897 880 if rev == wdirrev:
898 881 raise error.WdirUnsupported
899 882 raise
900 883
901 884 # Derived from index values.
902 885
903 886 def end(self, rev):
904 887 return self.start(rev) + self.length(rev)
905 888
906 889 def parents(self, node):
907 890 i = self.index
908 891 d = i[self.rev(node)]
909 892 # inline node() to avoid function call overhead
910 893 if d[5] == self.nullid:
911 894 return i[d[6]][7], i[d[5]][7]
912 895 else:
913 896 return i[d[5]][7], i[d[6]][7]
914 897
915 898 def chainlen(self, rev):
916 899 return self._chaininfo(rev)[0]
917 900
    def _chaininfo(self, rev):
        """Return ``(chain-length, sum-of-compressed-delta-lengths)``.

        Walks the delta chain of ``rev`` toward its base, reusing (and
        populating) ``_chaininfocache`` entries along the way.
        """
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # entry[3] is the delta base; a revision that is its own base
        # terminates the chain.
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                # general delta: base may be any earlier revision
                iterrev = e[3]
            else:
                # classic delta: always against the previous revision
                iterrev -= 1
            if iterrev in chaininfocache:
                # Cached entries already account for their base text length.
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
948 931
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        # entry[3] is the delta base; a self-based revision ends the chain.
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        # When we stopped because of ``stoprev``, that revision is excluded
        # from the chain; otherwise the chain base itself is included.
        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
989 972
990 973 def ancestors(self, revs, stoprev=0, inclusive=False):
991 974 """Generate the ancestors of 'revs' in reverse revision order.
992 975 Does not generate revs lower than stoprev.
993 976
994 977 See the documentation for ancestor.lazyancestors for more details."""
995 978
996 979 # first, make sure start revisions aren't filtered
997 980 revs = list(revs)
998 981 checkrev = self.node
999 982 for r in revs:
1000 983 checkrev(r)
1001 984 # and we're sure ancestors aren't filtered as well
1002 985
1003 986 if rustancestor is not None:
1004 987 lazyancestors = rustancestor.LazyAncestors
1005 988 arg = self.index
1006 989 else:
1007 990 lazyancestors = ancestor.lazyancestors
1008 991 arg = self._uncheckedparentrevs
1009 992 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1010 993
1011 994 def descendants(self, revs):
1012 995 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1013 996
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        # (a set-like view over a lazy ancestor generator, plus explicitly
        # added members, so membership tests don't force full evaluation)
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        # (breadth-first walk from the heads, pruned at anything in 'has')
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
1077 1060
1078 1061 def incrementalmissingrevs(self, common=None):
1079 1062 """Return an object that can be used to incrementally compute the
1080 1063 revision numbers of the ancestors of arbitrary sets that are not
1081 1064 ancestors of common. This is an ancestor.incrementalmissingancestors
1082 1065 object.
1083 1066
1084 1067 'common' is a list of revision numbers. If common is not supplied, uses
1085 1068 nullrev.
1086 1069 """
1087 1070 if common is None:
1088 1071 common = [nullrev]
1089 1072
1090 1073 if rustancestor is not None:
1091 1074 return rustancestor.MissingAncestors(self.index, common)
1092 1075 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1093 1076
1094 1077 def findmissingrevs(self, common=None, heads=None):
1095 1078 """Return the revision numbers of the ancestors of heads that
1096 1079 are not ancestors of common.
1097 1080
1098 1081 More specifically, return a list of revision numbers corresponding to
1099 1082 nodes N such that every N satisfies the following constraints:
1100 1083
1101 1084 1. N is an ancestor of some node in 'heads'
1102 1085 2. N is not an ancestor of any node in 'common'
1103 1086
1104 1087 The list is sorted by revision number, meaning it is
1105 1088 topologically sorted.
1106 1089
1107 1090 'heads' and 'common' are both lists of revision numbers. If heads is
1108 1091 not supplied, uses all of the revlog's heads. If common is not
1109 1092 supplied, uses nullid."""
1110 1093 if common is None:
1111 1094 common = [nullrev]
1112 1095 if heads is None:
1113 1096 heads = self.headrevs()
1114 1097
1115 1098 inc = self.incrementalmissingrevs(common=common)
1116 1099 return inc.missingancestors(heads)
1117 1100
1118 1101 def findmissing(self, common=None, heads=None):
1119 1102 """Return the ancestors of heads that are not ancestors of common.
1120 1103
1121 1104 More specifically, return a list of nodes N such that every N
1122 1105 satisfies the following constraints:
1123 1106
1124 1107 1. N is an ancestor of some node in 'heads'
1125 1108 2. N is not an ancestor of any node in 'common'
1126 1109
1127 1110 The list is sorted by revision number, meaning it is
1128 1111 topologically sorted.
1129 1112
1130 1113 'heads' and 'common' are both lists of node IDs. If heads is
1131 1114 not supplied, uses all of the revlog's heads. If common is not
1132 1115 supplied, uses nullid."""
1133 1116 if common is None:
1134 1117 common = [self.nullid]
1135 1118 if heads is None:
1136 1119 heads = self.heads()
1137 1120
1138 1121 common = [self.rev(n) for n in common]
1139 1122 heads = [self.rev(n) for n in heads]
1140 1123
1141 1124 inc = self.incrementalmissingrevs(common=common)
1142 1125 return [self.node(r) for r in inc.missingancestors(heads)]
1143 1126
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
1303 1286
1304 1287 def headrevs(self, revs=None):
1305 1288 if revs is None:
1306 1289 try:
1307 1290 return self.index.headrevs()
1308 1291 except AttributeError:
1309 1292 return self._headrevs()
1310 1293 if rustdagop is not None:
1311 1294 return rustdagop.headrevs(self.index, revs)
1312 1295 return dagop.headrevs(revs, self._uncheckedparentrevs)
1313 1296
1314 1297 def computephases(self, roots):
1315 1298 return self.index.computephasesmapsets(roots)
1316 1299
1317 1300 def _headrevs(self):
1318 1301 count = len(self)
1319 1302 if not count:
1320 1303 return [nullrev]
1321 1304 # we won't iter over filtered rev so nobody is a head at start
1322 1305 ishead = [0] * (count + 1)
1323 1306 index = self.index
1324 1307 for r in self:
1325 1308 ishead[r] = 1 # I may be an head
1326 1309 e = index[r]
1327 1310 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1328 1311 return [r for r, val in enumerate(ishead) if val]
1329 1312
1330 1313 def heads(self, start=None, stop=None):
1331 1314 """return the list of all nodes that have no children
1332 1315
1333 1316 if start is specified, only heads that are descendants of
1334 1317 start will be returned
1335 1318 if stop is specified, it will consider all the revs from stop
1336 1319 as if they had no children
1337 1320 """
1338 1321 if start is None and stop is None:
1339 1322 if not len(self):
1340 1323 return [self.nullid]
1341 1324 return [self.node(r) for r in self.headrevs()]
1342 1325
1343 1326 if start is None:
1344 1327 start = nullrev
1345 1328 else:
1346 1329 start = self.rev(start)
1347 1330
1348 1331 stoprevs = {self.rev(n) for n in stop or []}
1349 1332
1350 1333 revs = dagop.headrevssubset(
1351 1334 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1352 1335 )
1353 1336
1354 1337 return [self.node(rev) for rev in revs]
1355 1338
1356 1339 def children(self, node):
1357 1340 """find the children of a given node"""
1358 1341 c = []
1359 1342 p = self.rev(node)
1360 1343 for r in self.revs(start=p + 1):
1361 1344 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1362 1345 if prevs:
1363 1346 for pr in prevs:
1364 1347 if pr == p:
1365 1348 c.append(self.node(r))
1366 1349 elif p == nullrev:
1367 1350 c.append(self.node(r))
1368 1351 return c
1369 1352
1370 1353 def commonancestorsheads(self, a, b):
1371 1354 """calculate all the heads of the common ancestors of nodes a and b"""
1372 1355 a, b = self.rev(a), self.rev(b)
1373 1356 ancs = self._commonancestorsheads(a, b)
1374 1357 return pycompat.maplist(self.node, ancs)
1375 1358
1376 1359 def _commonancestorsheads(self, *revs):
1377 1360 """calculate all the heads of the common ancestors of revs"""
1378 1361 try:
1379 1362 ancs = self.index.commonancestorsheads(*revs)
1380 1363 except (AttributeError, OverflowError): # C implementation failed
1381 1364 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1382 1365 return ancs
1383 1366
1384 1367 def isancestor(self, a, b):
1385 1368 """return True if node a is an ancestor of node b
1386 1369
1387 1370 A revision is considered an ancestor of itself."""
1388 1371 a, b = self.rev(a), self.rev(b)
1389 1372 return self.isancestorrev(a, b)
1390 1373
1391 1374 def isancestorrev(self, a, b):
1392 1375 """return True if revision a is an ancestor of revision b
1393 1376
1394 1377 A revision is considered an ancestor of itself.
1395 1378
1396 1379 The implementation of this is trivial but the use of
1397 1380 reachableroots is not."""
1398 1381 if a == nullrev:
1399 1382 return True
1400 1383 elif a == b:
1401 1384 return True
1402 1385 elif a > b:
1403 1386 return False
1404 1387 return bool(self.reachableroots(a, [b], [a], includepath=False))
1405 1388
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        # Prefer the C index implementation; fall back to the pure-Python
        # DAG helper when the index does not provide reachableroots2.
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )
1418 1401
1419 1402 def ancestor(self, a, b):
1420 1403 """calculate the "best" common ancestor of nodes a and b"""
1421 1404
1422 1405 a, b = self.rev(a), self.rev(b)
1423 1406 try:
1424 1407 ancs = self.index.ancestors(a, b)
1425 1408 except (AttributeError, OverflowError):
1426 1409 ancs = ancestor.ancestors(self.parentrevs, a, b)
1427 1410 if ancs:
1428 1411 # choose a consistent winner when there's a tie
1429 1412 return min(map(self.node, ancs))
1430 1413 return self.nullid
1431 1414
    def _match(self, id):
        """Try to resolve ``id`` to a node using only exact interpretations.

        ``id`` may be an integer revision, a 20-byte binary node, the
        byte-string form of a revision number (negative counts from the
        end), or a full 40-digit hex node.  Returns None (implicitly)
        when no exact interpretation matches.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            # reject things like b'1.0' or b'+1' that int() would accept
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
1465 1448
    def _partialmatch(self, id):
        """Resolve a hex-prefix ``id`` to a full binary node, or None.

        Raises AmbiguousPrefixLookupError when several nodes share the
        prefix and WdirUnsupported when the prefix can only denote the
        working-directory pseudo-node.  Successful unambiguous lookups
        are memoized in ``self._pcache``.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            # fast path: C radix tree lookup in the index
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # slow path: linear scan of the index for matching prefixes
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1518 1501
1519 1502 def lookup(self, id):
1520 1503 """locate a node based on:
1521 1504 - revision number or str(revision number)
1522 1505 - nodeid or subset of hex nodeid
1523 1506 """
1524 1507 n = self._match(id)
1525 1508 if n is not None:
1526 1509 return n
1527 1510 n = self._partialmatch(id)
1528 1511 if n:
1529 1512 return n
1530 1513
1531 1514 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1532 1515
1533 1516 def shortest(self, node, minlength=1):
1534 1517 """Find the shortest unambiguous prefix that matches node."""
1535 1518
1536 1519 def isvalid(prefix):
1537 1520 try:
1538 1521 matchednode = self._partialmatch(prefix)
1539 1522 except error.AmbiguousPrefixLookupError:
1540 1523 return False
1541 1524 except error.WdirUnsupported:
1542 1525 # single 'ff...' match
1543 1526 return True
1544 1527 if matchednode is None:
1545 1528 raise error.LookupError(node, self.indexfile, _(b'no node'))
1546 1529 return True
1547 1530
1548 1531 def maybewdir(prefix):
1549 1532 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1550 1533
1551 1534 hexnode = hex(node)
1552 1535
1553 1536 def disambiguate(hexnode, minlength):
1554 1537 """Disambiguate against wdirid."""
1555 1538 for length in range(minlength, len(hexnode) + 1):
1556 1539 prefix = hexnode[:length]
1557 1540 if not maybewdir(prefix):
1558 1541 return prefix
1559 1542
1560 1543 if not getattr(self, 'filteredrevs', None):
1561 1544 try:
1562 1545 length = max(self.index.shortest(node), minlength)
1563 1546 return disambiguate(hexnode, length)
1564 1547 except error.RevlogError:
1565 1548 if node != self.nodeconstants.wdirid:
1566 1549 raise error.LookupError(node, self.indexfile, _(b'no node'))
1567 1550 except AttributeError:
1568 1551 # Fall through to pure code
1569 1552 pass
1570 1553
1571 1554 if node == self.nodeconstants.wdirid:
1572 1555 for length in range(minlength, len(hexnode) + 1):
1573 1556 prefix = hexnode[:length]
1574 1557 if isvalid(prefix):
1575 1558 return prefix
1576 1559
1577 1560 for length in range(minlength, len(hexnode) + 1):
1578 1561 prefix = hexnode[:length]
1579 1562 if isvalid(prefix):
1580 1563 return disambiguate(hexnode, length)
1581 1564
1582 1565 def cmp(self, node, text):
1583 1566 """compare text with a given file revision
1584 1567
1585 1568 returns True if text is different than what is stored.
1586 1569 """
1587 1570 p1, p2 = self.parents(node)
1588 1571 return storageutil.hashrevisionsha1(text, p1, p2) != node
1589 1572
1590 1573 def _cachesegment(self, offset, data):
1591 1574 """Add a segment to the revlog cache.
1592 1575
1593 1576 Accepts an absolute offset and the data that is at that location.
1594 1577 """
1595 1578 o, d = self._chunkcache
1596 1579 # try to add to existing cache
1597 1580 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1598 1581 self._chunkcache = o, d + data
1599 1582 else:
1600 1583 self._chunkcache = offset, data
1601 1584
1602 1585 def _readsegment(self, offset, length, df=None):
1603 1586 """Load a segment of raw data from the revlog.
1604 1587
1605 1588 Accepts an absolute offset, length to read, and an optional existing
1606 1589 file handle to read from.
1607 1590
1608 1591 If an existing file handle is passed, it will be seeked and the
1609 1592 original seek position will NOT be restored.
1610 1593
1611 1594 Returns a str or buffer of raw byte data.
1612 1595
1613 1596 Raises if the requested number of bytes could not be read.
1614 1597 """
1615 1598 # Cache data both forward and backward around the requested
1616 1599 # data, in a fixed size window. This helps speed up operations
1617 1600 # involving reading the revlog backwards.
1618 1601 cachesize = self._chunkcachesize
1619 1602 realoffset = offset & ~(cachesize - 1)
1620 1603 reallength = (
1621 1604 (offset + length + cachesize) & ~(cachesize - 1)
1622 1605 ) - realoffset
1623 1606 with self._datareadfp(df) as df:
1624 1607 df.seek(realoffset)
1625 1608 d = df.read(reallength)
1626 1609
1627 1610 self._cachesegment(realoffset, d)
1628 1611 if offset != realoffset or reallength != length:
1629 1612 startoffset = offset - realoffset
1630 1613 if len(d) - startoffset < length:
1631 1614 raise error.RevlogError(
1632 1615 _(
1633 1616 b'partial read of revlog %s; expected %d bytes from '
1634 1617 b'offset %d, got %d'
1635 1618 )
1636 1619 % (
1637 1620 self.indexfile if self._inline else self.datafile,
1638 1621 length,
1639 1622 realoffset,
1640 1623 len(d) - startoffset,
1641 1624 )
1642 1625 )
1643 1626
1644 1627 return util.buffer(d, startoffset, length)
1645 1628
1646 1629 if len(d) < length:
1647 1630 raise error.RevlogError(
1648 1631 _(
1649 1632 b'partial read of revlog %s; expected %d bytes from offset '
1650 1633 b'%d, got %d'
1651 1634 )
1652 1635 % (
1653 1636 self.indexfile if self._inline else self.datafile,
1654 1637 length,
1655 1638 offset,
1656 1639 len(d),
1657 1640 )
1658 1641 )
1659 1642
1660 1643 return d
1661 1644
1662 1645 def _getsegment(self, offset, length, df=None):
1663 1646 """Obtain a segment of raw data from the revlog.
1664 1647
1665 1648 Accepts an absolute offset, length of bytes to obtain, and an
1666 1649 optional file handle to the already-opened revlog. If the file
1667 1650 handle is used, it's original seek position will not be preserved.
1668 1651
1669 1652 Requests for data may be returned from a cache.
1670 1653
1671 1654 Returns a str or a buffer instance of raw byte data.
1672 1655 """
1673 1656 o, d = self._chunkcache
1674 1657 l = len(d)
1675 1658
1676 1659 # is it in the cache?
1677 1660 cachestart = offset - o
1678 1661 cacheend = cachestart + length
1679 1662 if cachestart >= 0 and cacheend <= l:
1680 1663 if cachestart == 0 and cacheend == l:
1681 1664 return d # avoid a copy
1682 1665 return util.buffer(d, cachestart, cacheend - cachestart)
1683 1666
1684 1667 return self._readsegment(offset, length, df=df)
1685 1668
1686 1669 def _getsegmentforrevs(self, startrev, endrev, df=None):
1687 1670 """Obtain a segment of raw data corresponding to a range of revisions.
1688 1671
1689 1672 Accepts the start and end revisions and an optional already-open
1690 1673 file handle to be used for reading. If the file handle is read, its
1691 1674 seek position will not be preserved.
1692 1675
1693 1676 Requests for data may be satisfied by a cache.
1694 1677
1695 1678 Returns a 2-tuple of (offset, data) for the requested range of
1696 1679 revisions. Offset is the integer offset from the beginning of the
1697 1680 revlog and data is a str or buffer of the raw byte data.
1698 1681
1699 1682 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1700 1683 to determine where each revision's data begins and ends.
1701 1684 """
1702 1685 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1703 1686 # (functions are expensive).
1704 1687 index = self.index
1705 1688 istart = index[startrev]
1706 1689 start = int(istart[0] >> 16)
1707 1690 if startrev == endrev:
1708 1691 end = start + istart[1]
1709 1692 else:
1710 1693 iend = index[endrev]
1711 1694 end = int(iend[0] >> 16) + iend[1]
1712 1695
1713 1696 if self._inline:
1714 1697 start += (startrev + 1) * self.index.entry_size
1715 1698 end += (endrev + 1) * self.index.entry_size
1716 1699 length = end - start
1717 1700
1718 1701 return start, self._getsegment(start, length, df=df)
1719 1702
1720 1703 def _chunk(self, rev, df=None):
1721 1704 """Obtain a single decompressed chunk for a revision.
1722 1705
1723 1706 Accepts an integer revision and an optional already-open file handle
1724 1707 to be used for reading. If used, the seek position of the file will not
1725 1708 be preserved.
1726 1709
1727 1710 Returns a str holding uncompressed data for the requested revision.
1728 1711 """
1729 1712 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1730 1713
1731 1714 def _chunks(self, revs, df=None, targetsize=None):
1732 1715 """Obtain decompressed chunks for the specified revisions.
1733 1716
1734 1717 Accepts an iterable of numeric revisions that are assumed to be in
1735 1718 ascending order. Also accepts an optional already-open file handle
1736 1719 to be used for reading. If used, the seek position of the file will
1737 1720 not be preserved.
1738 1721
1739 1722 This function is similar to calling ``self._chunk()`` multiple times,
1740 1723 but is faster.
1741 1724
1742 1725 Returns a list with decompressed data for each requested revision.
1743 1726 """
1744 1727 if not revs:
1745 1728 return []
1746 1729 start = self.start
1747 1730 length = self.length
1748 1731 inline = self._inline
1749 1732 iosize = self.index.entry_size
1750 1733 buffer = util.buffer
1751 1734
1752 1735 l = []
1753 1736 ladd = l.append
1754 1737
1755 1738 if not self._withsparseread:
1756 1739 slicedchunks = (revs,)
1757 1740 else:
1758 1741 slicedchunks = deltautil.slicechunk(
1759 1742 self, revs, targetsize=targetsize
1760 1743 )
1761 1744
1762 1745 for revschunk in slicedchunks:
1763 1746 firstrev = revschunk[0]
1764 1747 # Skip trailing revisions with empty diff
1765 1748 for lastrev in revschunk[::-1]:
1766 1749 if length(lastrev) != 0:
1767 1750 break
1768 1751
1769 1752 try:
1770 1753 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1771 1754 except OverflowError:
1772 1755 # issue4215 - we can't cache a run of chunks greater than
1773 1756 # 2G on Windows
1774 1757 return [self._chunk(rev, df=df) for rev in revschunk]
1775 1758
1776 1759 decomp = self.decompress
1777 1760 for rev in revschunk:
1778 1761 chunkstart = start(rev)
1779 1762 if inline:
1780 1763 chunkstart += (rev + 1) * iosize
1781 1764 chunklength = length(rev)
1782 1765 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1783 1766
1784 1767 return l
1785 1768
1786 1769 def _chunkclear(self):
1787 1770 """Clear the raw chunk cache."""
1788 1771 self._chunkcache = (0, b'')
1789 1772
1790 1773 def deltaparent(self, rev):
1791 1774 """return deltaparent of the given revision"""
1792 1775 base = self.index[rev][3]
1793 1776 if base == rev:
1794 1777 return nullrev
1795 1778 elif self._generaldelta:
1796 1779 return base
1797 1780 else:
1798 1781 return rev - 1
1799 1782
1800 1783 def issnapshot(self, rev):
1801 1784 """tells whether rev is a snapshot"""
1802 1785 if not self._sparserevlog:
1803 1786 return self.deltaparent(rev) == nullrev
1804 1787 elif util.safehasattr(self.index, b'issnapshot'):
1805 1788 # directly assign the method to cache the testing and access
1806 1789 self.issnapshot = self.index.issnapshot
1807 1790 return self.issnapshot(rev)
1808 1791 if rev == nullrev:
1809 1792 return True
1810 1793 entry = self.index[rev]
1811 1794 base = entry[3]
1812 1795 if base == rev:
1813 1796 return True
1814 1797 if base == nullrev:
1815 1798 return True
1816 1799 p1 = entry[5]
1817 1800 p2 = entry[6]
1818 1801 if base == p1 or base == p2:
1819 1802 return False
1820 1803 return self.issnapshot(base)
1821 1804
1822 1805 def snapshotdepth(self, rev):
1823 1806 """number of snapshot in the chain before this one"""
1824 1807 if not self.issnapshot(rev):
1825 1808 raise error.ProgrammingError(b'revision %d not a snapshot')
1826 1809 return len(self._deltachain(rev)[0]) - 1
1827 1810
1828 1811 def revdiff(self, rev1, rev2):
1829 1812 """return or calculate a delta between two revisions
1830 1813
1831 1814 The delta calculated is in binary form and is intended to be written to
1832 1815 revlog data directly. So this function needs raw revision data.
1833 1816 """
1834 1817 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1835 1818 return bytes(self._chunk(rev2))
1836 1819
1837 1820 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1838 1821
1839 1822 def _processflags(self, text, flags, operation, raw=False):
1840 1823 """deprecated entry point to access flag processors"""
1841 1824 msg = b'_processflag(...) use the specialized variant'
1842 1825 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1843 1826 if raw:
1844 1827 return text, flagutil.processflagsraw(self, text, flags)
1845 1828 elif operation == b'read':
1846 1829 return flagutil.processflagsread(self, text, flags)
1847 1830 else: # write operation
1848 1831 return flagutil.processflagswrite(self, text, flags)
1849 1832
1850 1833 def revision(self, nodeorrev, _df=None, raw=False):
1851 1834 """return an uncompressed revision of a given node or revision
1852 1835 number.
1853 1836
1854 1837 _df - an existing file handle to read from. (internal-only)
1855 1838 raw - an optional argument specifying if the revision data is to be
1856 1839 treated as raw data when applying flag transforms. 'raw' should be set
1857 1840 to True when generating changegroups or in debug commands.
1858 1841 """
1859 1842 if raw:
1860 1843 msg = (
1861 1844 b'revlog.revision(..., raw=True) is deprecated, '
1862 1845 b'use revlog.rawdata(...)'
1863 1846 )
1864 1847 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1865 1848 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1866 1849
1867 1850 def sidedata(self, nodeorrev, _df=None):
1868 1851 """a map of extra data related to the changeset but not part of the hash
1869 1852
1870 1853 This function currently return a dictionary. However, more advanced
1871 1854 mapping object will likely be used in the future for a more
1872 1855 efficient/lazy code.
1873 1856 """
1874 1857 return self._revisiondata(nodeorrev, _df)[1]
1875 1858
1876 1859 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1877 1860 # deal with <nodeorrev> argument type
1878 1861 if isinstance(nodeorrev, int):
1879 1862 rev = nodeorrev
1880 1863 node = self.node(rev)
1881 1864 else:
1882 1865 node = nodeorrev
1883 1866 rev = None
1884 1867
1885 1868 # fast path the special `nullid` rev
1886 1869 if node == self.nullid:
1887 1870 return b"", {}
1888 1871
1889 1872 # ``rawtext`` is the text as stored inside the revlog. Might be the
1890 1873 # revision or might need to be processed to retrieve the revision.
1891 1874 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1892 1875
1893 1876 if self.version & 0xFFFF == REVLOGV2:
1894 1877 if rev is None:
1895 1878 rev = self.rev(node)
1896 1879 sidedata = self._sidedata(rev)
1897 1880 else:
1898 1881 sidedata = {}
1899 1882
1900 1883 if raw and validated:
1901 1884 # if we don't want to process the raw text and that raw
1902 1885 # text is cached, we can exit early.
1903 1886 return rawtext, sidedata
1904 1887 if rev is None:
1905 1888 rev = self.rev(node)
1906 1889 # the revlog's flag for this revision
1907 1890 # (usually alter its state or content)
1908 1891 flags = self.flags(rev)
1909 1892
1910 1893 if validated and flags == REVIDX_DEFAULT_FLAGS:
1911 1894 # no extra flags set, no flag processor runs, text = rawtext
1912 1895 return rawtext, sidedata
1913 1896
1914 1897 if raw:
1915 1898 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1916 1899 text = rawtext
1917 1900 else:
1918 1901 r = flagutil.processflagsread(self, rawtext, flags)
1919 1902 text, validatehash = r
1920 1903 if validatehash:
1921 1904 self.checkhash(text, node, rev=rev)
1922 1905 if not validated:
1923 1906 self._revisioncache = (node, rev, rawtext)
1924 1907
1925 1908 return text, sidedata
1926 1909
1927 1910 def _rawtext(self, node, rev, _df=None):
1928 1911 """return the possibly unvalidated rawtext for a revision
1929 1912
1930 1913 returns (rev, rawtext, validated)
1931 1914 """
1932 1915
1933 1916 # revision in the cache (could be useful to apply delta)
1934 1917 cachedrev = None
1935 1918 # An intermediate text to apply deltas to
1936 1919 basetext = None
1937 1920
1938 1921 # Check if we have the entry in cache
1939 1922 # The cache entry looks like (node, rev, rawtext)
1940 1923 if self._revisioncache:
1941 1924 if self._revisioncache[0] == node:
1942 1925 return (rev, self._revisioncache[2], True)
1943 1926 cachedrev = self._revisioncache[1]
1944 1927
1945 1928 if rev is None:
1946 1929 rev = self.rev(node)
1947 1930
1948 1931 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1949 1932 if stopped:
1950 1933 basetext = self._revisioncache[2]
1951 1934
1952 1935 # drop cache to save memory, the caller is expected to
1953 1936 # update self._revisioncache after validating the text
1954 1937 self._revisioncache = None
1955 1938
1956 1939 targetsize = None
1957 1940 rawsize = self.index[rev][2]
1958 1941 if 0 <= rawsize:
1959 1942 targetsize = 4 * rawsize
1960 1943
1961 1944 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1962 1945 if basetext is None:
1963 1946 basetext = bytes(bins[0])
1964 1947 bins = bins[1:]
1965 1948
1966 1949 rawtext = mdiff.patches(basetext, bins)
1967 1950 del basetext # let us have a chance to free memory early
1968 1951 return (rev, rawtext, False)
1969 1952
1970 1953 def _sidedata(self, rev):
1971 1954 """Return the sidedata for a given revision number."""
1972 1955 index_entry = self.index[rev]
1973 1956 sidedata_offset = index_entry[8]
1974 1957 sidedata_size = index_entry[9]
1975 1958
1976 1959 if self._inline:
1977 1960 sidedata_offset += self.index.entry_size * (1 + rev)
1978 1961 if sidedata_size == 0:
1979 1962 return {}
1980 1963
1981 1964 segment = self._getsegment(sidedata_offset, sidedata_size)
1982 1965 sidedata = sidedatautil.deserialize_sidedata(segment)
1983 1966 return sidedata
1984 1967
1985 1968 def rawdata(self, nodeorrev, _df=None):
1986 1969 """return an uncompressed raw data of a given node or revision number.
1987 1970
1988 1971 _df - an existing file handle to read from. (internal-only)
1989 1972 """
1990 1973 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1991 1974
1992 1975 def hash(self, text, p1, p2):
1993 1976 """Compute a node hash.
1994 1977
1995 1978 Available as a function so that subclasses can replace the hash
1996 1979 as needed.
1997 1980 """
1998 1981 return storageutil.hashrevisionsha1(text, p1, p2)
1999 1982
2000 1983 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2001 1984 """Check node hash integrity.
2002 1985
2003 1986 Available as a function so that subclasses can extend hash mismatch
2004 1987 behaviors as needed.
2005 1988 """
2006 1989 try:
2007 1990 if p1 is None and p2 is None:
2008 1991 p1, p2 = self.parents(node)
2009 1992 if node != self.hash(text, p1, p2):
2010 1993 # Clear the revision cache on hash failure. The revision cache
2011 1994 # only stores the raw revision and clearing the cache does have
2012 1995 # the side-effect that we won't have a cache hit when the raw
2013 1996 # revision data is accessed. But this case should be rare and
2014 1997 # it is extra work to teach the cache about the hash
2015 1998 # verification state.
2016 1999 if self._revisioncache and self._revisioncache[0] == node:
2017 2000 self._revisioncache = None
2018 2001
2019 2002 revornode = rev
2020 2003 if revornode is None:
2021 2004 revornode = templatefilters.short(hex(node))
2022 2005 raise error.RevlogError(
2023 2006 _(b"integrity check failed on %s:%s")
2024 2007 % (self.indexfile, pycompat.bytestr(revornode))
2025 2008 )
2026 2009 except error.RevlogError:
2027 2010 if self._censorable and storageutil.iscensoredtext(text):
2028 2011 raise error.CensoredNodeError(self.indexfile, node, text)
2029 2012 raise
2030 2013
2031 2014 def _enforceinlinesize(self, tr, fp=None):
2032 2015 """Check if the revlog is too big for inline and convert if so.
2033 2016
2034 2017 This should be called after revisions are added to the revlog. If the
2035 2018 revlog has grown too large to be an inline revlog, it will convert it
2036 2019 to use multiple index and data files.
2037 2020 """
2038 2021 tiprev = len(self) - 1
2039 2022 if (
2040 2023 not self._inline
2041 2024 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2042 2025 ):
2043 2026 return
2044 2027
2045 2028 troffset = tr.findoffset(self.indexfile)
2046 2029 if troffset is None:
2047 2030 raise error.RevlogError(
2048 2031 _(b"%s not found in the transaction") % self.indexfile
2049 2032 )
2050 2033 trindex = 0
2051 2034 tr.add(self.datafile, 0)
2052 2035
2053 2036 if fp:
2054 2037 fp.flush()
2055 2038 fp.close()
2056 2039 # We can't use the cached file handle after close(). So prevent
2057 2040 # its usage.
2058 2041 self._writinghandles = None
2059 2042
2060 2043 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2061 2044 for r in self:
2062 2045 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2063 2046 if troffset <= self.start(r):
2064 2047 trindex = r
2065 2048
2066 2049 with self._indexfp(b'w') as fp:
2067 2050 self.version &= ~FLAG_INLINE_DATA
2068 2051 self._inline = False
2069 2052 io = self._io
2070 2053 for i in self:
2071 e = io.packentry(self.index[i], self.node, self.version, i)
2054 e = self.index.entry_binary(i, self.version)
2072 2055 fp.write(e)
2073 2056
2074 2057 # the temp file replace the real index when we exit the context
2075 2058 # manager
2076 2059
2077 2060 tr.replace(self.indexfile, trindex * self.index.entry_size)
2078 2061 nodemaputil.setup_persistent_nodemap(tr, self)
2079 2062 self._chunkclear()
2080 2063
2081 2064 def _nodeduplicatecallback(self, transaction, node):
2082 2065 """called when trying to add a node already stored."""
2083 2066
2084 2067 def addrevision(
2085 2068 self,
2086 2069 text,
2087 2070 transaction,
2088 2071 link,
2089 2072 p1,
2090 2073 p2,
2091 2074 cachedelta=None,
2092 2075 node=None,
2093 2076 flags=REVIDX_DEFAULT_FLAGS,
2094 2077 deltacomputer=None,
2095 2078 sidedata=None,
2096 2079 ):
2097 2080 """add a revision to the log
2098 2081
2099 2082 text - the revision data to add
2100 2083 transaction - the transaction object used for rollback
2101 2084 link - the linkrev data to add
2102 2085 p1, p2 - the parent nodeids of the revision
2103 2086 cachedelta - an optional precomputed delta
2104 2087 node - nodeid of revision; typically node is not specified, and it is
2105 2088 computed by default as hash(text, p1, p2), however subclasses might
2106 2089 use different hashing method (and override checkhash() in such case)
2107 2090 flags - the known flags to set on the revision
2108 2091 deltacomputer - an optional deltacomputer instance shared between
2109 2092 multiple calls
2110 2093 """
2111 2094 if link == nullrev:
2112 2095 raise error.RevlogError(
2113 2096 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2114 2097 )
2115 2098
2116 2099 if sidedata is None:
2117 2100 sidedata = {}
2118 2101 elif not self.hassidedata:
2119 2102 raise error.ProgrammingError(
2120 2103 _(b"trying to add sidedata to a revlog who don't support them")
2121 2104 )
2122 2105
2123 2106 if flags:
2124 2107 node = node or self.hash(text, p1, p2)
2125 2108
2126 2109 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2127 2110
2128 2111 # If the flag processor modifies the revision data, ignore any provided
2129 2112 # cachedelta.
2130 2113 if rawtext != text:
2131 2114 cachedelta = None
2132 2115
2133 2116 if len(rawtext) > _maxentrysize:
2134 2117 raise error.RevlogError(
2135 2118 _(
2136 2119 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2137 2120 )
2138 2121 % (self.indexfile, len(rawtext))
2139 2122 )
2140 2123
2141 2124 node = node or self.hash(rawtext, p1, p2)
2142 2125 rev = self.index.get_rev(node)
2143 2126 if rev is not None:
2144 2127 return rev
2145 2128
2146 2129 if validatehash:
2147 2130 self.checkhash(rawtext, node, p1=p1, p2=p2)
2148 2131
2149 2132 return self.addrawrevision(
2150 2133 rawtext,
2151 2134 transaction,
2152 2135 link,
2153 2136 p1,
2154 2137 p2,
2155 2138 node,
2156 2139 flags,
2157 2140 cachedelta=cachedelta,
2158 2141 deltacomputer=deltacomputer,
2159 2142 sidedata=sidedata,
2160 2143 )
2161 2144
2162 2145 def addrawrevision(
2163 2146 self,
2164 2147 rawtext,
2165 2148 transaction,
2166 2149 link,
2167 2150 p1,
2168 2151 p2,
2169 2152 node,
2170 2153 flags,
2171 2154 cachedelta=None,
2172 2155 deltacomputer=None,
2173 2156 sidedata=None,
2174 2157 ):
2175 2158 """add a raw revision with known flags, node and parents
2176 2159 useful when reusing a revision not stored in this revlog (ex: received
2177 2160 over wire, or read from an external bundle).
2178 2161 """
2179 2162 dfh = None
2180 2163 if not self._inline:
2181 2164 dfh = self._datafp(b"a+")
2182 2165 ifh = self._indexfp(b"a+")
2183 2166 try:
2184 2167 return self._addrevision(
2185 2168 node,
2186 2169 rawtext,
2187 2170 transaction,
2188 2171 link,
2189 2172 p1,
2190 2173 p2,
2191 2174 flags,
2192 2175 cachedelta,
2193 2176 ifh,
2194 2177 dfh,
2195 2178 deltacomputer=deltacomputer,
2196 2179 sidedata=sidedata,
2197 2180 )
2198 2181 finally:
2199 2182 if dfh:
2200 2183 dfh.close()
2201 2184 ifh.close()
2202 2185
2203 2186 def compress(self, data):
2204 2187 """Generate a possibly-compressed representation of data."""
2205 2188 if not data:
2206 2189 return b'', data
2207 2190
2208 2191 compressed = self._compressor.compress(data)
2209 2192
2210 2193 if compressed:
2211 2194 # The revlog compressor added the header in the returned data.
2212 2195 return b'', compressed
2213 2196
2214 2197 if data[0:1] == b'\0':
2215 2198 return b'', data
2216 2199 return b'u', data
2217 2200
2218 2201 def decompress(self, data):
2219 2202 """Decompress a revlog chunk.
2220 2203
2221 2204 The chunk is expected to begin with a header identifying the
2222 2205 format type so it can be routed to an appropriate decompressor.
2223 2206 """
2224 2207 if not data:
2225 2208 return data
2226 2209
2227 2210 # Revlogs are read much more frequently than they are written and many
2228 2211 # chunks only take microseconds to decompress, so performance is
2229 2212 # important here.
2230 2213 #
2231 2214 # We can make a few assumptions about revlogs:
2232 2215 #
2233 2216 # 1) the majority of chunks will be compressed (as opposed to inline
2234 2217 # raw data).
2235 2218 # 2) decompressing *any* data will likely by at least 10x slower than
2236 2219 # returning raw inline data.
2237 2220 # 3) we want to prioritize common and officially supported compression
2238 2221 # engines
2239 2222 #
2240 2223 # It follows that we want to optimize for "decompress compressed data
2241 2224 # when encoded with common and officially supported compression engines"
2242 2225 # case over "raw data" and "data encoded by less common or non-official
2243 2226 # compression engines." That is why we have the inline lookup first
2244 2227 # followed by the compengines lookup.
2245 2228 #
2246 2229 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2247 2230 # compressed chunks. And this matters for changelog and manifest reads.
2248 2231 t = data[0:1]
2249 2232
2250 2233 if t == b'x':
2251 2234 try:
2252 2235 return _zlibdecompress(data)
2253 2236 except zlib.error as e:
2254 2237 raise error.RevlogError(
2255 2238 _(b'revlog decompress error: %s')
2256 2239 % stringutil.forcebytestr(e)
2257 2240 )
2258 2241 # '\0' is more common than 'u' so it goes first.
2259 2242 elif t == b'\0':
2260 2243 return data
2261 2244 elif t == b'u':
2262 2245 return util.buffer(data, 1)
2263 2246
2264 2247 try:
2265 2248 compressor = self._decompressors[t]
2266 2249 except KeyError:
2267 2250 try:
2268 2251 engine = util.compengines.forrevlogheader(t)
2269 2252 compressor = engine.revlogcompressor(self._compengineopts)
2270 2253 self._decompressors[t] = compressor
2271 2254 except KeyError:
2272 2255 raise error.RevlogError(
2273 2256 _(b'unknown compression type %s') % binascii.hexlify(t)
2274 2257 )
2275 2258
2276 2259 return compressor.decompress(data)
2277 2260
2278 2261 def _addrevision(
2279 2262 self,
2280 2263 node,
2281 2264 rawtext,
2282 2265 transaction,
2283 2266 link,
2284 2267 p1,
2285 2268 p2,
2286 2269 flags,
2287 2270 cachedelta,
2288 2271 ifh,
2289 2272 dfh,
2290 2273 alwayscache=False,
2291 2274 deltacomputer=None,
2292 2275 sidedata=None,
2293 2276 ):
2294 2277 """internal function to add revisions to the log
2295 2278
2296 2279 see addrevision for argument descriptions.
2297 2280
2298 2281 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2299 2282
2300 2283 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2301 2284 be used.
2302 2285
2303 2286 invariants:
2304 2287 - rawtext is optional (can be None); if not set, cachedelta must be set.
2305 2288 if both are set, they must correspond to each other.
2306 2289 """
2307 2290 if node == self.nullid:
2308 2291 raise error.RevlogError(
2309 2292 _(b"%s: attempt to add null revision") % self.indexfile
2310 2293 )
2311 2294 if (
2312 2295 node == self.nodeconstants.wdirid
2313 2296 or node in self.nodeconstants.wdirfilenodeids
2314 2297 ):
2315 2298 raise error.RevlogError(
2316 2299 _(b"%s: attempt to add wdir revision") % self.indexfile
2317 2300 )
2318 2301
2319 2302 if self._inline:
2320 2303 fh = ifh
2321 2304 else:
2322 2305 fh = dfh
2323 2306
2324 2307 btext = [rawtext]
2325 2308
2326 2309 curr = len(self)
2327 2310 prev = curr - 1
2328 2311
2329 2312 offset = self._get_data_offset(prev)
2330 2313
2331 2314 if self._concurrencychecker:
2332 2315 if self._inline:
2333 2316 # offset is "as if" it were in the .d file, so we need to add on
2334 2317 # the size of the entry metadata.
2335 2318 self._concurrencychecker(
2336 2319 ifh, self.indexfile, offset + curr * self.index.entry_size
2337 2320 )
2338 2321 else:
2339 2322 # Entries in the .i are a consistent size.
2340 2323 self._concurrencychecker(
2341 2324 ifh, self.indexfile, curr * self.index.entry_size
2342 2325 )
2343 2326 self._concurrencychecker(dfh, self.datafile, offset)
2344 2327
2345 2328 p1r, p2r = self.rev(p1), self.rev(p2)
2346 2329
2347 2330 # full versions are inserted when the needed deltas
2348 2331 # become comparable to the uncompressed text
2349 2332 if rawtext is None:
2350 2333 # need rawtext size, before changed by flag processors, which is
2351 2334 # the non-raw size. use revlog explicitly to avoid filelog's extra
2352 2335 # logic that might remove metadata size.
2353 2336 textlen = mdiff.patchedsize(
2354 2337 revlog.size(self, cachedelta[0]), cachedelta[1]
2355 2338 )
2356 2339 else:
2357 2340 textlen = len(rawtext)
2358 2341
2359 2342 if deltacomputer is None:
2360 2343 deltacomputer = deltautil.deltacomputer(self)
2361 2344
2362 2345 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2363 2346
2364 2347 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2365 2348
2366 2349 if sidedata:
2367 2350 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2368 2351 sidedata_offset = offset + deltainfo.deltalen
2369 2352 else:
2370 2353 serialized_sidedata = b""
2371 2354 # Don't store the offset if the sidedata is empty, that way
2372 2355 # we can easily detect empty sidedata and they will be no different
2373 2356 # than ones we manually add.
2374 2357 sidedata_offset = 0
2375 2358
2376 2359 e = (
2377 2360 offset_type(offset, flags),
2378 2361 deltainfo.deltalen,
2379 2362 textlen,
2380 2363 deltainfo.base,
2381 2364 link,
2382 2365 p1r,
2383 2366 p2r,
2384 2367 node,
2385 2368 sidedata_offset,
2386 2369 len(serialized_sidedata),
2387 2370 )
2388 2371
2389 2372 if self.version & 0xFFFF != REVLOGV2:
2390 2373 e = e[:8]
2391 2374
2392 2375 self.index.append(e)
2393 entry = self._io.packentry(e, self.node, self.version, curr)
2376 entry = self.index.entry_binary(curr, self.version)
2394 2377 self._writeentry(
2395 2378 transaction,
2396 2379 ifh,
2397 2380 dfh,
2398 2381 entry,
2399 2382 deltainfo.data,
2400 2383 link,
2401 2384 offset,
2402 2385 serialized_sidedata,
2403 2386 )
2404 2387
2405 2388 rawtext = btext[0]
2406 2389
2407 2390 if alwayscache and rawtext is None:
2408 2391 rawtext = deltacomputer.buildtext(revinfo, fh)
2409 2392
2410 2393 if type(rawtext) == bytes: # only accept immutable objects
2411 2394 self._revisioncache = (node, curr, rawtext)
2412 2395 self._chainbasecache[curr] = deltainfo.chainbase
2413 2396 return curr
2414 2397
2415 2398 def _get_data_offset(self, prev):
2416 2399 """Returns the current offset in the (in-transaction) data file.
2417 2400 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2418 2401 file to store that information: since sidedata can be rewritten to the
2419 2402 end of the data file within a transaction, you can have cases where, for
2420 2403 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2421 2404 to `n - 1`'s sidedata being written after `n`'s data.
2422 2405
2423 2406 TODO cache this in a docket file before getting out of experimental."""
2424 2407 if self.version & 0xFFFF != REVLOGV2:
2425 2408 return self.end(prev)
2426 2409
2427 2410 offset = 0
2428 2411 for rev, entry in enumerate(self.index):
2429 2412 sidedata_end = entry[8] + entry[9]
2430 2413 # Sidedata for a previous rev has potentially been written after
2431 2414 # this rev's end, so take the max.
2432 2415 offset = max(self.end(rev), offset, sidedata_end)
2433 2416 return offset
2434 2417
2435 2418 def _writeentry(
2436 2419 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2437 2420 ):
2438 2421 # Files opened in a+ mode have inconsistent behavior on various
2439 2422 # platforms. Windows requires that a file positioning call be made
2440 2423 # when the file handle transitions between reads and writes. See
2441 2424 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2442 2425 # platforms, Python or the platform itself can be buggy. Some versions
2443 2426 # of Solaris have been observed to not append at the end of the file
2444 2427 # if the file was seeked to before the end. See issue4943 for more.
2445 2428 #
2446 2429 # We work around this issue by inserting a seek() before writing.
2447 2430 # Note: This is likely not necessary on Python 3. However, because
2448 2431 # the file handle is reused for reads and may be seeked there, we need
2449 2432 # to be careful before changing this.
2450 2433 ifh.seek(0, os.SEEK_END)
2451 2434 if dfh:
2452 2435 dfh.seek(0, os.SEEK_END)
2453 2436
2454 2437 curr = len(self) - 1
2455 2438 if not self._inline:
2456 2439 transaction.add(self.datafile, offset)
2457 2440 transaction.add(self.indexfile, curr * len(entry))
2458 2441 if data[0]:
2459 2442 dfh.write(data[0])
2460 2443 dfh.write(data[1])
2461 2444 if sidedata:
2462 2445 dfh.write(sidedata)
2463 2446 ifh.write(entry)
2464 2447 else:
2465 2448 offset += curr * self.index.entry_size
2466 2449 transaction.add(self.indexfile, offset)
2467 2450 ifh.write(entry)
2468 2451 ifh.write(data[0])
2469 2452 ifh.write(data[1])
2470 2453 if sidedata:
2471 2454 ifh.write(sidedata)
2472 2455 self._enforceinlinesize(transaction, ifh)
2473 2456 nodemaputil.setup_persistent_nodemap(transaction, self)
2474 2457
2475 2458 def addgroup(
2476 2459 self,
2477 2460 deltas,
2478 2461 linkmapper,
2479 2462 transaction,
2480 2463 alwayscache=False,
2481 2464 addrevisioncb=None,
2482 2465 duplicaterevisioncb=None,
2483 2466 ):
2484 2467 """
2485 2468 add a delta group
2486 2469
2487 2470 given a set of deltas, add them to the revision log. the
2488 2471 first delta is against its parent, which should be in our
2489 2472 log, the rest are against the previous delta.
2490 2473
2491 2474 If ``addrevisioncb`` is defined, it will be called with arguments of
2492 2475 this revlog and the node that was added.
2493 2476 """
2494 2477
2495 2478 if self._writinghandles:
2496 2479 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2497 2480
2498 2481 r = len(self)
2499 2482 end = 0
2500 2483 if r:
2501 2484 end = self.end(r - 1)
2502 2485 ifh = self._indexfp(b"a+")
2503 2486 isize = r * self.index.entry_size
2504 2487 if self._inline:
2505 2488 transaction.add(self.indexfile, end + isize)
2506 2489 dfh = None
2507 2490 else:
2508 2491 transaction.add(self.indexfile, isize)
2509 2492 transaction.add(self.datafile, end)
2510 2493 dfh = self._datafp(b"a+")
2511 2494
2512 2495 def flush():
2513 2496 if dfh:
2514 2497 dfh.flush()
2515 2498 ifh.flush()
2516 2499
2517 2500 self._writinghandles = (ifh, dfh)
2518 2501 empty = True
2519 2502
2520 2503 try:
2521 2504 deltacomputer = deltautil.deltacomputer(self)
2522 2505 # loop through our set of deltas
2523 2506 for data in deltas:
2524 2507 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2525 2508 link = linkmapper(linknode)
2526 2509 flags = flags or REVIDX_DEFAULT_FLAGS
2527 2510
2528 2511 rev = self.index.get_rev(node)
2529 2512 if rev is not None:
2530 2513 # this can happen if two branches make the same change
2531 2514 self._nodeduplicatecallback(transaction, rev)
2532 2515 if duplicaterevisioncb:
2533 2516 duplicaterevisioncb(self, rev)
2534 2517 empty = False
2535 2518 continue
2536 2519
2537 2520 for p in (p1, p2):
2538 2521 if not self.index.has_node(p):
2539 2522 raise error.LookupError(
2540 2523 p, self.indexfile, _(b'unknown parent')
2541 2524 )
2542 2525
2543 2526 if not self.index.has_node(deltabase):
2544 2527 raise error.LookupError(
2545 2528 deltabase, self.indexfile, _(b'unknown delta base')
2546 2529 )
2547 2530
2548 2531 baserev = self.rev(deltabase)
2549 2532
2550 2533 if baserev != nullrev and self.iscensored(baserev):
2551 2534 # if base is censored, delta must be full replacement in a
2552 2535 # single patch operation
2553 2536 hlen = struct.calcsize(b">lll")
2554 2537 oldlen = self.rawsize(baserev)
2555 2538 newlen = len(delta) - hlen
2556 2539 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2557 2540 raise error.CensoredBaseError(
2558 2541 self.indexfile, self.node(baserev)
2559 2542 )
2560 2543
2561 2544 if not flags and self._peek_iscensored(baserev, delta, flush):
2562 2545 flags |= REVIDX_ISCENSORED
2563 2546
2564 2547 # We assume consumers of addrevisioncb will want to retrieve
2565 2548 # the added revision, which will require a call to
2566 2549 # revision(). revision() will fast path if there is a cache
2567 2550 # hit. So, we tell _addrevision() to always cache in this case.
2568 2551 # We're only using addgroup() in the context of changegroup
2569 2552 # generation so the revision data can always be handled as raw
2570 2553 # by the flagprocessor.
2571 2554 rev = self._addrevision(
2572 2555 node,
2573 2556 None,
2574 2557 transaction,
2575 2558 link,
2576 2559 p1,
2577 2560 p2,
2578 2561 flags,
2579 2562 (baserev, delta),
2580 2563 ifh,
2581 2564 dfh,
2582 2565 alwayscache=alwayscache,
2583 2566 deltacomputer=deltacomputer,
2584 2567 sidedata=sidedata,
2585 2568 )
2586 2569
2587 2570 if addrevisioncb:
2588 2571 addrevisioncb(self, rev)
2589 2572 empty = False
2590 2573
2591 2574 if not dfh and not self._inline:
2592 2575 # addrevision switched from inline to conventional
2593 2576 # reopen the index
2594 2577 ifh.close()
2595 2578 dfh = self._datafp(b"a+")
2596 2579 ifh = self._indexfp(b"a+")
2597 2580 self._writinghandles = (ifh, dfh)
2598 2581 finally:
2599 2582 self._writinghandles = None
2600 2583
2601 2584 if dfh:
2602 2585 dfh.close()
2603 2586 ifh.close()
2604 2587 return not empty
2605 2588
2606 2589 def iscensored(self, rev):
2607 2590 """Check if a file revision is censored."""
2608 2591 if not self._censorable:
2609 2592 return False
2610 2593
2611 2594 return self.flags(rev) & REVIDX_ISCENSORED
2612 2595
2613 2596 def _peek_iscensored(self, baserev, delta, flush):
2614 2597 """Quickly check if a delta produces a censored revision."""
2615 2598 if not self._censorable:
2616 2599 return False
2617 2600
2618 2601 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2619 2602
2620 2603 def getstrippoint(self, minlink):
2621 2604 """find the minimum rev that must be stripped to strip the linkrev
2622 2605
2623 2606 Returns a tuple containing the minimum rev and a set of all revs that
2624 2607 have linkrevs that will be broken by this strip.
2625 2608 """
2626 2609 return storageutil.resolvestripinfo(
2627 2610 minlink,
2628 2611 len(self) - 1,
2629 2612 self.headrevs(),
2630 2613 self.linkrev,
2631 2614 self.parentrevs,
2632 2615 )
2633 2616
2634 2617 def strip(self, minlink, transaction):
2635 2618 """truncate the revlog on the first revision with a linkrev >= minlink
2636 2619
2637 2620 This function is called when we're stripping revision minlink and
2638 2621 its descendants from the repository.
2639 2622
2640 2623 We have to remove all revisions with linkrev >= minlink, because
2641 2624 the equivalent changelog revisions will be renumbered after the
2642 2625 strip.
2643 2626
2644 2627 So we truncate the revlog on the first of these revisions, and
2645 2628 trust that the caller has saved the revisions that shouldn't be
2646 2629 removed and that it'll re-add them after this truncation.
2647 2630 """
2648 2631 if len(self) == 0:
2649 2632 return
2650 2633
2651 2634 rev, _ = self.getstrippoint(minlink)
2652 2635 if rev == len(self):
2653 2636 return
2654 2637
2655 2638 # first truncate the files on disk
2656 2639 end = self.start(rev)
2657 2640 if not self._inline:
2658 2641 transaction.add(self.datafile, end)
2659 2642 end = rev * self.index.entry_size
2660 2643 else:
2661 2644 end += rev * self.index.entry_size
2662 2645
2663 2646 transaction.add(self.indexfile, end)
2664 2647
2665 2648 # then reset internal state in memory to forget those revisions
2666 2649 self._revisioncache = None
2667 2650 self._chaininfocache = util.lrucachedict(500)
2668 2651 self._chunkclear()
2669 2652
2670 2653 del self.index[rev:-1]
2671 2654
2672 2655 def checksize(self):
2673 2656 """Check size of index and data files
2674 2657
2675 2658 return a (dd, di) tuple.
2676 2659 - dd: extra bytes for the "data" file
2677 2660 - di: extra bytes for the "index" file
2678 2661
2679 2662 A healthy revlog will return (0, 0).
2680 2663 """
2681 2664 expected = 0
2682 2665 if len(self):
2683 2666 expected = max(0, self.end(len(self) - 1))
2684 2667
2685 2668 try:
2686 2669 with self._datafp() as f:
2687 2670 f.seek(0, io.SEEK_END)
2688 2671 actual = f.tell()
2689 2672 dd = actual - expected
2690 2673 except IOError as inst:
2691 2674 if inst.errno != errno.ENOENT:
2692 2675 raise
2693 2676 dd = 0
2694 2677
2695 2678 try:
2696 2679 f = self.opener(self.indexfile)
2697 2680 f.seek(0, io.SEEK_END)
2698 2681 actual = f.tell()
2699 2682 f.close()
2700 2683 s = self.index.entry_size
2701 2684 i = max(0, actual // s)
2702 2685 di = actual - (i * s)
2703 2686 if self._inline:
2704 2687 databytes = 0
2705 2688 for r in self:
2706 2689 databytes += max(0, self.length(r))
2707 2690 dd = 0
2708 2691 di = actual - len(self) * s - databytes
2709 2692 except IOError as inst:
2710 2693 if inst.errno != errno.ENOENT:
2711 2694 raise
2712 2695 di = 0
2713 2696
2714 2697 return (dd, di)
2715 2698
2716 2699 def files(self):
2717 2700 res = [self.indexfile]
2718 2701 if not self._inline:
2719 2702 res.append(self.datafile)
2720 2703 return res
2721 2704
2722 2705 def emitrevisions(
2723 2706 self,
2724 2707 nodes,
2725 2708 nodesorder=None,
2726 2709 revisiondata=False,
2727 2710 assumehaveparentrevisions=False,
2728 2711 deltamode=repository.CG_DELTAMODE_STD,
2729 2712 sidedata_helpers=None,
2730 2713 ):
2731 2714 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2732 2715 raise error.ProgrammingError(
2733 2716 b'unhandled value for nodesorder: %s' % nodesorder
2734 2717 )
2735 2718
2736 2719 if nodesorder is None and not self._generaldelta:
2737 2720 nodesorder = b'storage'
2738 2721
2739 2722 if (
2740 2723 not self._storedeltachains
2741 2724 and deltamode != repository.CG_DELTAMODE_PREV
2742 2725 ):
2743 2726 deltamode = repository.CG_DELTAMODE_FULL
2744 2727
2745 2728 return storageutil.emitrevisions(
2746 2729 self,
2747 2730 nodes,
2748 2731 nodesorder,
2749 2732 revlogrevisiondelta,
2750 2733 deltaparentfn=self.deltaparent,
2751 2734 candeltafn=self.candelta,
2752 2735 rawsizefn=self.rawsize,
2753 2736 revdifffn=self.revdiff,
2754 2737 flagsfn=self.flags,
2755 2738 deltamode=deltamode,
2756 2739 revisiondata=revisiondata,
2757 2740 assumehaveparentrevisions=assumehaveparentrevisions,
2758 2741 sidedata_helpers=sidedata_helpers,
2759 2742 )
2760 2743
2761 2744 DELTAREUSEALWAYS = b'always'
2762 2745 DELTAREUSESAMEREVS = b'samerevs'
2763 2746 DELTAREUSENEVER = b'never'
2764 2747
2765 2748 DELTAREUSEFULLADD = b'fulladd'
2766 2749
2767 2750 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2768 2751
2769 2752 def clone(
2770 2753 self,
2771 2754 tr,
2772 2755 destrevlog,
2773 2756 addrevisioncb=None,
2774 2757 deltareuse=DELTAREUSESAMEREVS,
2775 2758 forcedeltabothparents=None,
2776 2759 sidedatacompanion=None,
2777 2760 ):
2778 2761 """Copy this revlog to another, possibly with format changes.
2779 2762
2780 2763 The destination revlog will contain the same revisions and nodes.
2781 2764 However, it may not be bit-for-bit identical due to e.g. delta encoding
2782 2765 differences.
2783 2766
2784 2767 The ``deltareuse`` argument control how deltas from the existing revlog
2785 2768 are preserved in the destination revlog. The argument can have the
2786 2769 following values:
2787 2770
2788 2771 DELTAREUSEALWAYS
2789 2772 Deltas will always be reused (if possible), even if the destination
2790 2773 revlog would not select the same revisions for the delta. This is the
2791 2774 fastest mode of operation.
2792 2775 DELTAREUSESAMEREVS
2793 2776 Deltas will be reused if the destination revlog would pick the same
2794 2777 revisions for the delta. This mode strikes a balance between speed
2795 2778 and optimization.
2796 2779 DELTAREUSENEVER
2797 2780 Deltas will never be reused. This is the slowest mode of execution.
2798 2781 This mode can be used to recompute deltas (e.g. if the diff/delta
2799 2782 algorithm changes).
2800 2783 DELTAREUSEFULLADD
2801 2784 Revision will be re-added as if their were new content. This is
2802 2785 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2803 2786 eg: large file detection and handling.
2804 2787
2805 2788 Delta computation can be slow, so the choice of delta reuse policy can
2806 2789 significantly affect run time.
2807 2790
2808 2791 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2809 2792 two extremes. Deltas will be reused if they are appropriate. But if the
2810 2793 delta could choose a better revision, it will do so. This means if you
2811 2794 are converting a non-generaldelta revlog to a generaldelta revlog,
2812 2795 deltas will be recomputed if the delta's parent isn't a parent of the
2813 2796 revision.
2814 2797
2815 2798 In addition to the delta policy, the ``forcedeltabothparents``
2816 2799 argument controls whether to force compute deltas against both parents
2817 2800 for merges. By default, the current default is used.
2818 2801
2819 2802 If not None, the `sidedatacompanion` is callable that accept two
2820 2803 arguments:
2821 2804
2822 2805 (srcrevlog, rev)
2823 2806
2824 2807 and return a quintet that control changes to sidedata content from the
2825 2808 old revision to the new clone result:
2826 2809
2827 2810 (dropall, filterout, update, new_flags, dropped_flags)
2828 2811
2829 2812 * if `dropall` is True, all sidedata should be dropped
2830 2813 * `filterout` is a set of sidedata keys that should be dropped
2831 2814 * `update` is a mapping of additionnal/new key -> value
2832 2815 * new_flags is a bitfields of new flags that the revision should get
2833 2816 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2834 2817 """
2835 2818 if deltareuse not in self.DELTAREUSEALL:
2836 2819 raise ValueError(
2837 2820 _(b'value for deltareuse invalid: %s') % deltareuse
2838 2821 )
2839 2822
2840 2823 if len(destrevlog):
2841 2824 raise ValueError(_(b'destination revlog is not empty'))
2842 2825
2843 2826 if getattr(self, 'filteredrevs', None):
2844 2827 raise ValueError(_(b'source revlog has filtered revisions'))
2845 2828 if getattr(destrevlog, 'filteredrevs', None):
2846 2829 raise ValueError(_(b'destination revlog has filtered revisions'))
2847 2830
2848 2831 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2849 2832 # if possible.
2850 2833 oldlazydelta = destrevlog._lazydelta
2851 2834 oldlazydeltabase = destrevlog._lazydeltabase
2852 2835 oldamd = destrevlog._deltabothparents
2853 2836
2854 2837 try:
2855 2838 if deltareuse == self.DELTAREUSEALWAYS:
2856 2839 destrevlog._lazydeltabase = True
2857 2840 destrevlog._lazydelta = True
2858 2841 elif deltareuse == self.DELTAREUSESAMEREVS:
2859 2842 destrevlog._lazydeltabase = False
2860 2843 destrevlog._lazydelta = True
2861 2844 elif deltareuse == self.DELTAREUSENEVER:
2862 2845 destrevlog._lazydeltabase = False
2863 2846 destrevlog._lazydelta = False
2864 2847
2865 2848 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2866 2849
2867 2850 self._clone(
2868 2851 tr,
2869 2852 destrevlog,
2870 2853 addrevisioncb,
2871 2854 deltareuse,
2872 2855 forcedeltabothparents,
2873 2856 sidedatacompanion,
2874 2857 )
2875 2858
2876 2859 finally:
2877 2860 destrevlog._lazydelta = oldlazydelta
2878 2861 destrevlog._lazydeltabase = oldlazydeltabase
2879 2862 destrevlog._deltabothparents = oldamd
2880 2863
2881 2864 def _clone(
2882 2865 self,
2883 2866 tr,
2884 2867 destrevlog,
2885 2868 addrevisioncb,
2886 2869 deltareuse,
2887 2870 forcedeltabothparents,
2888 2871 sidedatacompanion,
2889 2872 ):
2890 2873 """perform the core duty of `revlog.clone` after parameter processing"""
2891 2874 deltacomputer = deltautil.deltacomputer(destrevlog)
2892 2875 index = self.index
2893 2876 for rev in self:
2894 2877 entry = index[rev]
2895 2878
2896 2879 # Some classes override linkrev to take filtered revs into
2897 2880 # account. Use raw entry from index.
2898 2881 flags = entry[0] & 0xFFFF
2899 2882 linkrev = entry[4]
2900 2883 p1 = index[entry[5]][7]
2901 2884 p2 = index[entry[6]][7]
2902 2885 node = entry[7]
2903 2886
2904 2887 sidedataactions = (False, [], {}, 0, 0)
2905 2888 if sidedatacompanion is not None:
2906 2889 sidedataactions = sidedatacompanion(self, rev)
2907 2890
2908 2891 # (Possibly) reuse the delta from the revlog if allowed and
2909 2892 # the revlog chunk is a delta.
2910 2893 cachedelta = None
2911 2894 rawtext = None
2912 2895 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2913 2896 dropall = sidedataactions[0]
2914 2897 filterout = sidedataactions[1]
2915 2898 update = sidedataactions[2]
2916 2899 new_flags = sidedataactions[3]
2917 2900 dropped_flags = sidedataactions[4]
2918 2901 text, sidedata = self._revisiondata(rev)
2919 2902 if dropall:
2920 2903 sidedata = {}
2921 2904 for key in filterout:
2922 2905 sidedata.pop(key, None)
2923 2906 sidedata.update(update)
2924 2907 if not sidedata:
2925 2908 sidedata = None
2926 2909
2927 2910 flags |= new_flags
2928 2911 flags &= ~dropped_flags
2929 2912
2930 2913 destrevlog.addrevision(
2931 2914 text,
2932 2915 tr,
2933 2916 linkrev,
2934 2917 p1,
2935 2918 p2,
2936 2919 cachedelta=cachedelta,
2937 2920 node=node,
2938 2921 flags=flags,
2939 2922 deltacomputer=deltacomputer,
2940 2923 sidedata=sidedata,
2941 2924 )
2942 2925 else:
2943 2926 if destrevlog._lazydelta:
2944 2927 dp = self.deltaparent(rev)
2945 2928 if dp != nullrev:
2946 2929 cachedelta = (dp, bytes(self._chunk(rev)))
2947 2930
2948 2931 if not cachedelta:
2949 2932 rawtext = self.rawdata(rev)
2950 2933
2951 2934 ifh = destrevlog.opener(
2952 2935 destrevlog.indexfile, b'a+', checkambig=False
2953 2936 )
2954 2937 dfh = None
2955 2938 if not destrevlog._inline:
2956 2939 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2957 2940 try:
2958 2941 destrevlog._addrevision(
2959 2942 node,
2960 2943 rawtext,
2961 2944 tr,
2962 2945 linkrev,
2963 2946 p1,
2964 2947 p2,
2965 2948 flags,
2966 2949 cachedelta,
2967 2950 ifh,
2968 2951 dfh,
2969 2952 deltacomputer=deltacomputer,
2970 2953 )
2971 2954 finally:
2972 2955 if dfh:
2973 2956 dfh.close()
2974 2957 ifh.close()
2975 2958
2976 2959 if addrevisioncb:
2977 2960 addrevisioncb(self, rev, node)
2978 2961
2979 2962 def censorrevision(self, tr, censornode, tombstone=b''):
2980 2963 if (self.version & 0xFFFF) == REVLOGV0:
2981 2964 raise error.RevlogError(
2982 2965 _(b'cannot censor with version %d revlogs') % self.version
2983 2966 )
2984 2967
2985 2968 censorrev = self.rev(censornode)
2986 2969 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2987 2970
2988 2971 if len(tombstone) > self.rawsize(censorrev):
2989 2972 raise error.Abort(
2990 2973 _(b'censor tombstone must be no longer than censored data')
2991 2974 )
2992 2975
2993 2976 # Rewriting the revlog in place is hard. Our strategy for censoring is
2994 2977 # to create a new revlog, copy all revisions to it, then replace the
2995 2978 # revlogs on transaction close.
2996 2979
2997 2980 newindexfile = self.indexfile + b'.tmpcensored'
2998 2981 newdatafile = self.datafile + b'.tmpcensored'
2999 2982
3000 2983 # This is a bit dangerous. We could easily have a mismatch of state.
3001 2984 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3002 2985 newrl.version = self.version
3003 2986 newrl._generaldelta = self._generaldelta
3004 2987 newrl._io = self._io
3005 2988
3006 2989 for rev in self.revs():
3007 2990 node = self.node(rev)
3008 2991 p1, p2 = self.parents(node)
3009 2992
3010 2993 if rev == censorrev:
3011 2994 newrl.addrawrevision(
3012 2995 tombstone,
3013 2996 tr,
3014 2997 self.linkrev(censorrev),
3015 2998 p1,
3016 2999 p2,
3017 3000 censornode,
3018 3001 REVIDX_ISCENSORED,
3019 3002 )
3020 3003
3021 3004 if newrl.deltaparent(rev) != nullrev:
3022 3005 raise error.Abort(
3023 3006 _(
3024 3007 b'censored revision stored as delta; '
3025 3008 b'cannot censor'
3026 3009 ),
3027 3010 hint=_(
3028 3011 b'censoring of revlogs is not '
3029 3012 b'fully implemented; please report '
3030 3013 b'this bug'
3031 3014 ),
3032 3015 )
3033 3016 continue
3034 3017
3035 3018 if self.iscensored(rev):
3036 3019 if self.deltaparent(rev) != nullrev:
3037 3020 raise error.Abort(
3038 3021 _(
3039 3022 b'cannot censor due to censored '
3040 3023 b'revision having delta stored'
3041 3024 )
3042 3025 )
3043 3026 rawtext = self._chunk(rev)
3044 3027 else:
3045 3028 rawtext = self.rawdata(rev)
3046 3029
3047 3030 newrl.addrawrevision(
3048 3031 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3049 3032 )
3050 3033
3051 3034 tr.addbackup(self.indexfile, location=b'store')
3052 3035 if not self._inline:
3053 3036 tr.addbackup(self.datafile, location=b'store')
3054 3037
3055 3038 self.opener.rename(newrl.indexfile, self.indexfile)
3056 3039 if not self._inline:
3057 3040 self.opener.rename(newrl.datafile, self.datafile)
3058 3041
3059 3042 self.clearcaches()
3060 3043 self._loadindex()
3061 3044
3062 3045 def verifyintegrity(self, state):
3063 3046 """Verifies the integrity of the revlog.
3064 3047
3065 3048 Yields ``revlogproblem`` instances describing problems that are
3066 3049 found.
3067 3050 """
3068 3051 dd, di = self.checksize()
3069 3052 if dd:
3070 3053 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3071 3054 if di:
3072 3055 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3073 3056
3074 3057 version = self.version & 0xFFFF
3075 3058
3076 3059 # The verifier tells us what version revlog we should be.
3077 3060 if version != state[b'expectedversion']:
3078 3061 yield revlogproblem(
3079 3062 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3080 3063 % (self.indexfile, version, state[b'expectedversion'])
3081 3064 )
3082 3065
3083 3066 state[b'skipread'] = set()
3084 3067 state[b'safe_renamed'] = set()
3085 3068
3086 3069 for rev in self:
3087 3070 node = self.node(rev)
3088 3071
3089 3072 # Verify contents. 4 cases to care about:
3090 3073 #
3091 3074 # common: the most common case
3092 3075 # rename: with a rename
3093 3076 # meta: file content starts with b'\1\n', the metadata
3094 3077 # header defined in filelog.py, but without a rename
3095 3078 # ext: content stored externally
3096 3079 #
3097 3080 # More formally, their differences are shown below:
3098 3081 #
3099 3082 # | common | rename | meta | ext
3100 3083 # -------------------------------------------------------
3101 3084 # flags() | 0 | 0 | 0 | not 0
3102 3085 # renamed() | False | True | False | ?
3103 3086 # rawtext[0:2]=='\1\n'| False | True | True | ?
3104 3087 #
3105 3088 # "rawtext" means the raw text stored in revlog data, which
3106 3089 # could be retrieved by "rawdata(rev)". "text"
3107 3090 # mentioned below is "revision(rev)".
3108 3091 #
3109 3092 # There are 3 different lengths stored physically:
3110 3093 # 1. L1: rawsize, stored in revlog index
3111 3094 # 2. L2: len(rawtext), stored in revlog data
3112 3095 # 3. L3: len(text), stored in revlog data if flags==0, or
3113 3096 # possibly somewhere else if flags!=0
3114 3097 #
3115 3098 # L1 should be equal to L2. L3 could be different from them.
3116 3099 # "text" may or may not affect commit hash depending on flag
3117 3100 # processors (see flagutil.addflagprocessor).
3118 3101 #
3119 3102 # | common | rename | meta | ext
3120 3103 # -------------------------------------------------
3121 3104 # rawsize() | L1 | L1 | L1 | L1
3122 3105 # size() | L1 | L2-LM | L1(*) | L1 (?)
3123 3106 # len(rawtext) | L2 | L2 | L2 | L2
3124 3107 # len(text) | L2 | L2 | L2 | L3
3125 3108 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3126 3109 #
3127 3110 # LM: length of metadata, depending on rawtext
3128 3111 # (*): not ideal, see comment in filelog.size
3129 3112 # (?): could be "- len(meta)" if the resolved content has
3130 3113 # rename metadata
3131 3114 #
3132 3115 # Checks needed to be done:
3133 3116 # 1. length check: L1 == L2, in all cases.
3134 3117 # 2. hash check: depending on flag processor, we may need to
3135 3118 # use either "text" (external), or "rawtext" (in revlog).
3136 3119
3137 3120 try:
3138 3121 skipflags = state.get(b'skipflags', 0)
3139 3122 if skipflags:
3140 3123 skipflags &= self.flags(rev)
3141 3124
3142 3125 _verify_revision(self, skipflags, state, node)
3143 3126
3144 3127 l1 = self.rawsize(rev)
3145 3128 l2 = len(self.rawdata(node))
3146 3129
3147 3130 if l1 != l2:
3148 3131 yield revlogproblem(
3149 3132 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3150 3133 node=node,
3151 3134 )
3152 3135
3153 3136 except error.CensoredNodeError:
3154 3137 if state[b'erroroncensored']:
3155 3138 yield revlogproblem(
3156 3139 error=_(b'censored file data'), node=node
3157 3140 )
3158 3141 state[b'skipread'].add(node)
3159 3142 except Exception as e:
3160 3143 yield revlogproblem(
3161 3144 error=_(b'unpacking %s: %s')
3162 3145 % (short(node), stringutil.forcebytestr(e)),
3163 3146 node=node,
3164 3147 )
3165 3148 state[b'skipread'].add(node)
3166 3149
3167 3150 def storageinfo(
3168 3151 self,
3169 3152 exclusivefiles=False,
3170 3153 sharedfiles=False,
3171 3154 revisionscount=False,
3172 3155 trackedsize=False,
3173 3156 storedsize=False,
3174 3157 ):
3175 3158 d = {}
3176 3159
3177 3160 if exclusivefiles:
3178 3161 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3179 3162 if not self._inline:
3180 3163 d[b'exclusivefiles'].append((self.opener, self.datafile))
3181 3164
3182 3165 if sharedfiles:
3183 3166 d[b'sharedfiles'] = []
3184 3167
3185 3168 if revisionscount:
3186 3169 d[b'revisionscount'] = len(self)
3187 3170
3188 3171 if trackedsize:
3189 3172 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3190 3173
3191 3174 if storedsize:
3192 3175 d[b'storedsize'] = sum(
3193 3176 self.opener.stat(path).st_size for path in self.files()
3194 3177 )
3195 3178
3196 3179 return d
3197 3180
3198 3181 def rewrite_sidedata(self, helpers, startrev, endrev):
3199 3182 if self.version & 0xFFFF != REVLOGV2:
3200 3183 return
3201 3184 # inline are not yet supported because they suffer from an issue when
3202 3185 # rewriting them (since it's not an append-only operation).
3203 3186 # See issue6485.
3204 3187 assert not self._inline
3205 3188 if not helpers[1] and not helpers[2]:
3206 3189 # Nothing to generate or remove
3207 3190 return
3208 3191
3209 3192 new_entries = []
3210 3193 # append the new sidedata
3211 3194 with self._datafp(b'a+') as fp:
3212 3195 # Maybe this bug still exists, see revlog._writeentry
3213 3196 fp.seek(0, os.SEEK_END)
3214 3197 current_offset = fp.tell()
3215 3198 for rev in range(startrev, endrev + 1):
3216 3199 entry = self.index[rev]
3217 3200 new_sidedata = storageutil.run_sidedata_helpers(
3218 3201 store=self,
3219 3202 sidedata_helpers=helpers,
3220 3203 sidedata={},
3221 3204 rev=rev,
3222 3205 )
3223 3206
3224 3207 serialized_sidedata = sidedatautil.serialize_sidedata(
3225 3208 new_sidedata
3226 3209 )
3227 3210 if entry[8] != 0 or entry[9] != 0:
3228 3211 # rewriting entries that already have sidedata is not
3229 3212 # supported yet, because it introduces garbage data in the
3230 3213 # revlog.
3231 3214 msg = b"Rewriting existing sidedata is not supported yet"
3232 3215 raise error.Abort(msg)
3233 3216 entry = entry[:8]
3234 3217 entry += (current_offset, len(serialized_sidedata))
3235 3218
3236 3219 fp.write(serialized_sidedata)
3237 3220 new_entries.append(entry)
3238 3221 current_offset += len(serialized_sidedata)
3239 3222
3240 3223 # rewrite the new index entries
3241 3224 with self._indexfp(b'w+') as fp:
3242 3225 fp.seek(startrev * self.index.entry_size)
3243 3226 for i, entry in enumerate(new_entries):
3244 3227 rev = startrev + i
3245 3228 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3246 packed = self._io.packentry(entry, self.node, self.version, rev)
3229 packed = self.index.entry_binary(rev, self.version)
3247 3230 fp.write(packed)
@@ -1,494 +1,499 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 buffer::{Element, PyBuffer},
14 14 exc::{IndexError, ValueError},
15 15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 17 };
18 18 use hg::{
19 19 nodemap::{Block, NodeMapError, NodeTree},
20 20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 21 Revision,
22 22 };
23 23 use std::cell::RefCell;
24 24
25 25 /// Return a Struct implementing the Graph trait
26 26 pub(crate) fn pyindex_to_graph(
27 27 py: Python,
28 28 index: PyObject,
29 29 ) -> PyResult<cindex::Index> {
30 30 match index.extract::<MixedIndex>(py) {
31 31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 32 Err(_) => cindex::Index::new(py, index),
33 33 }
34 34 }
35 35
36 36 py_class!(pub class MixedIndex |py| {
37 37 data cindex: RefCell<cindex::Index>;
38 38 data nt: RefCell<Option<NodeTree>>;
39 39 data docket: RefCell<Option<PyObject>>;
40 40 // Holds a reference to the mmap'ed persistent nodemap data
41 41 data mmap: RefCell<Option<PyBuffer>>;
42 42
43 43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 44 Self::new(py, cindex)
45 45 }
46 46
47 47 /// Compatibility layer used for Python consumers needing access to the C index
48 48 ///
49 49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 52 /// this, by exposing our own standalone nodemap class,
53 53 /// ready to accept `MixedIndex`.
54 54 def get_cindex(&self) -> PyResult<PyObject> {
55 55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 56 }
57 57
58 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 59
60 60 /// Return Revision if found, raises a bare `error.RevlogError`
61 61 /// in case of ambiguity, same as C version does
62 62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 63 let opt = self.get_nodetree(py)?.borrow();
64 64 let nt = opt.as_ref().unwrap();
65 65 let idx = &*self.cindex(py).borrow();
66 66 let node = node_from_py_bytes(py, &node)?;
67 67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
68 68 }
69 69
70 70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 71 /// is not found.
72 72 ///
73 73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 74 /// will catch and rewrap with it
75 75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 77 }
78 78
79 79 /// return True if the node exist in the index
80 80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 81 self.get_rev(py, node).map(|opt| opt.is_some())
82 82 }
83 83
84 84 /// find length of shortest hex nodeid of a binary ID
85 85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 86 let opt = self.get_nodetree(py)?.borrow();
87 87 let nt = opt.as_ref().unwrap();
88 88 let idx = &*self.cindex(py).borrow();
89 89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 90 {
91 91 Ok(Some(l)) => Ok(l),
92 92 Ok(None) => Err(revlog_error(py)),
93 93 Err(e) => Err(nodemap_error(py, e)),
94 94 }
95 95 }
96 96
97 97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 98 let opt = self.get_nodetree(py)?.borrow();
99 99 let nt = opt.as_ref().unwrap();
100 100 let idx = &*self.cindex(py).borrow();
101 101
102 102 let node_as_string = if cfg!(feature = "python3-sys") {
103 103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 104 }
105 105 else {
106 106 let node = node.extract::<PyBytes>(py)?;
107 107 String::from_utf8_lossy(node.data(py)).to_string()
108 108 };
109 109
110 110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
111 111
112 112 nt.find_bin(idx, prefix)
113 113 // TODO make an inner API returning the node directly
114 114 .map(|opt| opt.map(
115 115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
116 116 .map_err(|e| nodemap_error(py, e))
117 117
118 118 }
119 119
120 120 /// append an index entry
121 121 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
122 122 if tup.len(py) < 8 {
123 123 // this is better than the panic promised by tup.get_item()
124 124 return Err(
125 125 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
126 126 }
127 127 let node_bytes = tup.get_item(py, 7).extract(py)?;
128 128 let node = node_from_py_object(py, &node_bytes)?;
129 129
130 130 let mut idx = self.cindex(py).borrow_mut();
131 131 let rev = idx.len() as Revision;
132 132
133 133 idx.append(py, tup)?;
134 134 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
135 135 .insert(&*idx, &node, rev)
136 136 .map_err(|e| nodemap_error(py, e))?;
137 137 Ok(py.None())
138 138 }
139 139
140 140 def __delitem__(&self, key: PyObject) -> PyResult<()> {
141 141 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
142 142 self.cindex(py).borrow().inner().del_item(py, key)?;
143 143 let mut opt = self.get_nodetree(py)?.borrow_mut();
144 144 let mut nt = opt.as_mut().unwrap();
145 145 nt.invalidate_all();
146 146 self.fill_nodemap(py, &mut nt)?;
147 147 Ok(())
148 148 }
149 149
150 150 //
151 151 // Reforwarded C index API
152 152 //
153 153
154 154 // index_methods (tp_methods). Same ordering as in revlog.c
155 155
156 156 /// return the gca set of the given revs
157 157 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
158 158 self.call_cindex(py, "ancestors", args, kw)
159 159 }
160 160
161 161 /// return the heads of the common ancestors of the given revs
162 162 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
163 163 self.call_cindex(py, "commonancestorsheads", args, kw)
164 164 }
165 165
166 166 /// Clear the index caches and inner py_class data.
167 167 /// It is Python's responsibility to call `update_nodemap_data` again.
168 168 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
169 169 self.nt(py).borrow_mut().take();
170 170 self.docket(py).borrow_mut().take();
171 171 self.mmap(py).borrow_mut().take();
172 172 self.call_cindex(py, "clearcaches", args, kw)
173 173 }
174 174
175 /// return the raw binary string representing a revision
176 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
177 self.call_cindex(py, "entry_binary", args, kw)
178 }
179
175 180 /// get an index entry
176 181 def get(&self, *args, **kw) -> PyResult<PyObject> {
177 182 self.call_cindex(py, "get", args, kw)
178 183 }
179 184
180 185 /// compute phases
181 186 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
182 187 self.call_cindex(py, "computephasesmapsets", args, kw)
183 188 }
184 189
185 190 /// reachableroots
186 191 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
187 192 self.call_cindex(py, "reachableroots2", args, kw)
188 193 }
189 194
190 195 /// get head revisions
191 196 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
192 197 self.call_cindex(py, "headrevs", args, kw)
193 198 }
194 199
195 200 /// get filtered head revisions
196 201 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
197 202 self.call_cindex(py, "headrevsfiltered", args, kw)
198 203 }
199 204
200 205 /// True if the object is a snapshot
201 206 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
202 207 self.call_cindex(py, "issnapshot", args, kw)
203 208 }
204 209
205 210 /// Gather snapshot data in a cache dict
206 211 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
207 212 self.call_cindex(py, "findsnapshots", args, kw)
208 213 }
209 214
210 215 /// determine revisions with deltas to reconstruct fulltext
211 216 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
212 217 self.call_cindex(py, "deltachain", args, kw)
213 218 }
214 219
215 220 /// slice planned chunk read to reach a density threshold
216 221 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
217 222 self.call_cindex(py, "slicechunktodensity", args, kw)
218 223 }
219 224
220 225 /// stats for the index
221 226 def stats(&self, *args, **kw) -> PyResult<PyObject> {
222 227 self.call_cindex(py, "stats", args, kw)
223 228 }
224 229
225 230 // index_sequence_methods and index_mapping_methods.
226 231 //
227 232 // Since we call back through the high level Python API,
228 233 // there's no point making a distinction between index_get
229 234 // and index_getitem.
230 235
231 236 def __len__(&self) -> PyResult<usize> {
232 237 self.cindex(py).borrow().inner().len(py)
233 238 }
234 239
235 240 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
236 241 // this conversion seems needless, but that's actually because
237 242 // `index_getitem` does not handle conversion from PyLong,
238 243 // which expressions such as [e for e in index] internally use.
239 244 // Note that we don't seem to have a direct way to call
240 245 // PySequence_GetItem (does the job), which would possibly be better
241 246 // for performance
242 247 let key = match key.extract::<Revision>(py) {
243 248 Ok(rev) => rev.to_py_object(py).into_object(),
244 249 Err(_) => key,
245 250 };
246 251 self.cindex(py).borrow().inner().get_item(py, key)
247 252 }
248 253
249 254 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
250 255 self.cindex(py).borrow().inner().set_item(py, key, value)
251 256 }
252 257
253 258 def __contains__(&self, item: PyObject) -> PyResult<bool> {
254 259 // ObjectProtocol does not seem to provide contains(), so
255 260 // this is an equivalent implementation of the index_contains()
256 261 // defined in revlog.c
257 262 let cindex = self.cindex(py).borrow();
258 263 match item.extract::<Revision>(py) {
259 264 Ok(rev) => {
260 265 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
261 266 }
262 267 Err(_) => {
263 268 cindex.inner().call_method(
264 269 py,
265 270 "has_node",
266 271 PyTuple::new(py, &[item]),
267 272 None)?
268 273 .extract(py)
269 274 }
270 275 }
271 276 }
272 277
273 278 def nodemap_data_all(&self) -> PyResult<PyBytes> {
274 279 self.inner_nodemap_data_all(py)
275 280 }
276 281
277 282 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
278 283 self.inner_nodemap_data_incremental(py)
279 284 }
280 285 def update_nodemap_data(
281 286 &self,
282 287 docket: PyObject,
283 288 nm_data: PyObject
284 289 ) -> PyResult<PyObject> {
285 290 self.inner_update_nodemap_data(py, docket, nm_data)
286 291 }
287 292
288 293 @property
289 294 def entry_size(&self) -> PyResult<PyInt> {
290 295 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
291 296 }
292 297
293 298 });
294 299
295 300 impl MixedIndex {
296 301 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
297 302 Self::create_instance(
298 303 py,
299 304 RefCell::new(cindex::Index::new(py, cindex)?),
300 305 RefCell::new(None),
301 306 RefCell::new(None),
302 307 RefCell::new(None),
303 308 )
304 309 }
305 310
306 311 /// This is scaffolding at this point, but it could also become
307 312 /// a way to start a persistent nodemap or perform a
308 313 /// vacuum / repack operation
309 314 fn fill_nodemap(
310 315 &self,
311 316 py: Python,
312 317 nt: &mut NodeTree,
313 318 ) -> PyResult<PyObject> {
314 319 let index = self.cindex(py).borrow();
315 320 for r in 0..index.len() {
316 321 let rev = r as Revision;
317 322 // in this case node() won't ever return None
318 323 nt.insert(&*index, index.node(rev).unwrap(), rev)
319 324 .map_err(|e| nodemap_error(py, e))?
320 325 }
321 326 Ok(py.None())
322 327 }
323 328
324 329 fn get_nodetree<'a>(
325 330 &'a self,
326 331 py: Python<'a>,
327 332 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
328 333 if self.nt(py).borrow().is_none() {
329 334 let readonly = Box::new(Vec::new());
330 335 let mut nt = NodeTree::load_bytes(readonly, 0);
331 336 self.fill_nodemap(py, &mut nt)?;
332 337 self.nt(py).borrow_mut().replace(nt);
333 338 }
334 339 Ok(self.nt(py))
335 340 }
336 341
337 342 /// forward a method call to the underlying C index
338 343 fn call_cindex(
339 344 &self,
340 345 py: Python,
341 346 name: &str,
342 347 args: &PyTuple,
343 348 kwargs: Option<&PyDict>,
344 349 ) -> PyResult<PyObject> {
345 350 self.cindex(py)
346 351 .borrow()
347 352 .inner()
348 353 .call_method(py, name, args, kwargs)
349 354 }
350 355
351 356 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
352 357 self.cindex(py).borrow().clone_ref(py)
353 358 }
354 359
355 360 /// Returns the full nodemap bytes to be written as-is to disk
356 361 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
357 362 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
358 363 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
359 364
360 365 // If there's anything readonly, we need to build the data again from
361 366 // scratch
362 367 let bytes = if readonly.len() > 0 {
363 368 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
364 369 self.fill_nodemap(py, &mut nt)?;
365 370
366 371 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
367 372 assert_eq!(readonly.len(), 0);
368 373
369 374 bytes
370 375 } else {
371 376 bytes
372 377 };
373 378
374 379 let bytes = PyBytes::new(py, &bytes);
375 380 Ok(bytes)
376 381 }
377 382
378 383 /// Returns the last saved docket along with the size of any changed data
379 384 /// (in number of blocks), and said data as bytes.
380 385 fn inner_nodemap_data_incremental(
381 386 &self,
382 387 py: Python,
383 388 ) -> PyResult<PyObject> {
384 389 let docket = self.docket(py).borrow();
385 390 let docket = match docket.as_ref() {
386 391 Some(d) => d,
387 392 None => return Ok(py.None()),
388 393 };
389 394
390 395 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
391 396 let masked_blocks = node_tree.masked_readonly_blocks();
392 397 let (_, data) = node_tree.into_readonly_and_added_bytes();
393 398 let changed = masked_blocks * std::mem::size_of::<Block>();
394 399
395 400 Ok((docket, changed, PyBytes::new(py, &data))
396 401 .to_py_object(py)
397 402 .into_object())
398 403 }
399 404
400 405 /// Update the nodemap from the new (mmaped) data.
401 406 /// The docket is kept as a reference for later incremental calls.
402 407 fn inner_update_nodemap_data(
403 408 &self,
404 409 py: Python,
405 410 docket: PyObject,
406 411 nm_data: PyObject,
407 412 ) -> PyResult<PyObject> {
408 413 let buf = PyBuffer::get(py, &nm_data)?;
409 414 let len = buf.item_count();
410 415
411 416 // Build a slice from the mmap'ed buffer data
412 417 let cbuf = buf.buf_ptr();
413 418 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
414 419 && buf.is_c_contiguous()
415 420 && u8::is_compatible_format(buf.format())
416 421 {
417 422 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
418 423 } else {
419 424 return Err(PyErr::new::<ValueError, _>(
420 425 py,
421 426 "Nodemap data buffer has an invalid memory representation"
422 427 .to_string(),
423 428 ));
424 429 };
425 430
426 431 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
427 432 // pointer.
428 433 self.mmap(py).borrow_mut().replace(buf);
429 434
430 435 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
431 436
432 437 let data_tip =
433 438 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
434 439 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
435 440 let idx = self.cindex(py).borrow();
436 441 let current_tip = idx.len();
437 442
438 443 for r in (data_tip + 1)..current_tip as Revision {
439 444 let rev = r as Revision;
440 445 // in this case node() won't ever return None
441 446 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
442 447 .map_err(|e| nodemap_error(py, e))?
443 448 }
444 449
445 450 *self.nt(py).borrow_mut() = Some(nt);
446 451
447 452 Ok(py.None())
448 453 }
449 454 }
450 455
451 456 fn revlog_error(py: Python) -> PyErr {
452 457 match py
453 458 .import("mercurial.error")
454 459 .and_then(|m| m.get(py, "RevlogError"))
455 460 {
456 461 Err(e) => e,
457 462 Ok(cls) => PyErr::from_instance(py, cls),
458 463 }
459 464 }
460 465
461 466 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
462 467 PyErr::new::<ValueError, _>(
463 468 py,
464 469 format!(
465 470 "Inconsistency: Revision {} found in nodemap \
466 471 is not in revlog index",
467 472 rev
468 473 ),
469 474 )
470 475 }
471 476
472 477 /// Standard treatment of NodeMapError
473 478 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
474 479 match err {
475 480 NodeMapError::MultipleResults => revlog_error(py),
476 481 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
477 482 }
478 483 }
479 484
480 485 /// Create the module, with __package__ given from parent
481 486 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
482 487 let dotted_name = &format!("{}.revlog", package);
483 488 let m = PyModule::new(py, dotted_name)?;
484 489 m.add(py, "__package__", package)?;
485 490 m.add(py, "__doc__", "RevLog - Rust implementations")?;
486 491
487 492 m.add_class::<MixedIndex>(py)?;
488 493
489 494 let sys = PyModule::import(py, "sys")?;
490 495 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
491 496 sys_modules.set_item(py, dotted_name, &m)?;
492 497
493 498 Ok(m)
494 499 }
General Comments 0
You need to be logged in to leave comments. Login now