##// END OF EJS Templates
revlog: have an explicit "pack_header" method...
marmoute -
r47811:d57386e5 default
parent child Browse files
Show More
@@ -1,3018 +1,3028 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 #ifdef IS_PY3K
27 27 /* The mapping of Python types is meant to be temporary to get Python
28 28 * 3 to compile. We should remove this once Python 3 support is fully
29 29 * supported and proper types are used in the extensions themselves. */
30 30 #define PyInt_Check PyLong_Check
31 31 #define PyInt_FromLong PyLong_FromLong
32 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 33 #define PyInt_AsLong PyLong_AsLong
34 34 #endif
35 35
36 36 typedef struct indexObjectStruct indexObject;
37 37
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetreenode;
41 41
42 42 typedef struct {
43 43 int abi_version;
44 44 Py_ssize_t (*index_length)(const indexObject *);
45 45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 46 int (*index_parents)(PyObject *, int, int *);
47 47 } Revlog_CAPI;
48 48
49 49 /*
50 50 * A base-16 trie for fast node->rev mapping.
51 51 *
52 52 * Positive value is index of the next node in the trie
53 53 * Negative value is a leaf: -(rev + 2)
54 54 * Zero is empty
55 55 */
56 56 typedef struct {
57 57 indexObject *index;
58 58 nodetreenode *nodes;
59 59 Py_ssize_t nodelen;
60 60 size_t length; /* # nodes in use */
61 61 size_t capacity; /* # nodes allocated */
62 62 int depth; /* maximum depth of tree */
63 63 int splits; /* # splits performed */
64 64 } nodetree;
65 65
66 66 typedef struct {
67 67 PyObject_HEAD /* ; */
68 68 nodetree nt;
69 69 } nodetreeObject;
70 70
71 71 /*
72 72 * This class has two behaviors.
73 73 *
74 74 * When used in a list-like way (with integer keys), we decode an
75 75 * entry in a RevlogNG index file on demand. We have limited support for
76 76 * integer-keyed insert and delete, only at elements right before the
77 77 * end.
78 78 *
79 79 * With string keys, we lazily perform a reverse mapping from node to
80 80 * rev, using a base-16 trie.
81 81 */
82 82 struct indexObjectStruct {
83 83 PyObject_HEAD
84 84 /* Type-specific fields go here. */
85 85 PyObject *data; /* raw bytes of index */
86 86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 87 PyObject *nullentry; /* fast path for references to null */
88 88 Py_buffer buf; /* buffer of data */
89 89 const char **offsets; /* populated on demand */
90 90 Py_ssize_t length; /* current on-disk number of elements */
91 91 unsigned new_length; /* number of added elements */
92 92 unsigned added_length; /* space reserved for added elements */
93 93 char *added; /* populated on demand */
94 94 PyObject *headrevs; /* cache, invalidated on changes */
95 95 PyObject *filteredrevs; /* filtered revs set */
96 96 nodetree nt; /* base-16 trie */
97 97 int ntinitialized; /* 0 or 1 */
98 98 int ntrev; /* last rev scanned */
99 99 int ntlookups; /* # lookups */
100 100 int ntmisses; /* # lookups that miss the cache */
101 101 int inlined;
102 102 long hdrsize; /* size of index headers. Differs in v1 v.s. v2 format */
103 103 };
104 104
105 105 static Py_ssize_t index_length(const indexObject *self)
106 106 {
107 107 return self->length + self->new_length;
108 108 }
109 109
110 110 static const char nullid[32] = {0};
111 111 static const Py_ssize_t nullrev = -1;
112 112
113 113 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
114 114
115 115 static int index_find_node(indexObject *self, const char *node);
116 116
117 117 #if LONG_MAX == 0x7fffffffL
118 118 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
119 119 static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
120 120 #else
121 121 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
122 122 static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
123 123 #endif
124 124
125 125 /* A RevlogNG v1 index entry is 64 bytes long. */
126 126 static const long v1_hdrsize = 64;
127 127
128 128 /* A Revlogv2 index entry is 96 bytes long. */
129 129 static const long v2_hdrsize = 96;
130 130
131 131 static void raise_revlog_error(void)
132 132 {
133 133 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
134 134
135 135 mod = PyImport_ImportModule("mercurial.error");
136 136 if (mod == NULL) {
137 137 goto cleanup;
138 138 }
139 139
140 140 dict = PyModule_GetDict(mod);
141 141 if (dict == NULL) {
142 142 goto cleanup;
143 143 }
144 144 Py_INCREF(dict);
145 145
146 146 errclass = PyDict_GetItemString(dict, "RevlogError");
147 147 if (errclass == NULL) {
148 148 PyErr_SetString(PyExc_SystemError,
149 149 "could not find RevlogError");
150 150 goto cleanup;
151 151 }
152 152
153 153 /* value of exception is ignored by callers */
154 154 PyErr_SetString(errclass, "RevlogError");
155 155
156 156 cleanup:
157 157 Py_XDECREF(dict);
158 158 Py_XDECREF(mod);
159 159 }
160 160
161 161 /*
162 162 * Return a pointer to the beginning of a RevlogNG record.
163 163 */
164 164 static const char *index_deref(indexObject *self, Py_ssize_t pos)
165 165 {
166 166 if (pos >= self->length)
167 167 return self->added + (pos - self->length) * self->hdrsize;
168 168
169 169 if (self->inlined && pos > 0) {
170 170 if (self->offsets == NULL) {
171 171 Py_ssize_t ret;
172 172 self->offsets =
173 173 PyMem_Malloc(self->length * sizeof(*self->offsets));
174 174 if (self->offsets == NULL)
175 175 return (const char *)PyErr_NoMemory();
176 176 ret = inline_scan(self, self->offsets);
177 177 if (ret == -1) {
178 178 return NULL;
179 179 };
180 180 }
181 181 return self->offsets[pos];
182 182 }
183 183
184 184 return (const char *)(self->buf.buf) + pos * self->hdrsize;
185 185 }
186 186
187 187 /*
188 188 * Get parents of the given rev.
189 189 *
190 190 * The specified rev must be valid and must not be nullrev. A returned
191 191 * parent revision may be nullrev, but is guaranteed to be in valid range.
192 192 */
193 193 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
194 194 int maxrev)
195 195 {
196 196 const char *data = index_deref(self, rev);
197 197
198 198 ps[0] = getbe32(data + 24);
199 199 ps[1] = getbe32(data + 28);
200 200
201 201 /* If index file is corrupted, ps[] may point to invalid revisions. So
202 202 * there is a risk of buffer overflow to trust them unconditionally. */
203 203 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
204 204 PyErr_SetString(PyExc_ValueError, "parent out of range");
205 205 return -1;
206 206 }
207 207 return 0;
208 208 }
209 209
210 210 /*
211 211 * Get parents of the given rev.
212 212 *
213 213 * If the specified rev is out of range, IndexError will be raised. If the
214 214 * revlog entry is corrupted, ValueError may be raised.
215 215 *
216 216 * Returns 0 on success or -1 on failure.
217 217 */
218 218 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
219 219 {
220 220 int tiprev;
221 221 if (!op || !HgRevlogIndex_Check(op) || !ps) {
222 222 PyErr_BadInternalCall();
223 223 return -1;
224 224 }
225 225 tiprev = (int)index_length((indexObject *)op) - 1;
226 226 if (rev < -1 || rev > tiprev) {
227 227 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
228 228 return -1;
229 229 } else if (rev == -1) {
230 230 ps[0] = ps[1] = -1;
231 231 return 0;
232 232 } else {
233 233 return index_get_parents((indexObject *)op, rev, ps, tiprev);
234 234 }
235 235 }
236 236
237 237 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
238 238 {
239 239 const char *data;
240 240 uint64_t offset;
241 241
242 242 if (rev == nullrev)
243 243 return 0;
244 244
245 245 data = index_deref(self, rev);
246 246 offset = getbe32(data + 4);
247 247 if (rev == 0) {
248 248 /* mask out version number for the first entry */
249 249 offset &= 0xFFFF;
250 250 } else {
251 251 uint32_t offset_high = getbe32(data);
252 252 offset |= ((uint64_t)offset_high) << 32;
253 253 }
254 254 return (int64_t)(offset >> 16);
255 255 }
256 256
257 257 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
258 258 {
259 259 const char *data;
260 260 int tmp;
261 261
262 262 if (rev == nullrev)
263 263 return 0;
264 264
265 265 data = index_deref(self, rev);
266 266
267 267 tmp = (int)getbe32(data + 8);
268 268 if (tmp < 0) {
269 269 PyErr_Format(PyExc_OverflowError,
270 270 "revlog entry size out of bound (%d)", tmp);
271 271 return -1;
272 272 }
273 273 return tmp;
274 274 }
275 275
276 276 /*
277 277 * RevlogNG format (all in big endian, data may be inlined):
278 278 * 6 bytes: offset
279 279 * 2 bytes: flags
280 280 * 4 bytes: compressed length
281 281 * 4 bytes: uncompressed length
282 282 * 4 bytes: base revision
283 283 * 4 bytes: link revision
284 284 * 4 bytes: parent 1 revision
285 285 * 4 bytes: parent 2 revision
286 286 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
287 287 */
288 288 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
289 289 {
290 290 uint64_t offset_flags, sidedata_offset;
291 291 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
292 292 sidedata_comp_len;
293 293 const char *c_node_id;
294 294 const char *data;
295 295 Py_ssize_t length = index_length(self);
296 296
297 297 if (pos == nullrev) {
298 298 Py_INCREF(self->nullentry);
299 299 return self->nullentry;
300 300 }
301 301
302 302 if (pos < 0 || pos >= length) {
303 303 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
304 304 return NULL;
305 305 }
306 306
307 307 data = index_deref(self, pos);
308 308 if (data == NULL)
309 309 return NULL;
310 310
311 311 offset_flags = getbe32(data + 4);
312 312 /*
313 313 * The first entry on-disk needs the version number masked out,
314 314 * but this doesn't apply if entries are added to an empty index.
315 315 */
316 316 if (self->length && pos == 0)
317 317 offset_flags &= 0xFFFF;
318 318 else {
319 319 uint32_t offset_high = getbe32(data);
320 320 offset_flags |= ((uint64_t)offset_high) << 32;
321 321 }
322 322
323 323 comp_len = getbe32(data + 8);
324 324 uncomp_len = getbe32(data + 12);
325 325 base_rev = getbe32(data + 16);
326 326 link_rev = getbe32(data + 20);
327 327 parent_1 = getbe32(data + 24);
328 328 parent_2 = getbe32(data + 28);
329 329 c_node_id = data + 32;
330 330
331 331 if (self->hdrsize == v1_hdrsize) {
332 332 return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
333 333 uncomp_len, base_rev, link_rev, parent_1,
334 334 parent_2, c_node_id, self->nodelen);
335 335 } else {
336 336 sidedata_offset = getbe64(data + 64);
337 337 sidedata_comp_len = getbe32(data + 72);
338 338
339 339 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
340 340 uncomp_len, base_rev, link_rev, parent_1,
341 341 parent_2, c_node_id, self->nodelen,
342 342 sidedata_offset, sidedata_comp_len);
343 343 }
344 344 }
345 345 /*
346 * Pack header information in binary
347 */
348 static PyObject *index_pack_header(indexObject *self, PyObject *args)
349 {
350 int header;
351 char out[4];
352 if (!PyArg_ParseTuple(args, "I", &header)) {
353 return NULL;
354 }
355 putbe32(header, out);
356 return PyBytes_FromStringAndSize(out, 4);
357 }
358 /*
346 359 * Return the raw binary string representing a revision
347 360 */
348 static PyObject *index_entry_binary(indexObject *self, PyObject *args)
361 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
349 362 {
350 363 long rev;
351 int header;
352 364 const char *data;
353 char entry[v2_hdrsize];
354
355 365 Py_ssize_t length = index_length(self);
356 366
357 if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
367 if (!pylong_to_long(value, &rev)) {
358 368 return NULL;
359 369 }
360 370 if (rev < 0 || rev >= length) {
361 371 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
362 372 rev);
363 373 return NULL;
364 374 };
365 375
366 376 data = index_deref(self, rev);
367 377 if (data == NULL)
368 378 return NULL;
369 379 if (rev == 0) {
370 /* put the header at the start of the first entry */
371 memcpy(entry, data, self->hdrsize);
372 putbe32(header, entry);
373 return PyBytes_FromStringAndSize(entry, self->hdrsize);
380 /* the header is eating the start of the first entry */
381 return PyBytes_FromStringAndSize(data + 4, self->hdrsize - 4);
374 382 }
375 383 return PyBytes_FromStringAndSize(data, self->hdrsize);
376 384 }
377 385
378 386 /*
379 387 * Return the hash of node corresponding to the given rev.
380 388 */
381 389 static const char *index_node(indexObject *self, Py_ssize_t pos)
382 390 {
383 391 Py_ssize_t length = index_length(self);
384 392 const char *data;
385 393
386 394 if (pos == nullrev)
387 395 return nullid;
388 396
389 397 if (pos >= length)
390 398 return NULL;
391 399
392 400 data = index_deref(self, pos);
393 401 return data ? data + 32 : NULL;
394 402 }
395 403
396 404 /*
397 405 * Return the hash of the node corresponding to the given rev. The
398 406 * rev is assumed to be existing. If not, an exception is set.
399 407 */
400 408 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
401 409 {
402 410 const char *node = index_node(self, pos);
403 411 if (node == NULL) {
404 412 PyErr_Format(PyExc_IndexError, "could not access rev %d",
405 413 (int)pos);
406 414 }
407 415 return node;
408 416 }
409 417
410 418 static int nt_insert(nodetree *self, const char *node, int rev);
411 419
412 420 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
413 421 {
414 422 Py_ssize_t thisnodelen;
415 423 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
416 424 return -1;
417 425 if (nodelen == thisnodelen)
418 426 return 0;
419 427 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
420 428 thisnodelen, nodelen);
421 429 return -1;
422 430 }
423 431
424 432 static PyObject *index_append(indexObject *self, PyObject *obj)
425 433 {
426 434 uint64_t offset_flags, sidedata_offset;
427 435 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
428 436 Py_ssize_t c_node_id_len, sidedata_comp_len;
429 437 const char *c_node_id;
430 438 char *data;
431 439
432 440 if (self->hdrsize == v1_hdrsize) {
433 441 if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
434 442 &comp_len, &uncomp_len, &base_rev,
435 443 &link_rev, &parent_1, &parent_2,
436 444 &c_node_id, &c_node_id_len)) {
437 445 PyErr_SetString(PyExc_TypeError, "8-tuple required");
438 446 return NULL;
439 447 }
440 448 } else {
441 449 if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
442 450 &comp_len, &uncomp_len, &base_rev,
443 451 &link_rev, &parent_1, &parent_2,
444 452 &c_node_id, &c_node_id_len,
445 453 &sidedata_offset, &sidedata_comp_len)) {
446 454 PyErr_SetString(PyExc_TypeError, "10-tuple required");
447 455 return NULL;
448 456 }
449 457 }
450 458
451 459 if (c_node_id_len != self->nodelen) {
452 460 PyErr_SetString(PyExc_TypeError, "invalid node");
453 461 return NULL;
454 462 }
455 463
456 464 if (self->new_length == self->added_length) {
457 465 size_t new_added_length =
458 466 self->added_length ? self->added_length * 2 : 4096;
459 467 void *new_added = PyMem_Realloc(self->added, new_added_length *
460 468 self->hdrsize);
461 469 if (!new_added)
462 470 return PyErr_NoMemory();
463 471 self->added = new_added;
464 472 self->added_length = new_added_length;
465 473 }
466 474 rev = self->length + self->new_length;
467 475 data = self->added + self->hdrsize * self->new_length++;
468 476 putbe32(offset_flags >> 32, data);
469 477 putbe32(offset_flags & 0xffffffffU, data + 4);
470 478 putbe32(comp_len, data + 8);
471 479 putbe32(uncomp_len, data + 12);
472 480 putbe32(base_rev, data + 16);
473 481 putbe32(link_rev, data + 20);
474 482 putbe32(parent_1, data + 24);
475 483 putbe32(parent_2, data + 28);
476 484 memcpy(data + 32, c_node_id, c_node_id_len);
477 485 /* Padding since SHA-1 is only 20 bytes for now */
478 486 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
479 487 if (self->hdrsize != v1_hdrsize) {
480 488 putbe64(sidedata_offset, data + 64);
481 489 putbe32(sidedata_comp_len, data + 72);
482 490 /* Padding for 96 bytes alignment */
483 491 memset(data + 76, 0, self->hdrsize - 76);
484 492 }
485 493
486 494 if (self->ntinitialized)
487 495 nt_insert(&self->nt, c_node_id, rev);
488 496
489 497 Py_CLEAR(self->headrevs);
490 498 Py_RETURN_NONE;
491 499 }
492 500
493 501 /* Replace an existing index entry's sidedata offset and length with new ones.
494 502 This cannot be used outside of the context of sidedata rewriting,
495 503 inside the transaction that creates the given revision. */
496 504 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
497 505 {
498 506 uint64_t sidedata_offset;
499 507 int rev;
500 508 Py_ssize_t sidedata_comp_len;
501 509 char *data;
502 510 #if LONG_MAX == 0x7fffffffL
503 511 const char *const sidedata_format = PY23("nKi", "nKi");
504 512 #else
505 513 const char *const sidedata_format = PY23("nki", "nki");
506 514 #endif
507 515
508 516 if (self->hdrsize == v1_hdrsize || self->inlined) {
509 517 /*
510 518 There is a bug in the transaction handling when going from an
511 519 inline revlog to a separate index and data file. Turn it off until
512 520 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
513 521 See issue6485.
514 522 */
515 523 raise_revlog_error();
516 524 return NULL;
517 525 }
518 526
519 527 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
520 528 &sidedata_comp_len))
521 529 return NULL;
522 530
523 531 if (rev < 0 || rev >= index_length(self)) {
524 532 PyErr_SetString(PyExc_IndexError, "revision outside index");
525 533 return NULL;
526 534 }
527 535 if (rev < self->length) {
528 536 PyErr_SetString(
529 537 PyExc_IndexError,
530 538 "cannot rewrite entries outside of this transaction");
531 539 return NULL;
532 540 }
533 541
534 542 /* Find the newly added node, offset from the "already on-disk" length
535 543 */
536 544 data = self->added + self->hdrsize * (rev - self->length);
537 545 putbe64(sidedata_offset, data + 64);
538 546 putbe32(sidedata_comp_len, data + 72);
539 547
540 548 Py_RETURN_NONE;
541 549 }
542 550
543 551 static PyObject *index_stats(indexObject *self)
544 552 {
545 553 PyObject *obj = PyDict_New();
546 554 PyObject *s = NULL;
547 555 PyObject *t = NULL;
548 556
549 557 if (obj == NULL)
550 558 return NULL;
551 559
552 560 #define istat(__n, __d) \
553 561 do { \
554 562 s = PyBytes_FromString(__d); \
555 563 t = PyInt_FromSsize_t(self->__n); \
556 564 if (!s || !t) \
557 565 goto bail; \
558 566 if (PyDict_SetItem(obj, s, t) == -1) \
559 567 goto bail; \
560 568 Py_CLEAR(s); \
561 569 Py_CLEAR(t); \
562 570 } while (0)
563 571
564 572 if (self->added_length)
565 573 istat(new_length, "index entries added");
566 574 istat(length, "revs in memory");
567 575 istat(ntlookups, "node trie lookups");
568 576 istat(ntmisses, "node trie misses");
569 577 istat(ntrev, "node trie last rev scanned");
570 578 if (self->ntinitialized) {
571 579 istat(nt.capacity, "node trie capacity");
572 580 istat(nt.depth, "node trie depth");
573 581 istat(nt.length, "node trie count");
574 582 istat(nt.splits, "node trie splits");
575 583 }
576 584
577 585 #undef istat
578 586
579 587 return obj;
580 588
581 589 bail:
582 590 Py_XDECREF(obj);
583 591 Py_XDECREF(s);
584 592 Py_XDECREF(t);
585 593 return NULL;
586 594 }
587 595
588 596 /*
589 597 * When we cache a list, we want to be sure the caller can't mutate
590 598 * the cached copy.
591 599 */
592 600 static PyObject *list_copy(PyObject *list)
593 601 {
594 602 Py_ssize_t len = PyList_GET_SIZE(list);
595 603 PyObject *newlist = PyList_New(len);
596 604 Py_ssize_t i;
597 605
598 606 if (newlist == NULL)
599 607 return NULL;
600 608
601 609 for (i = 0; i < len; i++) {
602 610 PyObject *obj = PyList_GET_ITEM(list, i);
603 611 Py_INCREF(obj);
604 612 PyList_SET_ITEM(newlist, i, obj);
605 613 }
606 614
607 615 return newlist;
608 616 }
609 617
610 618 static int check_filter(PyObject *filter, Py_ssize_t arg)
611 619 {
612 620 if (filter) {
613 621 PyObject *arglist, *result;
614 622 int isfiltered;
615 623
616 624 arglist = Py_BuildValue("(n)", arg);
617 625 if (!arglist) {
618 626 return -1;
619 627 }
620 628
621 629 result = PyObject_Call(filter, arglist, NULL);
622 630 Py_DECREF(arglist);
623 631 if (!result) {
624 632 return -1;
625 633 }
626 634
627 635 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
628 636 * same as this function, so we can just return it directly.*/
629 637 isfiltered = PyObject_IsTrue(result);
630 638 Py_DECREF(result);
631 639 return isfiltered;
632 640 } else {
633 641 return 0;
634 642 }
635 643 }
636 644
637 645 static inline void set_phase_from_parents(char *phases, int parent_1,
638 646 int parent_2, Py_ssize_t i)
639 647 {
640 648 if (parent_1 >= 0 && phases[parent_1] > phases[i])
641 649 phases[i] = phases[parent_1];
642 650 if (parent_2 >= 0 && phases[parent_2] > phases[i])
643 651 phases[i] = phases[parent_2];
644 652 }
645 653
646 654 static PyObject *reachableroots2(indexObject *self, PyObject *args)
647 655 {
648 656
649 657 /* Input */
650 658 long minroot;
651 659 PyObject *includepatharg = NULL;
652 660 int includepath = 0;
653 661 /* heads and roots are lists */
654 662 PyObject *heads = NULL;
655 663 PyObject *roots = NULL;
656 664 PyObject *reachable = NULL;
657 665
658 666 PyObject *val;
659 667 Py_ssize_t len = index_length(self);
660 668 long revnum;
661 669 Py_ssize_t k;
662 670 Py_ssize_t i;
663 671 Py_ssize_t l;
664 672 int r;
665 673 int parents[2];
666 674
667 675 /* Internal data structure:
668 676 * tovisit: array of length len+1 (all revs + nullrev), filled upto
669 677 * lentovisit
670 678 *
671 679 * revstates: array of length len+1 (all revs + nullrev) */
672 680 int *tovisit = NULL;
673 681 long lentovisit = 0;
674 682 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
675 683 char *revstates = NULL;
676 684
677 685 /* Get arguments */
678 686 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
679 687 &PyList_Type, &roots, &PyBool_Type,
680 688 &includepatharg))
681 689 goto bail;
682 690
683 691 if (includepatharg == Py_True)
684 692 includepath = 1;
685 693
686 694 /* Initialize return set */
687 695 reachable = PyList_New(0);
688 696 if (reachable == NULL)
689 697 goto bail;
690 698
691 699 /* Initialize internal datastructures */
692 700 tovisit = (int *)malloc((len + 1) * sizeof(int));
693 701 if (tovisit == NULL) {
694 702 PyErr_NoMemory();
695 703 goto bail;
696 704 }
697 705
698 706 revstates = (char *)calloc(len + 1, 1);
699 707 if (revstates == NULL) {
700 708 PyErr_NoMemory();
701 709 goto bail;
702 710 }
703 711
704 712 l = PyList_GET_SIZE(roots);
705 713 for (i = 0; i < l; i++) {
706 714 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
707 715 if (revnum == -1 && PyErr_Occurred())
708 716 goto bail;
709 717 /* If root is out of range, e.g. wdir(), it must be unreachable
710 718 * from heads. So we can just ignore it. */
711 719 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
712 720 continue;
713 721 revstates[revnum + 1] |= RS_ROOT;
714 722 }
715 723
716 724 /* Populate tovisit with all the heads */
717 725 l = PyList_GET_SIZE(heads);
718 726 for (i = 0; i < l; i++) {
719 727 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
720 728 if (revnum == -1 && PyErr_Occurred())
721 729 goto bail;
722 730 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
723 731 PyErr_SetString(PyExc_IndexError, "head out of range");
724 732 goto bail;
725 733 }
726 734 if (!(revstates[revnum + 1] & RS_SEEN)) {
727 735 tovisit[lentovisit++] = (int)revnum;
728 736 revstates[revnum + 1] |= RS_SEEN;
729 737 }
730 738 }
731 739
732 740 /* Visit the tovisit list and find the reachable roots */
733 741 k = 0;
734 742 while (k < lentovisit) {
735 743 /* Add the node to reachable if it is a root*/
736 744 revnum = tovisit[k++];
737 745 if (revstates[revnum + 1] & RS_ROOT) {
738 746 revstates[revnum + 1] |= RS_REACHABLE;
739 747 val = PyInt_FromLong(revnum);
740 748 if (val == NULL)
741 749 goto bail;
742 750 r = PyList_Append(reachable, val);
743 751 Py_DECREF(val);
744 752 if (r < 0)
745 753 goto bail;
746 754 if (includepath == 0)
747 755 continue;
748 756 }
749 757
750 758 /* Add its parents to the list of nodes to visit */
751 759 if (revnum == nullrev)
752 760 continue;
753 761 r = index_get_parents(self, revnum, parents, (int)len - 1);
754 762 if (r < 0)
755 763 goto bail;
756 764 for (i = 0; i < 2; i++) {
757 765 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
758 766 parents[i] >= minroot) {
759 767 tovisit[lentovisit++] = parents[i];
760 768 revstates[parents[i] + 1] |= RS_SEEN;
761 769 }
762 770 }
763 771 }
764 772
765 773 /* Find all the nodes in between the roots we found and the heads
766 774 * and add them to the reachable set */
767 775 if (includepath == 1) {
768 776 long minidx = minroot;
769 777 if (minidx < 0)
770 778 minidx = 0;
771 779 for (i = minidx; i < len; i++) {
772 780 if (!(revstates[i + 1] & RS_SEEN))
773 781 continue;
774 782 r = index_get_parents(self, i, parents, (int)len - 1);
775 783 /* Corrupted index file, error is set from
776 784 * index_get_parents */
777 785 if (r < 0)
778 786 goto bail;
779 787 if (((revstates[parents[0] + 1] |
780 788 revstates[parents[1] + 1]) &
781 789 RS_REACHABLE) &&
782 790 !(revstates[i + 1] & RS_REACHABLE)) {
783 791 revstates[i + 1] |= RS_REACHABLE;
784 792 val = PyInt_FromSsize_t(i);
785 793 if (val == NULL)
786 794 goto bail;
787 795 r = PyList_Append(reachable, val);
788 796 Py_DECREF(val);
789 797 if (r < 0)
790 798 goto bail;
791 799 }
792 800 }
793 801 }
794 802
795 803 free(revstates);
796 804 free(tovisit);
797 805 return reachable;
798 806 bail:
799 807 Py_XDECREF(reachable);
800 808 free(revstates);
801 809 free(tovisit);
802 810 return NULL;
803 811 }
804 812
805 813 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
806 814 char phase)
807 815 {
808 816 Py_ssize_t len = index_length(self);
809 817 PyObject *item;
810 818 PyObject *iterator;
811 819 int rev, minrev = -1;
812 820 char *node;
813 821
814 822 if (!PySet_Check(roots)) {
815 823 PyErr_SetString(PyExc_TypeError,
816 824 "roots must be a set of nodes");
817 825 return -2;
818 826 }
819 827 iterator = PyObject_GetIter(roots);
820 828 if (iterator == NULL)
821 829 return -2;
822 830 while ((item = PyIter_Next(iterator))) {
823 831 if (node_check(self->nodelen, item, &node) == -1)
824 832 goto failed;
825 833 rev = index_find_node(self, node);
826 834 /* null is implicitly public, so negative is invalid */
827 835 if (rev < 0 || rev >= len)
828 836 goto failed;
829 837 phases[rev] = phase;
830 838 if (minrev == -1 || minrev > rev)
831 839 minrev = rev;
832 840 Py_DECREF(item);
833 841 }
834 842 Py_DECREF(iterator);
835 843 return minrev;
836 844 failed:
837 845 Py_DECREF(iterator);
838 846 Py_DECREF(item);
839 847 return -2;
840 848 }
841 849
842 850 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
843 851 {
844 852 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
845 853 96: internal */
846 854 static const char trackedphases[] = {1, 2, 32, 96};
847 855 PyObject *roots = Py_None;
848 856 PyObject *phasesetsdict = NULL;
849 857 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
850 858 Py_ssize_t len = index_length(self);
851 859 char *phases = NULL;
852 860 int minphaserev = -1, rev, i;
853 861 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
854 862
855 863 if (!PyArg_ParseTuple(args, "O", &roots))
856 864 return NULL;
857 865 if (roots == NULL || !PyDict_Check(roots)) {
858 866 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
859 867 return NULL;
860 868 }
861 869
862 870 phases = calloc(len, 1);
863 871 if (phases == NULL) {
864 872 PyErr_NoMemory();
865 873 return NULL;
866 874 }
867 875
868 876 for (i = 0; i < numphases; ++i) {
869 877 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
870 878 PyObject *phaseroots = NULL;
871 879 if (pyphase == NULL)
872 880 goto release;
873 881 phaseroots = PyDict_GetItem(roots, pyphase);
874 882 Py_DECREF(pyphase);
875 883 if (phaseroots == NULL)
876 884 continue;
877 885 rev = add_roots_get_min(self, phaseroots, phases,
878 886 trackedphases[i]);
879 887 if (rev == -2)
880 888 goto release;
881 889 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
882 890 minphaserev = rev;
883 891 }
884 892
885 893 for (i = 0; i < numphases; ++i) {
886 894 phasesets[i] = PySet_New(NULL);
887 895 if (phasesets[i] == NULL)
888 896 goto release;
889 897 }
890 898
891 899 if (minphaserev == -1)
892 900 minphaserev = len;
893 901 for (rev = minphaserev; rev < len; ++rev) {
894 902 PyObject *pyphase = NULL;
895 903 PyObject *pyrev = NULL;
896 904 int parents[2];
897 905 /*
898 906 * The parent lookup could be skipped for phaseroots, but
899 907 * phase --force would historically not recompute them
900 908 * correctly, leaving descendents with a lower phase around.
901 909 * As such, unconditionally recompute the phase.
902 910 */
903 911 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
904 912 goto release;
905 913 set_phase_from_parents(phases, parents[0], parents[1], rev);
906 914 switch (phases[rev]) {
907 915 case 0:
908 916 continue;
909 917 case 1:
910 918 pyphase = phasesets[0];
911 919 break;
912 920 case 2:
913 921 pyphase = phasesets[1];
914 922 break;
915 923 case 32:
916 924 pyphase = phasesets[2];
917 925 break;
918 926 case 96:
919 927 pyphase = phasesets[3];
920 928 break;
921 929 default:
922 930 /* this should never happen since the phase number is
923 931 * specified by this function. */
924 932 PyErr_SetString(PyExc_SystemError,
925 933 "bad phase number in internal list");
926 934 goto release;
927 935 }
928 936 pyrev = PyInt_FromLong(rev);
929 937 if (pyrev == NULL)
930 938 goto release;
931 939 if (PySet_Add(pyphase, pyrev) == -1) {
932 940 Py_DECREF(pyrev);
933 941 goto release;
934 942 }
935 943 Py_DECREF(pyrev);
936 944 }
937 945
938 946 phasesetsdict = _dict_new_presized(numphases);
939 947 if (phasesetsdict == NULL)
940 948 goto release;
941 949 for (i = 0; i < numphases; ++i) {
942 950 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
943 951 if (pyphase == NULL)
944 952 goto release;
945 953 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
946 954 -1) {
947 955 Py_DECREF(pyphase);
948 956 goto release;
949 957 }
950 958 Py_DECREF(phasesets[i]);
951 959 phasesets[i] = NULL;
952 960 }
953 961
954 962 return Py_BuildValue("nN", len, phasesetsdict);
955 963
956 964 release:
957 965 for (i = 0; i < numphases; ++i)
958 966 Py_XDECREF(phasesets[i]);
959 967 Py_XDECREF(phasesetsdict);
960 968
961 969 free(phases);
962 970 return NULL;
963 971 }
964 972
/*
 * headrevs([filteredrevs]) -> list of revisions that have no unfiltered
 * children.
 *
 * `filteredrevs`, when not None, must support `in` (__contains__); any
 * revision it contains is skipped. The result is cached on the index
 * (self->headrevs) and reused as long as the same filter object is
 * passed; a copy of the cached list is always returned to the caller.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL; /* nothead[i] == 1 => rev i is not a head */
	PyObject *heads = NULL;
	PyObject *filter = NULL; /* bound __contains__ of filteredrevs */
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit: same filter object as last time */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	/* remember the filter used for this computation */
	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		/* empty repository: nullrev is the only head */
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* walk newest-to-oldest, marking parents of unfiltered revs */
	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* everything not marked is a head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	/* cache the computed list; hand the caller a copy */
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
1066 1074
1067 1075 /**
1068 1076 * Obtain the base revision index entry.
1069 1077 *
1070 1078 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1071 1079 */
1072 1080 static inline int index_baserev(indexObject *self, int rev)
1073 1081 {
1074 1082 const char *data;
1075 1083 int result;
1076 1084
1077 1085 data = index_deref(self, rev);
1078 1086 if (data == NULL)
1079 1087 return -2;
1080 1088 result = getbe32(data + 16);
1081 1089
1082 1090 if (result > rev) {
1083 1091 PyErr_Format(
1084 1092 PyExc_ValueError,
1085 1093 "corrupted revlog, revision base above revision: %d, %d",
1086 1094 rev, result);
1087 1095 return -2;
1088 1096 }
1089 1097 if (result < -1) {
1090 1098 PyErr_Format(
1091 1099 PyExc_ValueError,
1092 1100 "corrupted revlog, revision base out of range: %d, %d", rev,
1093 1101 result);
1094 1102 return -2;
1095 1103 }
1096 1104 return result;
1097 1105 }
1098 1106
/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 *
 * Returns 1 if `rev` is a snapshot, 0 if it is a plain delta against one
 * of its parents, -1 on error (exception set). Walks the delta-base
 * chain downward; a chain that bottoms out at nullrev without ever
 * using a parent as delta base denotes a snapshot.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	Py_ssize_t base;
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			/* an entry whose base is itself is normalized to
			 * "no base" (nullrev) */
			base = -1;
		}
		if (base == -2) {
			/* error already reported by index_baserev */
			assert(PyErr_Occurred());
			return -1;
		}
		if (base == -1) {
			/* chain bottoms out at nullrev: snapshot */
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		if (base == ps[0] || base == ps[1]) {
			/* delta against a parent: not a snapshot */
			return 0;
		}
		rev = base;
	}
	/* only reachable when the initial rev was negative; nullrev (-1)
	 * counts as a (trivial) snapshot */
	return rev == -1;
}
1132 1140
1133 1141 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1134 1142 {
1135 1143 long rev;
1136 1144 int issnap;
1137 1145 Py_ssize_t length = index_length(self);
1138 1146
1139 1147 if (!pylong_to_long(value, &rev)) {
1140 1148 return NULL;
1141 1149 }
1142 1150 if (rev < -1 || rev >= length) {
1143 1151 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1144 1152 rev);
1145 1153 return NULL;
1146 1154 };
1147 1155 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1148 1156 if (issnap < 0) {
1149 1157 return NULL;
1150 1158 };
1151 1159 return PyBool_FromLong((long)issnap);
1152 1160 }
1153 1161
1154 1162 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1155 1163 {
1156 1164 Py_ssize_t start_rev;
1157 1165 PyObject *cache;
1158 1166 Py_ssize_t base;
1159 1167 Py_ssize_t rev;
1160 1168 PyObject *key = NULL;
1161 1169 PyObject *value = NULL;
1162 1170 const Py_ssize_t length = index_length(self);
1163 1171 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1164 1172 return NULL;
1165 1173 }
1166 1174 for (rev = start_rev; rev < length; rev++) {
1167 1175 int issnap;
1168 1176 PyObject *allvalues = NULL;
1169 1177 issnap = index_issnapshotrev(self, rev);
1170 1178 if (issnap < 0) {
1171 1179 goto bail;
1172 1180 }
1173 1181 if (issnap == 0) {
1174 1182 continue;
1175 1183 }
1176 1184 base = (Py_ssize_t)index_baserev(self, rev);
1177 1185 if (base == rev) {
1178 1186 base = -1;
1179 1187 }
1180 1188 if (base == -2) {
1181 1189 assert(PyErr_Occurred());
1182 1190 goto bail;
1183 1191 }
1184 1192 key = PyInt_FromSsize_t(base);
1185 1193 allvalues = PyDict_GetItem(cache, key);
1186 1194 if (allvalues == NULL && PyErr_Occurred()) {
1187 1195 goto bail;
1188 1196 }
1189 1197 if (allvalues == NULL) {
1190 1198 int r;
1191 1199 allvalues = PyList_New(0);
1192 1200 if (!allvalues) {
1193 1201 goto bail;
1194 1202 }
1195 1203 r = PyDict_SetItem(cache, key, allvalues);
1196 1204 Py_DECREF(allvalues);
1197 1205 if (r < 0) {
1198 1206 goto bail;
1199 1207 }
1200 1208 }
1201 1209 value = PyInt_FromSsize_t(rev);
1202 1210 if (PyList_Append(allvalues, value)) {
1203 1211 goto bail;
1204 1212 }
1205 1213 Py_CLEAR(key);
1206 1214 Py_CLEAR(value);
1207 1215 }
1208 1216 Py_RETURN_NONE;
1209 1217 bail:
1210 1218 Py_XDECREF(key);
1211 1219 Py_XDECREF(value);
1212 1220 return NULL;
1213 1221 }
1214 1222
1215 1223 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1216 1224 {
1217 1225 int rev, generaldelta;
1218 1226 PyObject *stoparg;
1219 1227 int stoprev, iterrev, baserev = -1;
1220 1228 int stopped;
1221 1229 PyObject *chain = NULL, *result = NULL;
1222 1230 const Py_ssize_t length = index_length(self);
1223 1231
1224 1232 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1225 1233 return NULL;
1226 1234 }
1227 1235
1228 1236 if (PyInt_Check(stoparg)) {
1229 1237 stoprev = (int)PyInt_AsLong(stoparg);
1230 1238 if (stoprev == -1 && PyErr_Occurred()) {
1231 1239 return NULL;
1232 1240 }
1233 1241 } else if (stoparg == Py_None) {
1234 1242 stoprev = -2;
1235 1243 } else {
1236 1244 PyErr_SetString(PyExc_ValueError,
1237 1245 "stoprev must be integer or None");
1238 1246 return NULL;
1239 1247 }
1240 1248
1241 1249 if (rev < 0 || rev >= length) {
1242 1250 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1243 1251 return NULL;
1244 1252 }
1245 1253
1246 1254 chain = PyList_New(0);
1247 1255 if (chain == NULL) {
1248 1256 return NULL;
1249 1257 }
1250 1258
1251 1259 baserev = index_baserev(self, rev);
1252 1260
1253 1261 /* This should never happen. */
1254 1262 if (baserev <= -2) {
1255 1263 /* Error should be set by index_deref() */
1256 1264 assert(PyErr_Occurred());
1257 1265 goto bail;
1258 1266 }
1259 1267
1260 1268 iterrev = rev;
1261 1269
1262 1270 while (iterrev != baserev && iterrev != stoprev) {
1263 1271 PyObject *value = PyInt_FromLong(iterrev);
1264 1272 if (value == NULL) {
1265 1273 goto bail;
1266 1274 }
1267 1275 if (PyList_Append(chain, value)) {
1268 1276 Py_DECREF(value);
1269 1277 goto bail;
1270 1278 }
1271 1279 Py_DECREF(value);
1272 1280
1273 1281 if (generaldelta) {
1274 1282 iterrev = baserev;
1275 1283 } else {
1276 1284 iterrev--;
1277 1285 }
1278 1286
1279 1287 if (iterrev < 0) {
1280 1288 break;
1281 1289 }
1282 1290
1283 1291 if (iterrev >= length) {
1284 1292 PyErr_SetString(PyExc_IndexError,
1285 1293 "revision outside index");
1286 1294 return NULL;
1287 1295 }
1288 1296
1289 1297 baserev = index_baserev(self, iterrev);
1290 1298
1291 1299 /* This should never happen. */
1292 1300 if (baserev <= -2) {
1293 1301 /* Error should be set by index_deref() */
1294 1302 assert(PyErr_Occurred());
1295 1303 goto bail;
1296 1304 }
1297 1305 }
1298 1306
1299 1307 if (iterrev == stoprev) {
1300 1308 stopped = 1;
1301 1309 } else {
1302 1310 PyObject *value = PyInt_FromLong(iterrev);
1303 1311 if (value == NULL) {
1304 1312 goto bail;
1305 1313 }
1306 1314 if (PyList_Append(chain, value)) {
1307 1315 Py_DECREF(value);
1308 1316 goto bail;
1309 1317 }
1310 1318 Py_DECREF(value);
1311 1319
1312 1320 stopped = 0;
1313 1321 }
1314 1322
1315 1323 if (PyList_Reverse(chain)) {
1316 1324 goto bail;
1317 1325 }
1318 1326
1319 1327 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1320 1328 Py_DECREF(chain);
1321 1329 return result;
1322 1330
1323 1331 bail:
1324 1332 Py_DECREF(chain);
1325 1333 return NULL;
1326 1334 }
1327 1335
1328 1336 static inline int64_t
1329 1337 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1330 1338 {
1331 1339 int64_t start_offset;
1332 1340 int64_t end_offset;
1333 1341 int end_size;
1334 1342 start_offset = index_get_start(self, start_rev);
1335 1343 if (start_offset < 0) {
1336 1344 return -1;
1337 1345 }
1338 1346 end_offset = index_get_start(self, end_rev);
1339 1347 if (end_offset < 0) {
1340 1348 return -1;
1341 1349 }
1342 1350 end_size = index_get_length(self, end_rev);
1343 1351 if (end_size < 0) {
1344 1352 return -1;
1345 1353 }
1346 1354 if (end_offset < start_offset) {
1347 1355 PyErr_Format(PyExc_ValueError,
1348 1356 "corrupted revlog index: inconsistent offset "
1349 1357 "between revisions (%zd) and (%zd)",
1350 1358 start_rev, end_rev);
1351 1359 return -1;
1352 1360 }
1353 1361 return (end_offset - start_offset) + (int64_t)end_size;
1354 1362 }
1355 1363
1356 1364 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1357 1365 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1358 1366 Py_ssize_t startidx, Py_ssize_t endidx)
1359 1367 {
1360 1368 int length;
1361 1369 while (endidx > 1 && endidx > startidx) {
1362 1370 length = index_get_length(self, revs[endidx - 1]);
1363 1371 if (length < 0) {
1364 1372 return -1;
1365 1373 }
1366 1374 if (length != 0) {
1367 1375 break;
1368 1376 }
1369 1377 endidx -= 1;
1370 1378 }
1371 1379 return endidx;
1372 1380 }
1373 1381
/* A stretch of unread bytes between two consecutive revisions of a delta
 * chain; used by index_slicechunktodensity to pick split points. */
struct Gap {
	int64_t size;   /* number of bytes in the gap */
	Py_ssize_t idx; /* position (in the revs array) just after the gap */
};
1378 1386
1379 1387 static int gap_compare(const void *left, const void *right)
1380 1388 {
1381 1389 const struct Gap *l_left = ((const struct Gap *)left);
1382 1390 const struct Gap *l_right = ((const struct Gap *)right);
1383 1391 if (l_left->size < l_right->size) {
1384 1392 return -1;
1385 1393 } else if (l_left->size > l_right->size) {
1386 1394 return 1;
1387 1395 }
1388 1396 return 0;
1389 1397 }
1390 1398 static int Py_ssize_t_compare(const void *left, const void *right)
1391 1399 {
1392 1400 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1393 1401 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1394 1402 if (l_left < l_right) {
1395 1403 return -1;
1396 1404 } else if (l_left > l_right) {
1397 1405 return 1;
1398 1406 }
1399 1407 return 0;
1400 1408 }
1401 1409
/*
 * slicechunktodensity(revs, targetdensity, mingapsize) -> tuple or list
 * of lists of revisions.
 *
 * Slice the delta chain `revs` (an ascending list of revision numbers)
 * into contiguous read chunks so that the ratio of useful payload to
 * bytes actually read is at least `targetdensity`. Gaps no larger than
 * `mingapsize` are never used as split points. If the chain is already
 * dense enough (or trivially small), it is returned whole.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* a span at or below the gap threshold can never be sliced */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* already dense enough: return the chain as a single slice */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		/* empty revisions neither create nor close a gap */
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* split points must be applied in ascending order */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the loop below emits the final chunk as well */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1597 1605
1598 1606 static inline int nt_level(const char *node, Py_ssize_t level)
1599 1607 {
1600 1608 int v = node[level >> 1];
1601 1609 if (!(level & 1))
1602 1610 v >>= 4;
1603 1611 return v & 0xf;
1604 1612 }
1605 1613
/*
 * Look up `node` (binary, or hex when `hex` is true) in the trie.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	/* one nybble extractor per input encoding */
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	/* one trie level per nybble of input */
	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf encoding -(rev + 2): verify the remaining
			 * nybbles of the query against the stored node */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2; /* empty slot: not present */
		off = v; /* interior node: descend */
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1656 1664
1657 1665 static int nt_new(nodetree *self)
1658 1666 {
1659 1667 if (self->length == self->capacity) {
1660 1668 size_t newcapacity;
1661 1669 nodetreenode *newnodes;
1662 1670 newcapacity = self->capacity * 2;
1663 1671 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1664 1672 PyErr_SetString(PyExc_MemoryError,
1665 1673 "overflow in nt_new");
1666 1674 return -1;
1667 1675 }
1668 1676 newnodes =
1669 1677 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1670 1678 if (newnodes == NULL) {
1671 1679 PyErr_SetString(PyExc_MemoryError, "out of memory");
1672 1680 return -1;
1673 1681 }
1674 1682 self->capacity = newcapacity;
1675 1683 self->nodes = newnodes;
1676 1684 memset(&self->nodes[self->length], 0,
1677 1685 sizeof(nodetreenode) * (self->capacity - self->length));
1678 1686 }
1679 1687 return self->length++;
1680 1688 }
1681 1689
/*
 * Insert the mapping node -> rev into the trie, splitting a colliding
 * leaf into an interior node as needed.
 * Returns 0 on success, -1 on error (exception set).
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the rev as a leaf (-rev - 2) */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* slot already holds a leaf */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				/* same node: just overwrite the rev */
				n->children[k] = -rev - 2;
				return 0;
			}
			/* collision: push the old leaf one level down */
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* interior node: descend one level */
			level += 1;
			off = v;
		}
	}

	/* ran out of nybbles without placing the node */
	return -1;
}
1729 1737
1730 1738 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1731 1739 {
1732 1740 Py_ssize_t rev;
1733 1741 const char *node;
1734 1742 Py_ssize_t length;
1735 1743 if (!PyArg_ParseTuple(args, "n", &rev))
1736 1744 return NULL;
1737 1745 length = index_length(self->nt.index);
1738 1746 if (rev < 0 || rev >= length) {
1739 1747 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1740 1748 return NULL;
1741 1749 }
1742 1750 node = index_node_existing(self->nt.index, rev);
1743 1751 if (nt_insert(&self->nt, node, (int)rev) == -1)
1744 1752 return NULL;
1745 1753 Py_RETURN_NONE;
1746 1754 }
1747 1755
/*
 * Remove a node from the trie.
 *
 * Implemented by re-inserting with rev == -2: -2 happens to get encoded
 * as 0, which is interpreted as not set.
 */
static int nt_delete_node(nodetree *self, const char *node)
{
	return nt_insert(self, node, -2);
}
1754 1762
/*
 * Initialize a node tree backed by `index`. `capacity` is a size hint
 * expressed in revisions. Returns 0 on success, -1 on error (exception
 * set). Note: no reference to `index` is taken here; callers that need
 * one must INCREF it themselves (see ntobj_init).
 */
static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	self->length = 1; /* slot 0 is the root node */
	return 0;
}
1779 1787
/*
 * tp_init for the Python-level nodetree type: nodetree(index, capacity).
 */
static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	/* the nodetree keeps this reference; released in ntobj_dealloc */
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}
1790 1798
/*
 * Resolve a hex-digit prefix against the tree. Same return convention
 * as nt_find: -4 ambiguous, -2 not found, otherwise a valid rev
 * (-1 meaning nullid).
 */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
1795 1803
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* leaf: the prefix up to and including this nybble
			 * is unique -- but confirm it belongs to `node` */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2; /* empty slot: node not in the tree */
		off = v; /* interior node: descend */
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
1841 1849
1842 1850 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1843 1851 {
1844 1852 PyObject *val;
1845 1853 char *node;
1846 1854 int length;
1847 1855
1848 1856 if (!PyArg_ParseTuple(args, "O", &val))
1849 1857 return NULL;
1850 1858 if (node_check(self->nt.nodelen, val, &node) == -1)
1851 1859 return NULL;
1852 1860
1853 1861 length = nt_shortest(&self->nt, node);
1854 1862 if (length == -3)
1855 1863 return NULL;
1856 1864 if (length == -2) {
1857 1865 raise_revlog_error();
1858 1866 return NULL;
1859 1867 }
1860 1868 return PyInt_FromLong(length);
1861 1869 }
1862 1870
/* Release the node array; idempotent (safe to call more than once). */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
1868 1876
/* tp_dealloc: drop the index reference, free tree storage, free self. */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
1875 1883
/* Methods exposed on the Python-level nodetree object. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
1883 1891
/* Python type object for the standalone node tree exposed to Python as
 * parsers.nodetree; see ntobj_methods for its methods. (Closing brace
 * restored -- it appears to have been lost in extraction.) */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
/*
 * Lazily create the node->rev radix tree for this index. The tree starts
 * out containing only nullid; real revisions are added on demand by the
 * lookup paths. Returns 0 on success, -1 on error (exception set).
 */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		/* revisions below ntrev are not yet cached in the tree */
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
1942 1950
/*
 * Resolve a full binary node to its revision number.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	/* fast path: the node may already be cached in the radix tree */
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(&self->nt, n, rev) == -1) {
				/* record how far caching actually got */
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		/* everything from ntrev down to rev is now cached */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2002 2010
2003 2011 static PyObject *index_getitem(indexObject *self, PyObject *value)
2004 2012 {
2005 2013 char *node;
2006 2014 int rev;
2007 2015
2008 2016 if (PyInt_Check(value)) {
2009 2017 long idx;
2010 2018 if (!pylong_to_long(value, &idx)) {
2011 2019 return NULL;
2012 2020 }
2013 2021 return index_get(self, idx);
2014 2022 }
2015 2023
2016 2024 if (node_check(self->nodelen, value, &node) == -1)
2017 2025 return NULL;
2018 2026 rev = index_find_node(self, node);
2019 2027 if (rev >= -1)
2020 2028 return PyInt_FromLong(rev);
2021 2029 if (rev == -2)
2022 2030 raise_revlog_error();
2023 2031 return NULL;
2024 2032 }
2025 2033
2026 2034 /*
2027 2035 * Fully populate the radix tree.
2028 2036 */
2029 2037 static int index_populate_nt(indexObject *self)
2030 2038 {
2031 2039 int rev;
2032 2040 if (self->ntrev > 0) {
2033 2041 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2034 2042 const char *n = index_node_existing(self, rev);
2035 2043 if (n == NULL)
2036 2044 return -1;
2037 2045 if (nt_insert(&self->nt, n, rev) == -1)
2038 2046 return -1;
2039 2047 }
2040 2048 self->ntrev = -1;
2041 2049 }
2042 2050 return 0;
2043 2051 }
2044 2052
2045 2053 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2046 2054 {
2047 2055 const char *fullnode;
2048 2056 Py_ssize_t nodelen;
2049 2057 char *node;
2050 2058 int rev, i;
2051 2059
2052 2060 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2053 2061 return NULL;
2054 2062
2055 2063 if (nodelen < 1) {
2056 2064 PyErr_SetString(PyExc_ValueError, "key too short");
2057 2065 return NULL;
2058 2066 }
2059 2067
2060 2068 if (nodelen > 2 * self->nodelen) {
2061 2069 PyErr_SetString(PyExc_ValueError, "key too long");
2062 2070 return NULL;
2063 2071 }
2064 2072
2065 2073 for (i = 0; i < nodelen; i++)
2066 2074 hexdigit(node, i);
2067 2075 if (PyErr_Occurred()) {
2068 2076 /* input contains non-hex characters */
2069 2077 PyErr_Clear();
2070 2078 Py_RETURN_NONE;
2071 2079 }
2072 2080
2073 2081 if (index_init_nt(self) == -1)
2074 2082 return NULL;
2075 2083 if (index_populate_nt(self) == -1)
2076 2084 return NULL;
2077 2085 rev = nt_partialmatch(&self->nt, node, nodelen);
2078 2086
2079 2087 switch (rev) {
2080 2088 case -4:
2081 2089 raise_revlog_error();
2082 2090 return NULL;
2083 2091 case -2:
2084 2092 Py_RETURN_NONE;
2085 2093 case -1:
2086 2094 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2087 2095 }
2088 2096
2089 2097 fullnode = index_node_existing(self, rev);
2090 2098 if (fullnode == NULL) {
2091 2099 return NULL;
2092 2100 }
2093 2101 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2094 2102 }
2095 2103
2096 2104 static PyObject *index_shortest(indexObject *self, PyObject *args)
2097 2105 {
2098 2106 PyObject *val;
2099 2107 char *node;
2100 2108 int length;
2101 2109
2102 2110 if (!PyArg_ParseTuple(args, "O", &val))
2103 2111 return NULL;
2104 2112 if (node_check(self->nodelen, val, &node) == -1)
2105 2113 return NULL;
2106 2114
2107 2115 self->ntlookups++;
2108 2116 if (index_init_nt(self) == -1)
2109 2117 return NULL;
2110 2118 if (index_populate_nt(self) == -1)
2111 2119 return NULL;
2112 2120 length = nt_shortest(&self->nt, node);
2113 2121 if (length == -3)
2114 2122 return NULL;
2115 2123 if (length == -2) {
2116 2124 raise_revlog_error();
2117 2125 return NULL;
2118 2126 }
2119 2127 return PyInt_FromLong(length);
2120 2128 }
2121 2129
2122 2130 static PyObject *index_m_get(indexObject *self, PyObject *args)
2123 2131 {
2124 2132 PyObject *val;
2125 2133 char *node;
2126 2134 int rev;
2127 2135
2128 2136 if (!PyArg_ParseTuple(args, "O", &val))
2129 2137 return NULL;
2130 2138 if (node_check(self->nodelen, val, &node) == -1)
2131 2139 return NULL;
2132 2140 rev = index_find_node(self, node);
2133 2141 if (rev == -3)
2134 2142 return NULL;
2135 2143 if (rev == -2)
2136 2144 Py_RETURN_NONE;
2137 2145 return PyInt_FromLong(rev);
2138 2146 }
2139 2147
2140 2148 static int index_contains(indexObject *self, PyObject *value)
2141 2149 {
2142 2150 char *node;
2143 2151
2144 2152 if (PyInt_Check(value)) {
2145 2153 long rev;
2146 2154 if (!pylong_to_long(value, &rev)) {
2147 2155 return -1;
2148 2156 }
2149 2157 return rev >= -1 && rev < index_length(self);
2150 2158 }
2151 2159
2152 2160 if (node_check(self->nodelen, value, &node) == -1)
2153 2161 return -1;
2154 2162
2155 2163 switch (index_find_node(self, node)) {
2156 2164 case -3:
2157 2165 return -1;
2158 2166 case -2:
2159 2167 return 0;
2160 2168 default:
2161 2169 return 1;
2162 2170 }
2163 2171 }
2164 2172
2165 2173 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2166 2174 {
2167 2175 int ret = index_contains(self, args);
2168 2176 if (ret < 0)
2169 2177 return NULL;
2170 2178 return PyBool_FromLong((long)ret);
2171 2179 }
2172 2180
2173 2181 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2174 2182 {
2175 2183 char *node;
2176 2184 int rev;
2177 2185
2178 2186 if (node_check(self->nodelen, val, &node) == -1)
2179 2187 return NULL;
2180 2188 rev = index_find_node(self, node);
2181 2189 if (rev >= -1)
2182 2190 return PyInt_FromLong(rev);
2183 2191 if (rev == -2)
2184 2192 raise_revlog_error();
2185 2193 return NULL;
2186 2194 }
2187 2195
2188 2196 typedef uint64_t bitmask;
2189 2197
2190 2198 /*
2191 2199 * Given a disjoint set of revs, return all candidates for the
2192 2200 * greatest common ancestor. In revset notation, this is the set
2193 2201 * "heads(::a and ::b and ...)"
2194 2202 */
2195 2203 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2196 2204 int revcount)
2197 2205 {
2198 2206 const bitmask allseen = (1ull << revcount) - 1;
2199 2207 const bitmask poison = 1ull << revcount;
2200 2208 PyObject *gca = PyList_New(0);
2201 2209 int i, v, interesting;
2202 2210 int maxrev = -1;
2203 2211 bitmask sp;
2204 2212 bitmask *seen;
2205 2213
2206 2214 if (gca == NULL)
2207 2215 return PyErr_NoMemory();
2208 2216
2209 2217 for (i = 0; i < revcount; i++) {
2210 2218 if (revs[i] > maxrev)
2211 2219 maxrev = revs[i];
2212 2220 }
2213 2221
2214 2222 seen = calloc(sizeof(*seen), maxrev + 1);
2215 2223 if (seen == NULL) {
2216 2224 Py_DECREF(gca);
2217 2225 return PyErr_NoMemory();
2218 2226 }
2219 2227
2220 2228 for (i = 0; i < revcount; i++)
2221 2229 seen[revs[i]] = 1ull << i;
2222 2230
2223 2231 interesting = revcount;
2224 2232
2225 2233 for (v = maxrev; v >= 0 && interesting; v--) {
2226 2234 bitmask sv = seen[v];
2227 2235 int parents[2];
2228 2236
2229 2237 if (!sv)
2230 2238 continue;
2231 2239
2232 2240 if (sv < poison) {
2233 2241 interesting -= 1;
2234 2242 if (sv == allseen) {
2235 2243 PyObject *obj = PyInt_FromLong(v);
2236 2244 if (obj == NULL)
2237 2245 goto bail;
2238 2246 if (PyList_Append(gca, obj) == -1) {
2239 2247 Py_DECREF(obj);
2240 2248 goto bail;
2241 2249 }
2242 2250 sv |= poison;
2243 2251 for (i = 0; i < revcount; i++) {
2244 2252 if (revs[i] == v)
2245 2253 goto done;
2246 2254 }
2247 2255 }
2248 2256 }
2249 2257 if (index_get_parents(self, v, parents, maxrev) < 0)
2250 2258 goto bail;
2251 2259
2252 2260 for (i = 0; i < 2; i++) {
2253 2261 int p = parents[i];
2254 2262 if (p == -1)
2255 2263 continue;
2256 2264 sp = seen[p];
2257 2265 if (sv < poison) {
2258 2266 if (sp == 0) {
2259 2267 seen[p] = sv;
2260 2268 interesting++;
2261 2269 } else if (sp != sv)
2262 2270 seen[p] |= sv;
2263 2271 } else {
2264 2272 if (sp && sp < poison)
2265 2273 interesting--;
2266 2274 seen[p] = sv;
2267 2275 }
2268 2276 }
2269 2277 }
2270 2278
2271 2279 done:
2272 2280 free(seen);
2273 2281 return gca;
2274 2282 bail:
2275 2283 free(seen);
2276 2284 Py_XDECREF(gca);
2277 2285 return NULL;
2278 2286 }
2279 2287
2280 2288 /*
2281 2289 * Given a disjoint set of revs, return the subset with the longest
2282 2290 * path to the root.
2283 2291 */
2284 2292 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2285 2293 {
2286 2294 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2287 2295 static const Py_ssize_t capacity = 24;
2288 2296 int *depth, *interesting = NULL;
2289 2297 int i, j, v, ninteresting;
2290 2298 PyObject *dict = NULL, *keys = NULL;
2291 2299 long *seen = NULL;
2292 2300 int maxrev = -1;
2293 2301 long final;
2294 2302
2295 2303 if (revcount > capacity) {
2296 2304 PyErr_Format(PyExc_OverflowError,
2297 2305 "bitset size (%ld) > capacity (%ld)",
2298 2306 (long)revcount, (long)capacity);
2299 2307 return NULL;
2300 2308 }
2301 2309
2302 2310 for (i = 0; i < revcount; i++) {
2303 2311 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2304 2312 if (n > maxrev)
2305 2313 maxrev = n;
2306 2314 }
2307 2315
2308 2316 depth = calloc(sizeof(*depth), maxrev + 1);
2309 2317 if (depth == NULL)
2310 2318 return PyErr_NoMemory();
2311 2319
2312 2320 seen = calloc(sizeof(*seen), maxrev + 1);
2313 2321 if (seen == NULL) {
2314 2322 PyErr_NoMemory();
2315 2323 goto bail;
2316 2324 }
2317 2325
2318 2326 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2319 2327 if (interesting == NULL) {
2320 2328 PyErr_NoMemory();
2321 2329 goto bail;
2322 2330 }
2323 2331
2324 2332 if (PyList_Sort(revs) == -1)
2325 2333 goto bail;
2326 2334
2327 2335 for (i = 0; i < revcount; i++) {
2328 2336 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2329 2337 long b = 1l << i;
2330 2338 depth[n] = 1;
2331 2339 seen[n] = b;
2332 2340 interesting[b] = 1;
2333 2341 }
2334 2342
2335 2343 /* invariant: ninteresting is the number of non-zero entries in
2336 2344 * interesting. */
2337 2345 ninteresting = (int)revcount;
2338 2346
2339 2347 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2340 2348 int dv = depth[v];
2341 2349 int parents[2];
2342 2350 long sv;
2343 2351
2344 2352 if (dv == 0)
2345 2353 continue;
2346 2354
2347 2355 sv = seen[v];
2348 2356 if (index_get_parents(self, v, parents, maxrev) < 0)
2349 2357 goto bail;
2350 2358
2351 2359 for (i = 0; i < 2; i++) {
2352 2360 int p = parents[i];
2353 2361 long sp;
2354 2362 int dp;
2355 2363
2356 2364 if (p == -1)
2357 2365 continue;
2358 2366
2359 2367 dp = depth[p];
2360 2368 sp = seen[p];
2361 2369 if (dp <= dv) {
2362 2370 depth[p] = dv + 1;
2363 2371 if (sp != sv) {
2364 2372 interesting[sv] += 1;
2365 2373 seen[p] = sv;
2366 2374 if (sp) {
2367 2375 interesting[sp] -= 1;
2368 2376 if (interesting[sp] == 0)
2369 2377 ninteresting -= 1;
2370 2378 }
2371 2379 }
2372 2380 } else if (dv == dp - 1) {
2373 2381 long nsp = sp | sv;
2374 2382 if (nsp == sp)
2375 2383 continue;
2376 2384 seen[p] = nsp;
2377 2385 interesting[sp] -= 1;
2378 2386 if (interesting[sp] == 0)
2379 2387 ninteresting -= 1;
2380 2388 if (interesting[nsp] == 0)
2381 2389 ninteresting += 1;
2382 2390 interesting[nsp] += 1;
2383 2391 }
2384 2392 }
2385 2393 interesting[sv] -= 1;
2386 2394 if (interesting[sv] == 0)
2387 2395 ninteresting -= 1;
2388 2396 }
2389 2397
2390 2398 final = 0;
2391 2399 j = ninteresting;
2392 2400 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2393 2401 if (interesting[i] == 0)
2394 2402 continue;
2395 2403 final |= i;
2396 2404 j -= 1;
2397 2405 }
2398 2406 if (final == 0) {
2399 2407 keys = PyList_New(0);
2400 2408 goto bail;
2401 2409 }
2402 2410
2403 2411 dict = PyDict_New();
2404 2412 if (dict == NULL)
2405 2413 goto bail;
2406 2414
2407 2415 for (i = 0; i < revcount; i++) {
2408 2416 PyObject *key;
2409 2417
2410 2418 if ((final & (1 << i)) == 0)
2411 2419 continue;
2412 2420
2413 2421 key = PyList_GET_ITEM(revs, i);
2414 2422 Py_INCREF(key);
2415 2423 Py_INCREF(Py_None);
2416 2424 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2417 2425 Py_DECREF(key);
2418 2426 Py_DECREF(Py_None);
2419 2427 goto bail;
2420 2428 }
2421 2429 }
2422 2430
2423 2431 keys = PyDict_Keys(dict);
2424 2432
2425 2433 bail:
2426 2434 free(depth);
2427 2435 free(seen);
2428 2436 free(interesting);
2429 2437 Py_XDECREF(dict);
2430 2438
2431 2439 return keys;
2432 2440 }
2433 2441
2434 2442 /*
2435 2443 * Given a (possibly overlapping) set of revs, return all the
2436 2444 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2437 2445 */
2438 2446 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2439 2447 {
2440 2448 PyObject *ret = NULL;
2441 2449 Py_ssize_t argcount, i, len;
2442 2450 bitmask repeat = 0;
2443 2451 int revcount = 0;
2444 2452 int *revs;
2445 2453
2446 2454 argcount = PySequence_Length(args);
2447 2455 revs = PyMem_Malloc(argcount * sizeof(*revs));
2448 2456 if (argcount > 0 && revs == NULL)
2449 2457 return PyErr_NoMemory();
2450 2458 len = index_length(self);
2451 2459
2452 2460 for (i = 0; i < argcount; i++) {
2453 2461 static const int capacity = 24;
2454 2462 PyObject *obj = PySequence_GetItem(args, i);
2455 2463 bitmask x;
2456 2464 long val;
2457 2465
2458 2466 if (!PyInt_Check(obj)) {
2459 2467 PyErr_SetString(PyExc_TypeError,
2460 2468 "arguments must all be ints");
2461 2469 Py_DECREF(obj);
2462 2470 goto bail;
2463 2471 }
2464 2472 val = PyInt_AsLong(obj);
2465 2473 Py_DECREF(obj);
2466 2474 if (val == -1) {
2467 2475 ret = PyList_New(0);
2468 2476 goto done;
2469 2477 }
2470 2478 if (val < 0 || val >= len) {
2471 2479 PyErr_SetString(PyExc_IndexError, "index out of range");
2472 2480 goto bail;
2473 2481 }
2474 2482 /* this cheesy bloom filter lets us avoid some more
2475 2483 * expensive duplicate checks in the common set-is-disjoint
2476 2484 * case */
2477 2485 x = 1ull << (val & 0x3f);
2478 2486 if (repeat & x) {
2479 2487 int k;
2480 2488 for (k = 0; k < revcount; k++) {
2481 2489 if (val == revs[k])
2482 2490 goto duplicate;
2483 2491 }
2484 2492 } else
2485 2493 repeat |= x;
2486 2494 if (revcount >= capacity) {
2487 2495 PyErr_Format(PyExc_OverflowError,
2488 2496 "bitset size (%d) > capacity (%d)",
2489 2497 revcount, capacity);
2490 2498 goto bail;
2491 2499 }
2492 2500 revs[revcount++] = (int)val;
2493 2501 duplicate:;
2494 2502 }
2495 2503
2496 2504 if (revcount == 0) {
2497 2505 ret = PyList_New(0);
2498 2506 goto done;
2499 2507 }
2500 2508 if (revcount == 1) {
2501 2509 PyObject *obj;
2502 2510 ret = PyList_New(1);
2503 2511 if (ret == NULL)
2504 2512 goto bail;
2505 2513 obj = PyInt_FromLong(revs[0]);
2506 2514 if (obj == NULL)
2507 2515 goto bail;
2508 2516 PyList_SET_ITEM(ret, 0, obj);
2509 2517 goto done;
2510 2518 }
2511 2519
2512 2520 ret = find_gca_candidates(self, revs, revcount);
2513 2521 if (ret == NULL)
2514 2522 goto bail;
2515 2523
2516 2524 done:
2517 2525 PyMem_Free(revs);
2518 2526 return ret;
2519 2527
2520 2528 bail:
2521 2529 PyMem_Free(revs);
2522 2530 Py_XDECREF(ret);
2523 2531 return NULL;
2524 2532 }
2525 2533
2526 2534 /*
2527 2535 * Given a (possibly overlapping) set of revs, return the greatest
2528 2536 * common ancestors: those with the longest path to the root.
2529 2537 */
2530 2538 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2531 2539 {
2532 2540 PyObject *ret;
2533 2541 PyObject *gca = index_commonancestorsheads(self, args);
2534 2542 if (gca == NULL)
2535 2543 return NULL;
2536 2544
2537 2545 if (PyList_GET_SIZE(gca) <= 1) {
2538 2546 return gca;
2539 2547 }
2540 2548
2541 2549 ret = find_deepest(self, gca);
2542 2550 Py_DECREF(gca);
2543 2551 return ret;
2544 2552 }
2545 2553
2546 2554 /*
2547 2555 * Invalidate any trie entries introduced by added revs.
2548 2556 */
2549 2557 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2550 2558 {
2551 2559 Py_ssize_t i, len;
2552 2560
2553 2561 len = self->length + self->new_length;
2554 2562 i = start - self->length;
2555 2563 if (i < 0)
2556 2564 return;
2557 2565
2558 2566 for (i = start; i < len; i++)
2559 2567 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2560 2568
2561 2569 self->new_length = start - self->length;
2562 2570 }
2563 2571
2564 2572 /*
2565 2573 * Delete a numeric range of revs, which must be at the end of the
2566 2574 * range.
2567 2575 */
2568 2576 static int index_slice_del(indexObject *self, PyObject *item)
2569 2577 {
2570 2578 Py_ssize_t start, stop, step, slicelength;
2571 2579 Py_ssize_t length = index_length(self) + 1;
2572 2580 int ret = 0;
2573 2581
2574 2582 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2575 2583 #ifdef IS_PY3K
2576 2584 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2577 2585 &slicelength) < 0)
2578 2586 #else
2579 2587 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2580 2588 &step, &slicelength) < 0)
2581 2589 #endif
2582 2590 return -1;
2583 2591
2584 2592 if (slicelength <= 0)
2585 2593 return 0;
2586 2594
2587 2595 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2588 2596 stop = start;
2589 2597
2590 2598 if (step < 0) {
2591 2599 stop = start + 1;
2592 2600 start = stop + step * (slicelength - 1) - 1;
2593 2601 step = -step;
2594 2602 }
2595 2603
2596 2604 if (step != 1) {
2597 2605 PyErr_SetString(PyExc_ValueError,
2598 2606 "revlog index delete requires step size of 1");
2599 2607 return -1;
2600 2608 }
2601 2609
2602 2610 if (stop != length - 1) {
2603 2611 PyErr_SetString(PyExc_IndexError,
2604 2612 "revlog index deletion indices are invalid");
2605 2613 return -1;
2606 2614 }
2607 2615
2608 2616 if (start < self->length) {
2609 2617 if (self->ntinitialized) {
2610 2618 Py_ssize_t i;
2611 2619
2612 2620 for (i = start; i < self->length; i++) {
2613 2621 const char *node = index_node_existing(self, i);
2614 2622 if (node == NULL)
2615 2623 return -1;
2616 2624
2617 2625 nt_delete_node(&self->nt, node);
2618 2626 }
2619 2627 if (self->new_length)
2620 2628 index_invalidate_added(self, self->length);
2621 2629 if (self->ntrev > start)
2622 2630 self->ntrev = (int)start;
2623 2631 } else if (self->new_length) {
2624 2632 self->new_length = 0;
2625 2633 }
2626 2634
2627 2635 self->length = start;
2628 2636 goto done;
2629 2637 }
2630 2638
2631 2639 if (self->ntinitialized) {
2632 2640 index_invalidate_added(self, start);
2633 2641 if (self->ntrev > start)
2634 2642 self->ntrev = (int)start;
2635 2643 } else {
2636 2644 self->new_length = start - self->length;
2637 2645 }
2638 2646 done:
2639 2647 Py_CLEAR(self->headrevs);
2640 2648 return ret;
2641 2649 }
2642 2650
2643 2651 /*
2644 2652 * Supported ops:
2645 2653 *
2646 2654 * slice deletion
2647 2655 * string assignment (extend node->rev mapping)
2648 2656 * string deletion (shrink node->rev mapping)
2649 2657 */
2650 2658 static int index_assign_subscript(indexObject *self, PyObject *item,
2651 2659 PyObject *value)
2652 2660 {
2653 2661 char *node;
2654 2662 long rev;
2655 2663
2656 2664 if (PySlice_Check(item) && value == NULL)
2657 2665 return index_slice_del(self, item);
2658 2666
2659 2667 if (node_check(self->nodelen, item, &node) == -1)
2660 2668 return -1;
2661 2669
2662 2670 if (value == NULL)
2663 2671 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2664 2672 : 0;
2665 2673 rev = PyInt_AsLong(value);
2666 2674 if (rev > INT_MAX || rev < 0) {
2667 2675 if (!PyErr_Occurred())
2668 2676 PyErr_SetString(PyExc_ValueError, "rev out of range");
2669 2677 return -1;
2670 2678 }
2671 2679
2672 2680 if (index_init_nt(self) == -1)
2673 2681 return -1;
2674 2682 return nt_insert(&self->nt, node, (int)rev);
2675 2683 }
2676 2684
2677 2685 /*
2678 2686 * Find all RevlogNG entries in an index that has inline data. Update
2679 2687 * the optional "offsets" table with those entries.
2680 2688 */
2681 2689 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2682 2690 {
2683 2691 const char *data = (const char *)self->buf.buf;
2684 2692 Py_ssize_t pos = 0;
2685 2693 Py_ssize_t end = self->buf.len;
2686 2694 long incr = self->hdrsize;
2687 2695 Py_ssize_t len = 0;
2688 2696
2689 2697 while (pos + self->hdrsize <= end && pos >= 0) {
2690 2698 uint32_t comp_len, sidedata_comp_len = 0;
2691 2699 /* 3rd element of header is length of compressed inline data */
2692 2700 comp_len = getbe32(data + pos + 8);
2693 2701 if (self->hdrsize == v2_hdrsize) {
2694 2702 sidedata_comp_len = getbe32(data + pos + 72);
2695 2703 }
2696 2704 incr = self->hdrsize + comp_len + sidedata_comp_len;
2697 2705 if (offsets)
2698 2706 offsets[len] = data + pos;
2699 2707 len++;
2700 2708 pos += incr;
2701 2709 }
2702 2710
2703 2711 if (pos != end) {
2704 2712 if (!PyErr_Occurred())
2705 2713 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2706 2714 return -1;
2707 2715 }
2708 2716
2709 2717 return len;
2710 2718 }
2711 2719
2712 2720 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2713 2721 {
2714 2722 PyObject *data_obj, *inlined_obj, *revlogv2;
2715 2723 Py_ssize_t size;
2716 2724
2717 2725 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2718 2726
2719 2727 /* Initialize before argument-checking to avoid index_dealloc() crash.
2720 2728 */
2721 2729 self->added = NULL;
2722 2730 self->new_length = 0;
2723 2731 self->added_length = 0;
2724 2732 self->data = NULL;
2725 2733 memset(&self->buf, 0, sizeof(self->buf));
2726 2734 self->headrevs = NULL;
2727 2735 self->filteredrevs = Py_None;
2728 2736 Py_INCREF(Py_None);
2729 2737 self->ntinitialized = 0;
2730 2738 self->offsets = NULL;
2731 2739 self->nodelen = 20;
2732 2740 self->nullentry = NULL;
2733 2741
2734 2742 revlogv2 = NULL;
2735 2743 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2736 2744 &data_obj, &inlined_obj, &revlogv2))
2737 2745 return -1;
2738 2746 if (!PyObject_CheckBuffer(data_obj)) {
2739 2747 PyErr_SetString(PyExc_TypeError,
2740 2748 "data does not support buffer interface");
2741 2749 return -1;
2742 2750 }
2743 2751 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2744 2752 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2745 2753 return -1;
2746 2754 }
2747 2755
2748 2756 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2749 2757 self->hdrsize = v2_hdrsize;
2750 2758 } else {
2751 2759 self->hdrsize = v1_hdrsize;
2752 2760 }
2753 2761
2754 2762 if (self->hdrsize == v1_hdrsize) {
2755 2763 self->nullentry =
2756 2764 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2757 2765 -1, -1, -1, nullid, self->nodelen);
2758 2766 } else {
2759 2767 self->nullentry =
2760 2768 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2761 2769 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2762 2770 }
2763 2771
2764 2772 if (!self->nullentry)
2765 2773 return -1;
2766 2774 PyObject_GC_UnTrack(self->nullentry);
2767 2775
2768 2776 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2769 2777 return -1;
2770 2778 size = self->buf.len;
2771 2779
2772 2780 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2773 2781 self->data = data_obj;
2774 2782
2775 2783 self->ntlookups = self->ntmisses = 0;
2776 2784 self->ntrev = -1;
2777 2785 Py_INCREF(self->data);
2778 2786
2779 2787 if (self->inlined) {
2780 2788 Py_ssize_t len = inline_scan(self, NULL);
2781 2789 if (len == -1)
2782 2790 goto bail;
2783 2791 self->length = len;
2784 2792 } else {
2785 2793 if (size % self->hdrsize) {
2786 2794 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2787 2795 goto bail;
2788 2796 }
2789 2797 self->length = size / self->hdrsize;
2790 2798 }
2791 2799
2792 2800 return 0;
2793 2801 bail:
2794 2802 return -1;
2795 2803 }
2796 2804
2797 2805 static PyObject *index_nodemap(indexObject *self)
2798 2806 {
2799 2807 Py_INCREF(self);
2800 2808 return (PyObject *)self;
2801 2809 }
2802 2810
2803 2811 static void _index_clearcaches(indexObject *self)
2804 2812 {
2805 2813 if (self->offsets) {
2806 2814 PyMem_Free((void *)self->offsets);
2807 2815 self->offsets = NULL;
2808 2816 }
2809 2817 if (self->ntinitialized) {
2810 2818 nt_dealloc(&self->nt);
2811 2819 }
2812 2820 self->ntinitialized = 0;
2813 2821 Py_CLEAR(self->headrevs);
2814 2822 }
2815 2823
2816 2824 static PyObject *index_clearcaches(indexObject *self)
2817 2825 {
2818 2826 _index_clearcaches(self);
2819 2827 self->ntrev = -1;
2820 2828 self->ntlookups = self->ntmisses = 0;
2821 2829 Py_RETURN_NONE;
2822 2830 }
2823 2831
2824 2832 static void index_dealloc(indexObject *self)
2825 2833 {
2826 2834 _index_clearcaches(self);
2827 2835 Py_XDECREF(self->filteredrevs);
2828 2836 if (self->buf.buf) {
2829 2837 PyBuffer_Release(&self->buf);
2830 2838 memset(&self->buf, 0, sizeof(self->buf));
2831 2839 }
2832 2840 Py_XDECREF(self->data);
2833 2841 PyMem_Free(self->added);
2834 2842 Py_XDECREF(self->nullentry);
2835 2843 PyObject_Del(self);
2836 2844 }
2837 2845
2838 2846 static PySequenceMethods index_sequence_methods = {
2839 2847 (lenfunc)index_length, /* sq_length */
2840 2848 0, /* sq_concat */
2841 2849 0, /* sq_repeat */
2842 2850 (ssizeargfunc)index_get, /* sq_item */
2843 2851 0, /* sq_slice */
2844 2852 0, /* sq_ass_item */
2845 2853 0, /* sq_ass_slice */
2846 2854 (objobjproc)index_contains, /* sq_contains */
2847 2855 };
2848 2856
2849 2857 static PyMappingMethods index_mapping_methods = {
2850 2858 (lenfunc)index_length, /* mp_length */
2851 2859 (binaryfunc)index_getitem, /* mp_subscript */
2852 2860 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2853 2861 };
2854 2862
2855 2863 static PyMethodDef index_methods[] = {
2856 2864 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2857 2865 "return the gca set of the given revs"},
2858 2866 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2859 2867 METH_VARARGS,
2860 2868 "return the heads of the common ancestors of the given revs"},
2861 2869 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2862 2870 "clear the index caches"},
2863 2871 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2864 2872 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2865 2873 "return `rev` associated with a node or None"},
2866 2874 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2867 2875 "return True if the node exist in the index"},
2868 2876 {"rev", (PyCFunction)index_m_rev, METH_O,
2869 2877 "return `rev` associated with a node or raise RevlogError"},
2870 2878 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2871 2879 "compute phases"},
2872 2880 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2873 2881 "reachableroots"},
2874 2882 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2875 2883 METH_VARARGS, "replace an existing index entry with a new value"},
2876 2884 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2877 2885 "get head revisions"}, /* Can do filtering since 3.2 */
2878 2886 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2879 2887 "get filtered head revisions"}, /* Can always do filtering */
2880 2888 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2881 2889 "True if the object is a snapshot"},
2882 2890 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2883 2891 "Gather snapshot data in a cache dict"},
2884 2892 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2885 2893 "determine revisions with deltas to reconstruct fulltext"},
2886 2894 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2887 2895 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2888 2896 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2889 2897 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2890 2898 "match a potentially ambiguous node ID"},
2891 2899 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2892 2900 "find length of shortest hex nodeid of a binary ID"},
2893 2901 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2894 {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
2902 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2895 2903 "return an entry in binary form"},
2904 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2905 "pack the revlog header information into binary"},
2896 2906 {NULL} /* Sentinel */
2897 2907 };
2898 2908
2899 2909 static PyGetSetDef index_getset[] = {
2900 2910 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2901 2911 {NULL} /* Sentinel */
2902 2912 };
2903 2913
2904 2914 static PyMemberDef index_members[] = {
2905 2915 {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
2906 2916 "size of an index entry"},
2907 2917 {NULL} /* Sentinel */
2908 2918 };
2909 2919
2910 2920 PyTypeObject HgRevlogIndex_Type = {
2911 2921 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2912 2922 "parsers.index", /* tp_name */
2913 2923 sizeof(indexObject), /* tp_basicsize */
2914 2924 0, /* tp_itemsize */
2915 2925 (destructor)index_dealloc, /* tp_dealloc */
2916 2926 0, /* tp_print */
2917 2927 0, /* tp_getattr */
2918 2928 0, /* tp_setattr */
2919 2929 0, /* tp_compare */
2920 2930 0, /* tp_repr */
2921 2931 0, /* tp_as_number */
2922 2932 &index_sequence_methods, /* tp_as_sequence */
2923 2933 &index_mapping_methods, /* tp_as_mapping */
2924 2934 0, /* tp_hash */
2925 2935 0, /* tp_call */
2926 2936 0, /* tp_str */
2927 2937 0, /* tp_getattro */
2928 2938 0, /* tp_setattro */
2929 2939 0, /* tp_as_buffer */
2930 2940 Py_TPFLAGS_DEFAULT, /* tp_flags */
2931 2941 "revlog index", /* tp_doc */
2932 2942 0, /* tp_traverse */
2933 2943 0, /* tp_clear */
2934 2944 0, /* tp_richcompare */
2935 2945 0, /* tp_weaklistoffset */
2936 2946 0, /* tp_iter */
2937 2947 0, /* tp_iternext */
2938 2948 index_methods, /* tp_methods */
2939 2949 index_members, /* tp_members */
2940 2950 index_getset, /* tp_getset */
2941 2951 0, /* tp_base */
2942 2952 0, /* tp_dict */
2943 2953 0, /* tp_descr_get */
2944 2954 0, /* tp_descr_set */
2945 2955 0, /* tp_dictoffset */
2946 2956 (initproc)index_init, /* tp_init */
2947 2957 0, /* tp_alloc */
2948 2958 };
2949 2959
2950 2960 /*
2951 2961 * returns a tuple of the form (index, cache) with elements as
2952 2962 * follows:
2953 2963 *
2954 2964 * index: an index object that lazily parses Revlog (v1 or v2) records
2955 2965 * cache: if data is inlined, a tuple (0, index_file_content), else None
2956 2966 * index_file_content could be a string, or a buffer
2957 2967 *
2958 2968 * added complications are for backwards compatibility
2959 2969 */
2960 2970 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2961 2971 {
2962 2972 PyObject *cache = NULL;
2963 2973 indexObject *idx;
2964 2974 int ret;
2965 2975
2966 2976 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2967 2977 if (idx == NULL)
2968 2978 goto bail;
2969 2979
2970 2980 ret = index_init(idx, args, kwargs);
2971 2981 if (ret == -1)
2972 2982 goto bail;
2973 2983
2974 2984 if (idx->inlined) {
2975 2985 cache = Py_BuildValue("iO", 0, idx->data);
2976 2986 if (cache == NULL)
2977 2987 goto bail;
2978 2988 } else {
2979 2989 cache = Py_None;
2980 2990 Py_INCREF(cache);
2981 2991 }
2982 2992
2983 2993 return Py_BuildValue("NN", idx, cache);
2984 2994
2985 2995 bail:
2986 2996 Py_XDECREF(idx);
2987 2997 Py_XDECREF(cache);
2988 2998 return NULL;
2989 2999 }
2990 3000
2991 3001 static Revlog_CAPI CAPI = {
2992 3002 /* increment the abi_version field upon each change in the Revlog_CAPI
2993 3003 struct or in the ABI of the listed functions */
2994 3004 2,
2995 3005 index_length,
2996 3006 index_node,
2997 3007 HgRevlogIndex_GetParents,
2998 3008 };
2999 3009
3000 3010 void revlog_module_init(PyObject *mod)
3001 3011 {
3002 3012 PyObject *caps = NULL;
3003 3013 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3004 3014 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3005 3015 return;
3006 3016 Py_INCREF(&HgRevlogIndex_Type);
3007 3017 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3008 3018
3009 3019 nodetreeType.tp_new = PyType_GenericNew;
3010 3020 if (PyType_Ready(&nodetreeType) < 0)
3011 3021 return;
3012 3022 Py_INCREF(&nodetreeType);
3013 3023 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3014 3024
3015 3025 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3016 3026 if (caps != NULL)
3017 3027 PyModule_AddObject(mod, "revlog_CAPI", caps);
3018 3028 }
@@ -1,380 +1,381 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 sha1nodeconstants,
16 16 )
17 17 from .. import (
18 18 pycompat,
19 19 util,
20 20 )
21 21
22 22 from ..revlogutils import nodemap as nodemaputil
23 23 from ..revlogutils import constants as revlog_constants
24 24
25 25 stringio = pycompat.bytesio
26 26
27 27
28 28 _pack = struct.pack
29 29 _unpack = struct.unpack
30 30 _compress = zlib.compress
31 31 _decompress = zlib.decompress
32 32
33 33 # Some code below makes tuples directly because it's more convenient. However,
34 34 # code outside this module should always use dirstatetuple.
35 35 def dirstatetuple(*x):
36 36 # x is a tuple
37 37 return x
38 38
39 39
40 40 def gettype(q):
41 41 return int(q & 0xFFFF)
42 42
43 43
44 44 def offset_type(offset, type):
45 45 return int(int(offset) << 16 | type)
46 46
47 47
48 48 class BaseIndexObject(object):
49 49 # Format of an index entry according to Python's `struct` language
50 50 index_format = revlog_constants.INDEX_ENTRY_V1
51 51 # Size of a C unsigned long long int, platform independent
52 52 big_int_size = struct.calcsize(b'>Q')
53 53 # Size of a C long int, platform independent
54 54 int_size = struct.calcsize(b'>i')
55 55 # An empty index entry, used as a default value to be overridden, or nullrev
56 56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
57 57
58 58 @util.propertycache
59 59 def entry_size(self):
60 60 return self.index_format.size
61 61
62 62 @property
63 63 def nodemap(self):
64 64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
65 65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
66 66 return self._nodemap
67 67
68 68 @util.propertycache
69 69 def _nodemap(self):
70 70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
71 71 for r in range(0, len(self)):
72 72 n = self[r][7]
73 73 nodemap[n] = r
74 74 return nodemap
75 75
76 76 def has_node(self, node):
77 77 """return True if the node exist in the index"""
78 78 return node in self._nodemap
79 79
80 80 def rev(self, node):
81 81 """return a revision for a node
82 82
83 83 If the node is unknown, raise a RevlogError"""
84 84 return self._nodemap[node]
85 85
86 86 def get_rev(self, node):
87 87 """return a revision for a node
88 88
89 89 If the node is unknown, return None"""
90 90 return self._nodemap.get(node)
91 91
92 92 def _stripnodes(self, start):
93 93 if '_nodemap' in vars(self):
94 94 for r in range(start, len(self)):
95 95 n = self[r][7]
96 96 del self._nodemap[n]
97 97
98 98 def clearcaches(self):
99 99 self.__dict__.pop('_nodemap', None)
100 100
101 101 def __len__(self):
102 102 return self._lgt + len(self._extra)
103 103
104 104 def append(self, tup):
105 105 if '_nodemap' in vars(self):
106 106 self._nodemap[tup[7]] = len(self)
107 107 data = self.index_format.pack(*tup)
108 108 self._extra.append(data)
109 109
110 110 def _check_index(self, i):
111 111 if not isinstance(i, int):
112 112 raise TypeError(b"expecting int indexes")
113 113 if i < 0 or i >= len(self):
114 114 raise IndexError
115 115
116 116 def __getitem__(self, i):
117 117 if i == -1:
118 118 return self.null_item
119 119 self._check_index(i)
120 120 if i >= self._lgt:
121 121 data = self._extra[i - self._lgt]
122 122 else:
123 123 index = self._calculate_index(i)
124 124 data = self._data[index : index + self.entry_size]
125 125 r = self.index_format.unpack(data)
126 126 if self._lgt and i == 0:
127 127 r = (offset_type(0, gettype(r[0])),) + r[1:]
128 128 return r
129 129
130 def entry_binary(self, rev, header):
130 def pack_header(self, header):
131 """pack header information as binary"""
132 v_fmt = revlog_constants.INDEX_HEADER
133 return v_fmt.pack(header)
134
135 def entry_binary(self, rev):
131 136 """return the raw binary string representing a revision"""
132 137 entry = self[rev]
133 138 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
134 139 if rev == 0:
135 v_fmt = revlog_constants.INDEX_HEADER
136 v_bin = v_fmt.pack(header)
137 p = v_bin + p[v_fmt.size :]
140 p = p[revlog_constants.INDEX_HEADER.size :]
138 141 return p
139 142
140 143
141 144 class IndexObject(BaseIndexObject):
142 145 def __init__(self, data):
143 146 assert len(data) % self.entry_size == 0, (
144 147 len(data),
145 148 self.entry_size,
146 149 len(data) % self.entry_size,
147 150 )
148 151 self._data = data
149 152 self._lgt = len(data) // self.entry_size
150 153 self._extra = []
151 154
152 155 def _calculate_index(self, i):
153 156 return i * self.entry_size
154 157
155 158 def __delitem__(self, i):
156 159 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
157 160 raise ValueError(b"deleting slices only supports a:-1 with step 1")
158 161 i = i.start
159 162 self._check_index(i)
160 163 self._stripnodes(i)
161 164 if i < self._lgt:
162 165 self._data = self._data[: i * self.entry_size]
163 166 self._lgt = i
164 167 self._extra = []
165 168 else:
166 169 self._extra = self._extra[: i - self._lgt]
167 170
168 171
169 172 class PersistentNodeMapIndexObject(IndexObject):
170 173 """a Debug oriented class to test persistent nodemap
171 174
172 175 We need a simple python object to test API and higher level behavior. See
173 176 the Rust implementation for more serious usage. This should be used only
174 177 through the dedicated `devel.persistent-nodemap` config.
175 178 """
176 179
177 180 def nodemap_data_all(self):
178 181 """Return bytes containing a full serialization of a nodemap
179 182
180 183 The nodemap should be valid for the full set of revisions in the
181 184 index."""
182 185 return nodemaputil.persistent_data(self)
183 186
184 187 def nodemap_data_incremental(self):
185 188 """Return bytes containing a incremental update to persistent nodemap
186 189
187 190 This containst the data for an append-only update of the data provided
188 191 in the last call to `update_nodemap_data`.
189 192 """
190 193 if self._nm_root is None:
191 194 return None
192 195 docket = self._nm_docket
193 196 changed, data = nodemaputil.update_persistent_data(
194 197 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
195 198 )
196 199
197 200 self._nm_root = self._nm_max_idx = self._nm_docket = None
198 201 return docket, changed, data
199 202
200 203 def update_nodemap_data(self, docket, nm_data):
201 204 """provide full block of persisted binary data for a nodemap
202 205
203 206 The data are expected to come from disk. See `nodemap_data_all` for a
204 207 produceur of such data."""
205 208 if nm_data is not None:
206 209 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
207 210 if self._nm_root:
208 211 self._nm_docket = docket
209 212 else:
210 213 self._nm_root = self._nm_max_idx = self._nm_docket = None
211 214
212 215
213 216 class InlinedIndexObject(BaseIndexObject):
214 217 def __init__(self, data, inline=0):
215 218 self._data = data
216 219 self._lgt = self._inline_scan(None)
217 220 self._inline_scan(self._lgt)
218 221 self._extra = []
219 222
220 223 def _inline_scan(self, lgt):
221 224 off = 0
222 225 if lgt is not None:
223 226 self._offsets = [0] * lgt
224 227 count = 0
225 228 while off <= len(self._data) - self.entry_size:
226 229 start = off + self.big_int_size
227 230 (s,) = struct.unpack(
228 231 b'>i',
229 232 self._data[start : start + self.int_size],
230 233 )
231 234 if lgt is not None:
232 235 self._offsets[count] = off
233 236 count += 1
234 237 off += self.entry_size + s
235 238 if off != len(self._data):
236 239 raise ValueError(b"corrupted data")
237 240 return count
238 241
239 242 def __delitem__(self, i):
240 243 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
241 244 raise ValueError(b"deleting slices only supports a:-1 with step 1")
242 245 i = i.start
243 246 self._check_index(i)
244 247 self._stripnodes(i)
245 248 if i < self._lgt:
246 249 self._offsets = self._offsets[:i]
247 250 self._lgt = i
248 251 self._extra = []
249 252 else:
250 253 self._extra = self._extra[: i - self._lgt]
251 254
252 255 def _calculate_index(self, i):
253 256 return self._offsets[i]
254 257
255 258
256 259 def parse_index2(data, inline, revlogv2=False):
257 260 if not inline:
258 261 cls = IndexObject2 if revlogv2 else IndexObject
259 262 return cls(data), None
260 263 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
261 264 return cls(data, inline), (0, data)
262 265
263 266
264 267 class Index2Mixin(object):
265 268 index_format = revlog_constants.INDEX_ENTRY_V2
266 269 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
267 270
268 271 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
269 272 """
270 273 Replace an existing index entry's sidedata offset and length with new
271 274 ones.
272 275 This cannot be used outside of the context of sidedata rewriting,
273 276 inside the transaction that creates the revision `i`.
274 277 """
275 278 if i < 0:
276 279 raise KeyError
277 280 self._check_index(i)
278 281 sidedata_format = b">Qi"
279 282 packed_size = struct.calcsize(sidedata_format)
280 283 if i >= self._lgt:
281 284 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
282 285 old = self._extra[i - self._lgt]
283 286 new = old[:64] + packed + old[64 + packed_size :]
284 287 self._extra[i - self._lgt] = new
285 288 else:
286 289 msg = b"cannot rewrite entries outside of this transaction"
287 290 raise KeyError(msg)
288 291
289 def entry_binary(self, rev, header):
292 def entry_binary(self, rev):
290 293 """return the raw binary string representing a revision"""
291 294 entry = self[rev]
292 295 p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
293 296 if rev == 0:
294 v_fmt = revlog_constants.INDEX_HEADER
295 v_bin = v_fmt.pack(header)
296 p = v_bin + p[v_fmt.size :]
297 p = p[revlog_constants.INDEX_HEADER.size :]
297 298 return p
298 299
299 300
300 301 class IndexObject2(Index2Mixin, IndexObject):
301 302 pass
302 303
303 304
304 305 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
305 306 def _inline_scan(self, lgt):
306 307 sidedata_length_pos = 72
307 308 off = 0
308 309 if lgt is not None:
309 310 self._offsets = [0] * lgt
310 311 count = 0
311 312 while off <= len(self._data) - self.entry_size:
312 313 start = off + self.big_int_size
313 314 (data_size,) = struct.unpack(
314 315 b'>i',
315 316 self._data[start : start + self.int_size],
316 317 )
317 318 start = off + sidedata_length_pos
318 319 (side_data_size,) = struct.unpack(
319 320 b'>i', self._data[start : start + self.int_size]
320 321 )
321 322 if lgt is not None:
322 323 self._offsets[count] = off
323 324 count += 1
324 325 off += self.entry_size + data_size + side_data_size
325 326 if off != len(self._data):
326 327 raise ValueError(b"corrupted data")
327 328 return count
328 329
329 330
330 331 def parse_index_devel_nodemap(data, inline):
331 332 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
332 333 return PersistentNodeMapIndexObject(data), None
333 334
334 335
335 336 def parse_dirstate(dmap, copymap, st):
336 337 parents = [st[:20], st[20:40]]
337 338 # dereference fields so they will be local in loop
338 339 format = b">cllll"
339 340 e_size = struct.calcsize(format)
340 341 pos1 = 40
341 342 l = len(st)
342 343
343 344 # the inner loop
344 345 while pos1 < l:
345 346 pos2 = pos1 + e_size
346 347 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
347 348 pos1 = pos2 + e[4]
348 349 f = st[pos2:pos1]
349 350 if b'\0' in f:
350 351 f, c = f.split(b'\0')
351 352 copymap[f] = c
352 353 dmap[f] = e[:4]
353 354 return parents
354 355
355 356
356 357 def pack_dirstate(dmap, copymap, pl, now):
357 358 now = int(now)
358 359 cs = stringio()
359 360 write = cs.write
360 361 write(b"".join(pl))
361 362 for f, e in pycompat.iteritems(dmap):
362 363 if e[0] == b'n' and e[3] == now:
363 364 # The file was last modified "simultaneously" with the current
364 365 # write to dirstate (i.e. within the same second for file-
365 366 # systems with a granularity of 1 sec). This commonly happens
366 367 # for at least a couple of files on 'update'.
367 368 # The user could change the file without changing its size
368 369 # within the same second. Invalidate the file's mtime in
369 370 # dirstate, forcing future 'status' calls to compare the
370 371 # contents of the file if the size is the same. This prevents
371 372 # mistakenly treating such files as clean.
372 373 e = dirstatetuple(e[0], e[1], e[2], -1)
373 374 dmap[f] = e
374 375
375 376 if f in copymap:
376 377 f = b"%s\0%s" % (f, copymap[f])
377 378 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
378 379 write(e)
379 380 write(f)
380 381 return cs.getvalue()
@@ -1,3220 +1,3233 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 FLAG_GENERALDELTA,
38 38 FLAG_INLINE_DATA,
39 39 INDEX_ENTRY_V0,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 nodemap as nodemaputil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the name to prevent pyflakes constraints
87 87 # We need these name available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def getoffset(q):
140 140 return int(q >> 16)
141 141
142 142
143 143 def gettype(q):
144 144 return int(q & 0xFFFF)
145 145
146 146
147 147 def offset_type(offset, type):
148 148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 149 raise ValueError(b'unknown revlog index flags')
150 150 return int(int(offset) << 16 | type)
151 151
152 152
153 153 def _verify_revision(rl, skipflags, state, node):
154 154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 155 point for extensions to influence the operation."""
156 156 if skipflags:
157 157 state[b'skipread'].add(node)
158 158 else:
159 159 # Side-effect: read content and verify hash.
160 160 rl.revision(node)
161 161
162 162
163 163 # True if a fast implementation for persistent-nodemap is available
164 164 #
165 165 # We also consider we have a "fast" implementation in "pure" python because
166 166 # people using pure don't really have performance consideration (and a
167 167 # wheelbarrow of other slowness source)
168 168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 169 parsers, 'BaseIndexObject'
170 170 )
171 171
172 172
173 173 @attr.s(slots=True, frozen=True)
174 174 class _revisioninfo(object):
175 175 """Information about a revision that allows building its fulltext
176 176 node: expected hash of the revision
177 177 p1, p2: parent revs of the revision
178 178 btext: built text cache consisting of a one-element list
179 179 cachedelta: (baserev, uncompressed_delta) or None
180 180 flags: flags associated to the revision storage
181 181
182 182 One of btext[0] or cachedelta must be set.
183 183 """
184 184
185 185 node = attr.ib()
186 186 p1 = attr.ib()
187 187 p2 = attr.ib()
188 188 btext = attr.ib()
189 189 textlen = attr.ib()
190 190 cachedelta = attr.ib()
191 191 flags = attr.ib()
192 192
193 193
194 194 @interfaceutil.implementer(repository.irevisiondelta)
195 195 @attr.s(slots=True)
196 196 class revlogrevisiondelta(object):
197 197 node = attr.ib()
198 198 p1node = attr.ib()
199 199 p2node = attr.ib()
200 200 basenode = attr.ib()
201 201 flags = attr.ib()
202 202 baserevisionsize = attr.ib()
203 203 revision = attr.ib()
204 204 delta = attr.ib()
205 205 sidedata = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208
209 209 @interfaceutil.implementer(repository.iverifyproblem)
210 210 @attr.s(frozen=True)
211 211 class revlogproblem(object):
212 212 warning = attr.ib(default=None)
213 213 error = attr.ib(default=None)
214 214 node = attr.ib(default=None)
215 215
216 216
217 217 class revlogoldindex(list):
218 218 entry_size = INDEX_ENTRY_V0.size
219 219
220 220 @property
221 221 def nodemap(self):
222 222 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
223 223 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
224 224 return self._nodemap
225 225
226 226 @util.propertycache
227 227 def _nodemap(self):
228 228 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
229 229 for r in range(0, len(self)):
230 230 n = self[r][7]
231 231 nodemap[n] = r
232 232 return nodemap
233 233
234 234 def has_node(self, node):
235 235 """return True if the node exist in the index"""
236 236 return node in self._nodemap
237 237
238 238 def rev(self, node):
239 239 """return a revision for a node
240 240
241 241 If the node is unknown, raise a RevlogError"""
242 242 return self._nodemap[node]
243 243
244 244 def get_rev(self, node):
245 245 """return a revision for a node
246 246
247 247 If the node is unknown, return None"""
248 248 return self._nodemap.get(node)
249 249
250 250 def append(self, tup):
251 251 self._nodemap[tup[7]] = len(self)
252 252 super(revlogoldindex, self).append(tup)
253 253
254 254 def __delitem__(self, i):
255 255 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
256 256 raise ValueError(b"deleting slices only supports a:-1 with step 1")
257 257 for r in pycompat.xrange(i.start, len(self)):
258 258 del self._nodemap[self[r][7]]
259 259 super(revlogoldindex, self).__delitem__(i)
260 260
261 261 def clearcaches(self):
262 262 self.__dict__.pop('_nodemap', None)
263 263
264 264 def __getitem__(self, i):
265 265 if i == -1:
266 266 return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
267 267 return list.__getitem__(self, i)
268 268
269 def entry_binary(self, rev, header):
269 def entry_binary(self, rev):
270 270 """return the raw binary string representing a revision"""
271 271 entry = self[rev]
272 272 if gettype(entry[0]):
273 273 raise error.RevlogError(
274 274 _(b'index entry flags need revlog version 1')
275 275 )
276 276 e2 = (
277 277 getoffset(entry[0]),
278 278 entry[1],
279 279 entry[3],
280 280 entry[4],
281 281 self[entry[5]][7],
282 282 self[entry[6]][7],
283 283 entry[7],
284 284 )
285 285 return INDEX_ENTRY_V0.pack(*e2)
286 286
287 def pack_header(self, header):
288 """Pack header information in binary"""
289 return b''
290
287 291
288 292 def parse_index_v0(data, inline):
289 293 s = INDEX_ENTRY_V0.size
290 294 index = []
291 295 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
292 296 n = off = 0
293 297 l = len(data)
294 298 while off + s <= l:
295 299 cur = data[off : off + s]
296 300 off += s
297 301 e = INDEX_ENTRY_V0.unpack(cur)
298 302 # transform to revlogv1 format
299 303 e2 = (
300 304 offset_type(e[0], 0),
301 305 e[1],
302 306 -1,
303 307 e[2],
304 308 e[3],
305 309 nodemap.get(e[4], nullrev),
306 310 nodemap.get(e[5], nullrev),
307 311 e[6],
308 312 )
309 313 index.append(e2)
310 314 nodemap[e[6]] = n
311 315 n += 1
312 316
313 317 index = revlogoldindex(index)
314 318 return index, None
315 319
316 320
317 321 def parse_index_v1(data, inline):
318 322 # call the C implementation to parse the index data
319 323 index, cache = parsers.parse_index2(data, inline)
320 324 return index, cache
321 325
322 326
323 327 def parse_index_v2(data, inline):
324 328 # call the C implementation to parse the index data
325 329 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
326 330 return index, cache
327 331
328 332
329 333 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
330 334
331 335 def parse_index_v1_nodemap(data, inline):
332 336 index, cache = parsers.parse_index_devel_nodemap(data, inline)
333 337 return index, cache
334 338
335 339
336 340 else:
337 341 parse_index_v1_nodemap = None
338 342
339 343
340 344 def parse_index_v1_mixed(data, inline):
341 345 index, cache = parse_index_v1(data, inline)
342 346 return rustrevlog.MixedIndex(index), cache
343 347
344 348
345 349 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
346 350 # signed integer)
347 351 _maxentrysize = 0x7FFFFFFF
348 352
349 353
350 354 class revlog(object):
351 355 """
352 356 the underlying revision storage object
353 357
354 358 A revlog consists of two parts, an index and the revision data.
355 359
356 360 The index is a file with a fixed record size containing
357 361 information on each revision, including its nodeid (hash), the
358 362 nodeids of its parents, the position and offset of its data within
359 363 the data file, and the revision it's based on. Finally, each entry
360 364 contains a linkrev entry that can serve as a pointer to external
361 365 data.
362 366
363 367 The revision data itself is a linear collection of data chunks.
364 368 Each chunk represents a revision and is usually represented as a
365 369 delta against the previous chunk. To bound lookup time, runs of
366 370 deltas are limited to about 2 times the length of the original
367 371 version data. This makes retrieval of a version proportional to
368 372 its size, or O(1) relative to the number of revisions.
369 373
370 374 Both pieces of the revlog are written to in an append-only
371 375 fashion, which means we never need to rewrite a file to insert or
372 376 remove data, and can use some simple techniques to avoid the need
373 377 for locking while reading.
374 378
375 379 If checkambig, indexfile is opened with checkambig=True at
376 380 writing, to avoid file stat ambiguity.
377 381
378 382 If mmaplargeindex is True, and an mmapindexthreshold is set, the
379 383 index will be mmapped rather than read if it is larger than the
380 384 configured threshold.
381 385
382 386 If censorable is True, the revlog can have censored revisions.
383 387
384 388 If `upperboundcomp` is not None, this is the expected maximal gain from
385 389 compression for the data content.
386 390
387 391 `concurrencychecker` is an optional function that receives 3 arguments: a
388 392 file handle, a filename, and an expected position. It should check whether
389 393 the current position in the file handle is valid, and log/warn/fail (by
390 394 raising).
391 395 """
392 396
393 397 _flagserrorclass = error.RevlogError
394 398
395 399 def __init__(
396 400 self,
397 401 opener,
398 402 indexfile,
399 403 datafile=None,
400 404 checkambig=False,
401 405 mmaplargeindex=False,
402 406 censorable=False,
403 407 upperboundcomp=None,
404 408 persistentnodemap=False,
405 409 concurrencychecker=None,
406 410 ):
407 411 """
408 412 create a revlog object
409 413
410 414 opener is a function that abstracts the file opening operation
411 415 and can be used to implement COW semantics or the like.
412 416
413 417 """
414 418 self.upperboundcomp = upperboundcomp
415 419 self.indexfile = indexfile
416 420 self.datafile = datafile or (indexfile[:-2] + b".d")
417 421 self.nodemap_file = None
418 422 if persistentnodemap:
419 423 self.nodemap_file = nodemaputil.get_nodemap_file(
420 424 opener, self.indexfile
421 425 )
422 426
423 427 self.opener = opener
424 428 # When True, indexfile is opened with checkambig=True at writing, to
425 429 # avoid file stat ambiguity.
426 430 self._checkambig = checkambig
427 431 self._mmaplargeindex = mmaplargeindex
428 432 self._censorable = censorable
429 433 # 3-tuple of (node, rev, text) for a raw revision.
430 434 self._revisioncache = None
431 435 # Maps rev to chain base rev.
432 436 self._chainbasecache = util.lrucachedict(100)
433 437 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
434 438 self._chunkcache = (0, b'')
435 439 # How much data to read and cache into the raw revlog data cache.
436 440 self._chunkcachesize = 65536
437 441 self._maxchainlen = None
438 442 self._deltabothparents = True
439 443 self.index = None
440 444 self._nodemap_docket = None
441 445 # Mapping of partial identifiers to full nodes.
442 446 self._pcache = {}
443 447 # Mapping of revision integer to full node.
444 448 self._compengine = b'zlib'
445 449 self._compengineopts = {}
446 450 self._maxdeltachainspan = -1
447 451 self._withsparseread = False
448 452 self._sparserevlog = False
449 453 self._srdensitythreshold = 0.50
450 454 self._srmingapsize = 262144
451 455
452 456 # Make copy of flag processors so each revlog instance can support
453 457 # custom flags.
454 458 self._flagprocessors = dict(flagutil.flagprocessors)
455 459
456 460 # 2-tuple of file handles being used for active writing.
457 461 self._writinghandles = None
458 462
459 463 self._loadindex()
460 464
461 465 self._concurrencychecker = concurrencychecker
462 466
463 467 def _loadindex(self):
464 468 mmapindexthreshold = None
465 469 opts = self.opener.options
466 470
467 471 if b'revlogv2' in opts:
468 472 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
469 473 elif b'revlogv1' in opts:
470 474 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
471 475 if b'generaldelta' in opts:
472 476 newversionflags |= FLAG_GENERALDELTA
473 477 elif b'revlogv0' in self.opener.options:
474 478 newversionflags = REVLOGV0
475 479 else:
476 480 newversionflags = REVLOG_DEFAULT_VERSION
477 481
478 482 if b'chunkcachesize' in opts:
479 483 self._chunkcachesize = opts[b'chunkcachesize']
480 484 if b'maxchainlen' in opts:
481 485 self._maxchainlen = opts[b'maxchainlen']
482 486 if b'deltabothparents' in opts:
483 487 self._deltabothparents = opts[b'deltabothparents']
484 488 self._lazydelta = bool(opts.get(b'lazydelta', True))
485 489 self._lazydeltabase = False
486 490 if self._lazydelta:
487 491 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
488 492 if b'compengine' in opts:
489 493 self._compengine = opts[b'compengine']
490 494 if b'zlib.level' in opts:
491 495 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
492 496 if b'zstd.level' in opts:
493 497 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
494 498 if b'maxdeltachainspan' in opts:
495 499 self._maxdeltachainspan = opts[b'maxdeltachainspan']
496 500 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
497 501 mmapindexthreshold = opts[b'mmapindexthreshold']
498 502 self.hassidedata = bool(opts.get(b'side-data', False))
499 503 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
500 504 withsparseread = bool(opts.get(b'with-sparse-read', False))
501 505 # sparse-revlog forces sparse-read
502 506 self._withsparseread = self._sparserevlog or withsparseread
503 507 if b'sparse-read-density-threshold' in opts:
504 508 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
505 509 if b'sparse-read-min-gap-size' in opts:
506 510 self._srmingapsize = opts[b'sparse-read-min-gap-size']
507 511 if opts.get(b'enableellipsis'):
508 512 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
509 513
510 514 # revlog v0 doesn't have flag processors
511 515 for flag, processor in pycompat.iteritems(
512 516 opts.get(b'flagprocessors', {})
513 517 ):
514 518 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
515 519
516 520 if self._chunkcachesize <= 0:
517 521 raise error.RevlogError(
518 522 _(b'revlog chunk cache size %r is not greater than 0')
519 523 % self._chunkcachesize
520 524 )
521 525 elif self._chunkcachesize & (self._chunkcachesize - 1):
522 526 raise error.RevlogError(
523 527 _(b'revlog chunk cache size %r is not a power of 2')
524 528 % self._chunkcachesize
525 529 )
526 530
527 531 indexdata = b''
528 532 self._initempty = True
529 533 try:
530 534 with self._indexfp() as f:
531 535 if (
532 536 mmapindexthreshold is not None
533 537 and self.opener.fstat(f).st_size >= mmapindexthreshold
534 538 ):
535 539 # TODO: should .close() to release resources without
536 540 # relying on Python GC
537 541 indexdata = util.buffer(util.mmapread(f))
538 542 else:
539 543 indexdata = f.read()
540 544 if len(indexdata) > 0:
541 545 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
542 546 self._initempty = False
543 547 else:
544 548 versionflags = newversionflags
545 549 except IOError as inst:
546 550 if inst.errno != errno.ENOENT:
547 551 raise
548 552
549 553 versionflags = newversionflags
550 554
551 555 self.version = versionflags
552 556
553 557 flags = versionflags & ~0xFFFF
554 558 fmt = versionflags & 0xFFFF
555 559
556 560 if fmt == REVLOGV0:
557 561 if flags:
558 562 raise error.RevlogError(
559 563 _(b'unknown flags (%#04x) in version %d revlog %s')
560 564 % (flags >> 16, fmt, self.indexfile)
561 565 )
562 566
563 567 self._inline = False
564 568 self._generaldelta = False
565 569
566 570 elif fmt == REVLOGV1:
567 571 if flags & ~REVLOGV1_FLAGS:
568 572 raise error.RevlogError(
569 573 _(b'unknown flags (%#04x) in version %d revlog %s')
570 574 % (flags >> 16, fmt, self.indexfile)
571 575 )
572 576
573 577 self._inline = versionflags & FLAG_INLINE_DATA
574 578 self._generaldelta = versionflags & FLAG_GENERALDELTA
575 579
576 580 elif fmt == REVLOGV2:
577 581 if flags & ~REVLOGV2_FLAGS:
578 582 raise error.RevlogError(
579 583 _(b'unknown flags (%#04x) in version %d revlog %s')
580 584 % (flags >> 16, fmt, self.indexfile)
581 585 )
582 586
583 587 # There is a bug in the transaction handling when going from an
584 588 # inline revlog to a separate index and data file. Turn it off until
585 589 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
586 590 # See issue6485
587 591 self._inline = False
588 592 # generaldelta implied by version 2 revlogs.
589 593 self._generaldelta = True
590 594
591 595 else:
592 596 raise error.RevlogError(
593 597 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
594 598 )
595 599
596 600 self.nodeconstants = sha1nodeconstants
597 601 self.nullid = self.nodeconstants.nullid
598 602
599 603 # sparse-revlog can't be on without general-delta (issue6056)
600 604 if not self._generaldelta:
601 605 self._sparserevlog = False
602 606
603 607 self._storedeltachains = True
604 608
605 609 devel_nodemap = (
606 610 self.nodemap_file
607 611 and opts.get(b'devel-force-nodemap', False)
608 612 and parse_index_v1_nodemap is not None
609 613 )
610 614
611 615 use_rust_index = False
612 616 if rustrevlog is not None:
613 617 if self.nodemap_file is not None:
614 618 use_rust_index = True
615 619 else:
616 620 use_rust_index = self.opener.options.get(b'rust.index')
617 621
618 622 self._parse_index = parse_index_v1
619 623 if self.version == REVLOGV0:
620 624 self._parse_index = parse_index_v0
621 625 elif fmt == REVLOGV2:
622 626 self._parse_index = parse_index_v2
623 627 elif devel_nodemap:
624 628 self._parse_index = parse_index_v1_nodemap
625 629 elif use_rust_index:
626 630 self._parse_index = parse_index_v1_mixed
627 631 try:
628 632 d = self._parse_index(indexdata, self._inline)
629 633 index, _chunkcache = d
630 634 use_nodemap = (
631 635 not self._inline
632 636 and self.nodemap_file is not None
633 637 and util.safehasattr(index, 'update_nodemap_data')
634 638 )
635 639 if use_nodemap:
636 640 nodemap_data = nodemaputil.persisted_data(self)
637 641 if nodemap_data is not None:
638 642 docket = nodemap_data[0]
639 643 if (
640 644 len(d[0]) > docket.tip_rev
641 645 and d[0][docket.tip_rev][7] == docket.tip_node
642 646 ):
643 647 # no changelog tampering
644 648 self._nodemap_docket = docket
645 649 index.update_nodemap_data(*nodemap_data)
646 650 except (ValueError, IndexError):
647 651 raise error.RevlogError(
648 652 _(b"index %s is corrupted") % self.indexfile
649 653 )
650 654 self.index, self._chunkcache = d
651 655 if not self._chunkcache:
652 656 self._chunkclear()
653 657 # revnum -> (chain-length, sum-delta-length)
654 658 self._chaininfocache = util.lrucachedict(500)
655 659 # revlog header -> revlog compressor
656 660 self._decompressors = {}
657 661
658 662 @util.propertycache
659 663 def _compressor(self):
660 664 engine = util.compengines[self._compengine]
661 665 return engine.revlogcompressor(self._compengineopts)
662 666
663 667 def _indexfp(self, mode=b'r'):
664 668 """file object for the revlog's index file"""
665 669 args = {'mode': mode}
666 670 if mode != b'r':
667 671 args['checkambig'] = self._checkambig
668 672 if mode == b'w':
669 673 args['atomictemp'] = True
670 674 return self.opener(self.indexfile, **args)
671 675
    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file

        ``mode`` is forwarded to ``self.opener``; defaults to read-only.
        """
        return self.opener(self.datafile, mode=mode)
675 679
    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data

        Context manager yielding a readable file handle for revision data.
        For inline revlogs the data lives inside the index file, so that
        file is used instead of the data file.  ``existingfp`` lets a
        caller reuse an already-open handle.
        """
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
701 705
702 706 def tiprev(self):
703 707 return len(self.index) - 1
704 708
705 709 def tip(self):
706 710 return self.node(self.tiprev())
707 711
    def __contains__(self, rev):
        # membership is defined on revision numbers, not nodes
        return 0 <= rev < len(self)
710 714
    def __len__(self):
        # number of revisions stored in this revlog
        return len(self.index)
713 717
    def __iter__(self):
        # iterate revision numbers in ascending order
        return iter(pycompat.xrange(len(self)))
716 720
717 721 def revs(self, start=0, stop=None):
718 722 """iterate over all rev in this revlog (from start to stop)"""
719 723 return storageutil.iterrevs(len(self), start=start, stop=stop)
720 724
    @property
    def nodemap(self):
        # Deprecated accessor kept for compatibility; emits a one-time
        # developer warning pointing at the index-based replacements.
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap
729 733
    @property
    def _nodecache(self):
        # Deprecated accessor kept for compatibility with old callers.
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap
735 739
736 740 def hasnode(self, node):
737 741 try:
738 742 self.rev(node)
739 743 return True
740 744 except KeyError:
741 745 return False
742 746
743 747 def candelta(self, baserev, rev):
744 748 """whether two revisions (baserev, rev) can be delta-ed or not"""
745 749 # Disable delta if either rev requires a content-changing flag
746 750 # processor (ex. LFS). This is because such flag processor can alter
747 751 # the rawtext content that the delta will be based on, and two clients
748 752 # could have a same revlog node with different flags (i.e. different
749 753 # rawtext contents) and the delta could be incompatible.
750 754 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
751 755 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
752 756 ):
753 757 return False
754 758 return True
755 759
756 760 def update_caches(self, transaction):
757 761 if self.nodemap_file is not None:
758 762 if transaction is None:
759 763 nodemaputil.update_persistent_nodemap(self)
760 764 else:
761 765 nodemaputil.setup_persistent_nodemap(transaction, self)
762 766
    def clearcaches(self):
        """Drop every in-memory cache and re-seed nodemap data from disk."""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self.nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)
782 786
    def rev(self, node):
        """Return the revision number bound to ``node``.

        Raises WdirUnsupported for the working-directory pseudo-nodes and
        LookupError when the node is unknown.
        """
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))
796 800
797 801 # Accessors for index entries.
798 802
799 803 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
800 804 # are flags.
801 805 def start(self, rev):
802 806 return int(self.index[rev][0] >> 16)
803 807
804 808 def flags(self, rev):
805 809 return self.index[rev][0] & 0xFFFF
806 810
807 811 def length(self, rev):
808 812 return self.index[rev][1]
809 813
810 814 def sidedata_length(self, rev):
811 815 if self.version & 0xFFFF != REVLOGV2:
812 816 return 0
813 817 return self.index[rev][9]
814 818
815 819 def rawsize(self, rev):
816 820 """return the length of the uncompressed text for a given revision"""
817 821 l = self.index[rev][2]
818 822 if l >= 0:
819 823 return l
820 824
821 825 t = self.rawdata(rev)
822 826 return len(t)
823 827
824 828 def size(self, rev):
825 829 """length of non-raw text (processed by a "read" flag processor)"""
826 830 # fast path: if no "read" flag processor could change the content,
827 831 # size is rawsize. note: ELLIPSIS is known to not change the content.
828 832 flags = self.flags(rev)
829 833 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
830 834 return self.rawsize(rev)
831 835
832 836 return len(self.revision(rev, raw=False))
833 837
834 838 def chainbase(self, rev):
835 839 base = self._chainbasecache.get(rev)
836 840 if base is not None:
837 841 return base
838 842
839 843 index = self.index
840 844 iterrev = rev
841 845 base = index[iterrev][3]
842 846 while base != iterrev:
843 847 iterrev = base
844 848 base = index[iterrev][3]
845 849
846 850 self._chainbasecache[rev] = base
847 851 return base
848 852
849 853 def linkrev(self, rev):
850 854 return self.index[rev][4]
851 855
    def parentrevs(self, rev):
        """Return the two parent revisions of ``rev`` as a tuple.

        When p1 is null the pair is swapped so that a null parent always
        lands in the second slot.  Raises WdirUnsupported for the
        working-directory pseudo-revision.
        """
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            # p1 is null: present the (possibly non-null) p2 first
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs
866 870
    def node(self, rev):
        """Return the node id of revision number ``rev`` (index field 7).

        Raises WdirUnsupported for the working-directory pseudo-revision.
        """
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
874 878
875 879 # Derived from index values.
876 880
877 881 def end(self, rev):
878 882 return self.start(rev) + self.length(rev)
879 883
880 884 def parents(self, node):
881 885 i = self.index
882 886 d = i[self.rev(node)]
883 887 # inline node() to avoid function call overhead
884 888 if d[5] == self.nullid:
885 889 return i[d[6]][7], i[d[5]][7]
886 890 else:
887 891 return i[d[5]][7], i[d[6]][7]
888 892
889 893 def chainlen(self, rev):
890 894 return self._chaininfo(rev)[0]
891 895
    def _chaininfo(self, rev):
        """Return ``(chain-length, sum-of-stored-delta-lengths)`` for ``rev``.

        Walks the delta chain toward its base, short-circuiting through and
        populating ``self._chaininfocache`` along the way.
        """
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # the chain base is the entry whose base pointer (field 3) is itself
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                # without generaldelta, deltas are always against rev - 1
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
922 926
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        # walk until the chain base (an entry that is its own base) or stoprev
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            # include the chain base itself when we were not stopped early
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
963 967
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        # (node() raises for revisions that are not visible here)
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
984 988
    def descendants(self, revs):
        """Generate revision numbers of the descendants of ``revs``."""
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)
987 991
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            """Set-like view over a lazy iterable plus explicit additions."""

            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        # (breadth-first walk back through parents)
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
1051 1055
1052 1056 def incrementalmissingrevs(self, common=None):
1053 1057 """Return an object that can be used to incrementally compute the
1054 1058 revision numbers of the ancestors of arbitrary sets that are not
1055 1059 ancestors of common. This is an ancestor.incrementalmissingancestors
1056 1060 object.
1057 1061
1058 1062 'common' is a list of revision numbers. If common is not supplied, uses
1059 1063 nullrev.
1060 1064 """
1061 1065 if common is None:
1062 1066 common = [nullrev]
1063 1067
1064 1068 if rustancestor is not None:
1065 1069 return rustancestor.MissingAncestors(self.index, common)
1066 1070 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1067 1071
1068 1072 def findmissingrevs(self, common=None, heads=None):
1069 1073 """Return the revision numbers of the ancestors of heads that
1070 1074 are not ancestors of common.
1071 1075
1072 1076 More specifically, return a list of revision numbers corresponding to
1073 1077 nodes N such that every N satisfies the following constraints:
1074 1078
1075 1079 1. N is an ancestor of some node in 'heads'
1076 1080 2. N is not an ancestor of any node in 'common'
1077 1081
1078 1082 The list is sorted by revision number, meaning it is
1079 1083 topologically sorted.
1080 1084
1081 1085 'heads' and 'common' are both lists of revision numbers. If heads is
1082 1086 not supplied, uses all of the revlog's heads. If common is not
1083 1087 supplied, uses nullid."""
1084 1088 if common is None:
1085 1089 common = [nullrev]
1086 1090 if heads is None:
1087 1091 heads = self.headrevs()
1088 1092
1089 1093 inc = self.incrementalmissingrevs(common=common)
1090 1094 return inc.missingancestors(heads)
1091 1095
1092 1096 def findmissing(self, common=None, heads=None):
1093 1097 """Return the ancestors of heads that are not ancestors of common.
1094 1098
1095 1099 More specifically, return a list of nodes N such that every N
1096 1100 satisfies the following constraints:
1097 1101
1098 1102 1. N is an ancestor of some node in 'heads'
1099 1103 2. N is not an ancestor of any node in 'common'
1100 1104
1101 1105 The list is sorted by revision number, meaning it is
1102 1106 topologically sorted.
1103 1107
1104 1108 'heads' and 'common' are both lists of node IDs. If heads is
1105 1109 not supplied, uses all of the revlog's heads. If common is not
1106 1110 supplied, uses nullid."""
1107 1111 if common is None:
1108 1112 common = [self.nullid]
1109 1113 if heads is None:
1110 1114 heads = self.heads()
1111 1115
1112 1116 common = [self.rev(n) for n in common]
1113 1117 heads = [self.rev(n) for n in heads]
1114 1118
1115 1119 inc = self.incrementalmissingrevs(common=common)
1116 1120 return [self.node(r) for r in inc.missingancestors(heads)]
1117 1121
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
1277 1281
1278 1282 def headrevs(self, revs=None):
1279 1283 if revs is None:
1280 1284 try:
1281 1285 return self.index.headrevs()
1282 1286 except AttributeError:
1283 1287 return self._headrevs()
1284 1288 if rustdagop is not None:
1285 1289 return rustdagop.headrevs(self.index, revs)
1286 1290 return dagop.headrevs(revs, self._uncheckedparentrevs)
1287 1291
1288 1292 def computephases(self, roots):
1289 1293 return self.index.computephasesmapsets(roots)
1290 1294
1291 1295 def _headrevs(self):
1292 1296 count = len(self)
1293 1297 if not count:
1294 1298 return [nullrev]
1295 1299 # we won't iter over filtered rev so nobody is a head at start
1296 1300 ishead = [0] * (count + 1)
1297 1301 index = self.index
1298 1302 for r in self:
1299 1303 ishead[r] = 1 # I may be an head
1300 1304 e = index[r]
1301 1305 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1302 1306 return [r for r, val in enumerate(ishead) if val]
1303 1307
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            # unconstrained case: delegate to headrevs() (index fast path)
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]
1329 1333
1330 1334 def children(self, node):
1331 1335 """find the children of a given node"""
1332 1336 c = []
1333 1337 p = self.rev(node)
1334 1338 for r in self.revs(start=p + 1):
1335 1339 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1336 1340 if prevs:
1337 1341 for pr in prevs:
1338 1342 if pr == p:
1339 1343 c.append(self.node(r))
1340 1344 elif p == nullrev:
1341 1345 c.append(self.node(r))
1342 1346 return c
1343 1347
1344 1348 def commonancestorsheads(self, a, b):
1345 1349 """calculate all the heads of the common ancestors of nodes a and b"""
1346 1350 a, b = self.rev(a), self.rev(b)
1347 1351 ancs = self._commonancestorsheads(a, b)
1348 1352 return pycompat.maplist(self.node, ancs)
1349 1353
1350 1354 def _commonancestorsheads(self, *revs):
1351 1355 """calculate all the heads of the common ancestors of revs"""
1352 1356 try:
1353 1357 ancs = self.index.commonancestorsheads(*revs)
1354 1358 except (AttributeError, OverflowError): # C implementation failed
1355 1359 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1356 1360 return ancs
1357 1361
1358 1362 def isancestor(self, a, b):
1359 1363 """return True if node a is an ancestor of node b
1360 1364
1361 1365 A revision is considered an ancestor of itself."""
1362 1366 a, b = self.rev(a), self.rev(b)
1363 1367 return self.isancestorrev(a, b)
1364 1368
1365 1369 def isancestorrev(self, a, b):
1366 1370 """return True if revision a is an ancestor of revision b
1367 1371
1368 1372 A revision is considered an ancestor of itself.
1369 1373
1370 1374 The implementation of this is trivial but the use of
1371 1375 reachableroots is not."""
1372 1376 if a == nullrev:
1373 1377 return True
1374 1378 elif a == b:
1375 1379 return True
1376 1380 elif a > b:
1377 1381 return False
1378 1382 return bool(self.reachableroots(a, [b], [a], includepath=False))
1379 1383
1380 1384 def reachableroots(self, minroot, heads, roots, includepath=False):
1381 1385 """return (heads(::(<roots> and <roots>::<heads>)))
1382 1386
1383 1387 If includepath is True, return (<roots>::<heads>)."""
1384 1388 try:
1385 1389 return self.index.reachableroots2(
1386 1390 minroot, heads, roots, includepath
1387 1391 )
1388 1392 except AttributeError:
1389 1393 return dagop._reachablerootspure(
1390 1394 self.parentrevs, minroot, roots, heads, includepath
1391 1395 )
1392 1396
1393 1397 def ancestor(self, a, b):
1394 1398 """calculate the "best" common ancestor of nodes a and b"""
1395 1399
1396 1400 a, b = self.rev(a), self.rev(b)
1397 1401 try:
1398 1402 ancs = self.index.ancestors(a, b)
1399 1403 except (AttributeError, OverflowError):
1400 1404 ancs = ancestor.ancestors(self.parentrevs, a, b)
1401 1405 if ancs:
1402 1406 # choose a consistent winner when there's a tie
1403 1407 return min(map(self.node, ancs))
1404 1408 return self.nullid
1405 1409
    def _match(self, id):
        """Try to resolve ``id`` to a binary node by exact interpretation.

        ``id`` may be a revision number, a binary node, a decimal revision
        string (possibly negative), or a full 40-character hex node.
        Returns None (implicitly) when nothing matches exactly.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
1439 1443
    def _partialmatch(self, id):
        """Resolve a hex-prefix ``id`` to a binary node.

        Returns None when nothing matches.  Raises
        AmbiguousPrefixLookupError when several nodes share the prefix and
        WdirUnsupported when the prefix can only denote the working
        directory's fake node.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            # fast path: radix tree lookup in the C index
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # slow path: linear scan over the whole index
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
1492 1496
1493 1497 def lookup(self, id):
1494 1498 """locate a node based on:
1495 1499 - revision number or str(revision number)
1496 1500 - nodeid or subset of hex nodeid
1497 1501 """
1498 1502 n = self._match(id)
1499 1503 if n is not None:
1500 1504 return n
1501 1505 n = self._partialmatch(id)
1502 1506 if n:
1503 1507 return n
1504 1508
1505 1509 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1506 1510
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            # True when the prefix resolves to exactly one node (or to the
            # virtual wdir id, which _partialmatch signals by raising)
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            # an all-'f' prefix is ambiguous with the working-directory id
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                # fast path: let the (C) index compute the shortest length
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        # pure-python slow path: grow the prefix until it is unambiguous
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
1555 1559
1556 1560 def cmp(self, node, text):
1557 1561 """compare text with a given file revision
1558 1562
1559 1563 returns True if text is different than what is stored.
1560 1564 """
1561 1565 p1, p2 = self.parents(node)
1562 1566 return storageutil.hashrevisionsha1(text, p1, p2) != node
1563 1567
1564 1568 def _cachesegment(self, offset, data):
1565 1569 """Add a segment to the revlog cache.
1566 1570
1567 1571 Accepts an absolute offset and the data that is at that location.
1568 1572 """
1569 1573 o, d = self._chunkcache
1570 1574 # try to add to existing cache
1571 1575 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1572 1576 self._chunkcache = o, d + data
1573 1577 else:
1574 1578 self._chunkcache = offset, data
1575 1579
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        # round the window down/up to cachesize alignment (cachesize is a
        # power of two, so ~(cachesize - 1) masks off the low bits)
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # the aligned window is larger than the request: slice out the
            # requested span, verifying we actually got enough bytes
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
1635 1639
1636 1640 def _getsegment(self, offset, length, df=None):
1637 1641 """Obtain a segment of raw data from the revlog.
1638 1642
1639 1643 Accepts an absolute offset, length of bytes to obtain, and an
1640 1644 optional file handle to the already-opened revlog. If the file
1641 1645 handle is used, it's original seek position will not be preserved.
1642 1646
1643 1647 Requests for data may be returned from a cache.
1644 1648
1645 1649 Returns a str or a buffer instance of raw byte data.
1646 1650 """
1647 1651 o, d = self._chunkcache
1648 1652 l = len(d)
1649 1653
1650 1654 # is it in the cache?
1651 1655 cachestart = offset - o
1652 1656 cacheend = cachestart + length
1653 1657 if cachestart >= 0 and cacheend <= l:
1654 1658 if cachestart == 0 and cacheend == l:
1655 1659 return d # avoid a copy
1656 1660 return util.buffer(d, cachestart, cacheend - cachestart)
1657 1661
1658 1662 return self._readsegment(offset, length, df=df)
1659 1663
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        # entry[0] packs offset and flags; the offset lives in the high bits
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            # inline revlogs interleave an index entry before each chunk
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)
1693 1697
1694 1698 def _chunk(self, rev, df=None):
1695 1699 """Obtain a single decompressed chunk for a revision.
1696 1700
1697 1701 Accepts an integer revision and an optional already-open file handle
1698 1702 to be used for reading. If used, the seek position of the file will not
1699 1703 be preserved.
1700 1704
1701 1705 Returns a str holding uncompressed data for the requested revision.
1702 1706 """
1703 1707 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1704 1708
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # bind hot attributes to locals for speed inside the loops
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            # split the revs into densely-populated slices to avoid reading
            # large unused gaps
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    # account for the index entries interleaved before data
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1759 1763
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        # reset to an empty cache window anchored at offset 0
        self._chunkcache = (0, b'')
1763 1767
1764 1768 def deltaparent(self, rev):
1765 1769 """return deltaparent of the given revision"""
1766 1770 base = self.index[rev][3]
1767 1771 if base == rev:
1768 1772 return nullrev
1769 1773 elif self._generaldelta:
1770 1774 return base
1771 1775 else:
1772 1776 return rev - 1
1773 1777
1774 1778 def issnapshot(self, rev):
1775 1779 """tells whether rev is a snapshot"""
1776 1780 if not self._sparserevlog:
1777 1781 return self.deltaparent(rev) == nullrev
1778 1782 elif util.safehasattr(self.index, b'issnapshot'):
1779 1783 # directly assign the method to cache the testing and access
1780 1784 self.issnapshot = self.index.issnapshot
1781 1785 return self.issnapshot(rev)
1782 1786 if rev == nullrev:
1783 1787 return True
1784 1788 entry = self.index[rev]
1785 1789 base = entry[3]
1786 1790 if base == rev:
1787 1791 return True
1788 1792 if base == nullrev:
1789 1793 return True
1790 1794 p1 = entry[5]
1791 1795 p2 = entry[6]
1792 1796 if base == p1 or base == p2:
1793 1797 return False
1794 1798 return self.issnapshot(base)
1795 1799
1796 1800 def snapshotdepth(self, rev):
1797 1801 """number of snapshot in the chain before this one"""
1798 1802 if not self.issnapshot(rev):
1799 1803 raise error.ProgrammingError(b'revision %d not a snapshot')
1800 1804 return len(self._deltachain(rev)[0]) - 1
1801 1805
1802 1806 def revdiff(self, rev1, rev2):
1803 1807 """return or calculate a delta between two revisions
1804 1808
1805 1809 The delta calculated is in binary form and is intended to be written to
1806 1810 revlog data directly. So this function needs raw revision data.
1807 1811 """
1808 1812 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1809 1813 return bytes(self._chunk(rev2))
1810 1814
1811 1815 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1812 1816
1813 1817 def _processflags(self, text, flags, operation, raw=False):
1814 1818 """deprecated entry point to access flag processors"""
1815 1819 msg = b'_processflag(...) use the specialized variant'
1816 1820 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1817 1821 if raw:
1818 1822 return text, flagutil.processflagsraw(self, text, flags)
1819 1823 elif operation == b'read':
1820 1824 return flagutil.processflagsread(self, text, flags)
1821 1825 else: # write operation
1822 1826 return flagutil.processflagswrite(self, text, flags)
1823 1827
1824 1828 def revision(self, nodeorrev, _df=None, raw=False):
1825 1829 """return an uncompressed revision of a given node or revision
1826 1830 number.
1827 1831
1828 1832 _df - an existing file handle to read from. (internal-only)
1829 1833 raw - an optional argument specifying if the revision data is to be
1830 1834 treated as raw data when applying flag transforms. 'raw' should be set
1831 1835 to True when generating changegroups or in debug commands.
1832 1836 """
1833 1837 if raw:
1834 1838 msg = (
1835 1839 b'revlog.revision(..., raw=True) is deprecated, '
1836 1840 b'use revlog.rawdata(...)'
1837 1841 )
1838 1842 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1839 1843 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1840 1844
1841 1845 def sidedata(self, nodeorrev, _df=None):
1842 1846 """a map of extra data related to the changeset but not part of the hash
1843 1847
1844 1848 This function currently return a dictionary. However, more advanced
1845 1849 mapping object will likely be used in the future for a more
1846 1850 efficient/lazy code.
1847 1851 """
1848 1852 return self._revisiondata(nodeorrev, _df)[1]
1849 1853
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        """Return a ``(text, sidedata)`` pair for a node or revision number.

        With ``raw=True`` the stored (unprocessed) text is returned and flag
        processors are only run for hash validation.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            # only revlog v2 stores sidedata
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            # run flag processors only for hash validation
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            # cache the validated raw text for subsequent lookups
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata
1900 1904
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # cache hit: the cached text was validated when stored
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        # stoprev lets the chain walk stop early at the cached revision
        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            # heuristic read-ahead budget for sparse reads
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            # no cached base: the first chunk of the chain is the base text
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)
1943 1947
1944 1948 def _sidedata(self, rev):
1945 1949 """Return the sidedata for a given revision number."""
1946 1950 index_entry = self.index[rev]
1947 1951 sidedata_offset = index_entry[8]
1948 1952 sidedata_size = index_entry[9]
1949 1953
1950 1954 if self._inline:
1951 1955 sidedata_offset += self.index.entry_size * (1 + rev)
1952 1956 if sidedata_size == 0:
1953 1957 return {}
1954 1958
1955 1959 segment = self._getsegment(sidedata_offset, sidedata_size)
1956 1960 sidedata = sidedatautil.deserialize_sidedata(segment)
1957 1961 return sidedata
1958 1962
1959 1963 def rawdata(self, nodeorrev, _df=None):
1960 1964 """return an uncompressed raw data of a given node or revision number.
1961 1965
1962 1966 _df - an existing file handle to read from. (internal-only)
1963 1967 """
1964 1968 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1965 1969
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        # default: SHA-1 over sorted parents followed by the text
        return storageutil.hashrevisionsha1(text, p1, p2)
1973 1977
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.

        Raises ``RevlogError`` on mismatch, or ``CensoredNodeError`` when the
        stored text is a censor marker on a censorable revlog.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            # censored content hashes differently by design; report it as
            # censorship rather than corruption
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise
2004 2008
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        # copy each revision's data chunk out to the new .d file
        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        # rewrite the index without the inline flag (and without the data)
        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    # the first entry embeds the revlog version header
                    header = self.index.pack_header(self.version)
                    e = header + e
                fp.write(e)

        # the temp file replace the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()
2053 2060
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.

        Hook for subclasses; the default implementation is a no-op.
        """
2056 2063
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        sidedata - an optional map of extra data; only accepted when the
            revlog supports sidedata (``hassidedata``)
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog who don't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            # node already present: nothing to add
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
2134 2141
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        # open data (non-inline only) and index handles in append mode;
        # always close them again, even if _addrevision raises
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()
2175 2182
2176 2183 def compress(self, data):
2177 2184 """Generate a possibly-compressed representation of data."""
2178 2185 if not data:
2179 2186 return b'', data
2180 2187
2181 2188 compressed = self._compressor.compress(data)
2182 2189
2183 2190 if compressed:
2184 2191 # The revlog compressor added the header in the returned data.
2185 2192 return b'', compressed
2186 2193
2187 2194 if data[0:1] == b'\0':
2188 2195 return b'', data
2189 2196 return b'u', data
2190 2197
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely by at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]  # one-byte compression header

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        # unknown header: look up (and cache) a matching compression engine
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)
2250 2257
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        # the handle delta chunks are read from: the index file for inline
        # revlogs, the data file otherwise
        if self._inline:
            fh = ifh
        else:
            fh = dfh

        # single-element list so deltacomputer can fill in the built text
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            # pre-v2 index entries do not carry the sidedata fields
            e = e[:8]

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            # the very first entry embeds the revlog version header
            header = self.index.pack_header(self.version)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr
2387 2397
2388 2398 def _get_data_offset(self, prev):
2389 2399 """Returns the current offset in the (in-transaction) data file.
2390 2400 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2391 2401 file to store that information: since sidedata can be rewritten to the
2392 2402 end of the data file within a transaction, you can have cases where, for
2393 2403 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2394 2404 to `n - 1`'s sidedata being written after `n`'s data.
2395 2405
2396 2406 TODO cache this in a docket file before getting out of experimental."""
2397 2407 if self.version & 0xFFFF != REVLOGV2:
2398 2408 return self.end(prev)
2399 2409
2400 2410 offset = 0
2401 2411 for rev, entry in enumerate(self.index):
2402 2412 sidedata_end = entry[8] + entry[9]
2403 2413 # Sidedata for a previous rev has potentially been written after
2404 2414 # this rev's end, so take the max.
2405 2415 offset = max(self.end(rev), offset, sidedata_end)
2406 2416 return offset
2407 2417
2408 2418 def _writeentry(
2409 2419 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2410 2420 ):
2411 2421 # Files opened in a+ mode have inconsistent behavior on various
2412 2422 # platforms. Windows requires that a file positioning call be made
2413 2423 # when the file handle transitions between reads and writes. See
2414 2424 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2415 2425 # platforms, Python or the platform itself can be buggy. Some versions
2416 2426 # of Solaris have been observed to not append at the end of the file
2417 2427 # if the file was seeked to before the end. See issue4943 for more.
2418 2428 #
2419 2429 # We work around this issue by inserting a seek() before writing.
2420 2430 # Note: This is likely not necessary on Python 3. However, because
2421 2431 # the file handle is reused for reads and may be seeked there, we need
2422 2432 # to be careful before changing this.
2423 2433 ifh.seek(0, os.SEEK_END)
2424 2434 if dfh:
2425 2435 dfh.seek(0, os.SEEK_END)
2426 2436
2427 2437 curr = len(self) - 1
2428 2438 if not self._inline:
2429 2439 transaction.add(self.datafile, offset)
2430 2440 transaction.add(self.indexfile, curr * len(entry))
2431 2441 if data[0]:
2432 2442 dfh.write(data[0])
2433 2443 dfh.write(data[1])
2434 2444 if sidedata:
2435 2445 dfh.write(sidedata)
2436 2446 ifh.write(entry)
2437 2447 else:
2438 2448 offset += curr * self.index.entry_size
2439 2449 transaction.add(self.indexfile, offset)
2440 2450 ifh.write(entry)
2441 2451 ifh.write(data[0])
2442 2452 ifh.write(data[1])
2443 2453 if sidedata:
2444 2454 ifh.write(sidedata)
2445 2455 self._enforceinlinesize(transaction, ifh)
2446 2456 nodemaputil.setup_persistent_nodemap(transaction, self)
2447 2457
2448 2458 def addgroup(
2449 2459 self,
2450 2460 deltas,
2451 2461 linkmapper,
2452 2462 transaction,
2453 2463 alwayscache=False,
2454 2464 addrevisioncb=None,
2455 2465 duplicaterevisioncb=None,
2456 2466 ):
2457 2467 """
2458 2468 add a delta group
2459 2469
2460 2470 given a set of deltas, add them to the revision log. the
2461 2471 first delta is against its parent, which should be in our
2462 2472 log, the rest are against the previous delta.
2463 2473
2464 2474 If ``addrevisioncb`` is defined, it will be called with arguments of
2465 2475 this revlog and the node that was added.
2466 2476 """
2467 2477
2468 2478 if self._writinghandles:
2469 2479 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2470 2480
2471 2481 r = len(self)
2472 2482 end = 0
2473 2483 if r:
2474 2484 end = self.end(r - 1)
2475 2485 ifh = self._indexfp(b"a+")
2476 2486 isize = r * self.index.entry_size
2477 2487 if self._inline:
2478 2488 transaction.add(self.indexfile, end + isize)
2479 2489 dfh = None
2480 2490 else:
2481 2491 transaction.add(self.indexfile, isize)
2482 2492 transaction.add(self.datafile, end)
2483 2493 dfh = self._datafp(b"a+")
2484 2494
2485 2495 def flush():
2486 2496 if dfh:
2487 2497 dfh.flush()
2488 2498 ifh.flush()
2489 2499
2490 2500 self._writinghandles = (ifh, dfh)
2491 2501 empty = True
2492 2502
2493 2503 try:
2494 2504 deltacomputer = deltautil.deltacomputer(self)
2495 2505 # loop through our set of deltas
2496 2506 for data in deltas:
2497 2507 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2498 2508 link = linkmapper(linknode)
2499 2509 flags = flags or REVIDX_DEFAULT_FLAGS
2500 2510
2501 2511 rev = self.index.get_rev(node)
2502 2512 if rev is not None:
2503 2513 # this can happen if two branches make the same change
2504 2514 self._nodeduplicatecallback(transaction, rev)
2505 2515 if duplicaterevisioncb:
2506 2516 duplicaterevisioncb(self, rev)
2507 2517 empty = False
2508 2518 continue
2509 2519
2510 2520 for p in (p1, p2):
2511 2521 if not self.index.has_node(p):
2512 2522 raise error.LookupError(
2513 2523 p, self.indexfile, _(b'unknown parent')
2514 2524 )
2515 2525
2516 2526 if not self.index.has_node(deltabase):
2517 2527 raise error.LookupError(
2518 2528 deltabase, self.indexfile, _(b'unknown delta base')
2519 2529 )
2520 2530
2521 2531 baserev = self.rev(deltabase)
2522 2532
2523 2533 if baserev != nullrev and self.iscensored(baserev):
2524 2534 # if base is censored, delta must be full replacement in a
2525 2535 # single patch operation
2526 2536 hlen = struct.calcsize(b">lll")
2527 2537 oldlen = self.rawsize(baserev)
2528 2538 newlen = len(delta) - hlen
2529 2539 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2530 2540 raise error.CensoredBaseError(
2531 2541 self.indexfile, self.node(baserev)
2532 2542 )
2533 2543
2534 2544 if not flags and self._peek_iscensored(baserev, delta, flush):
2535 2545 flags |= REVIDX_ISCENSORED
2536 2546
2537 2547 # We assume consumers of addrevisioncb will want to retrieve
2538 2548 # the added revision, which will require a call to
2539 2549 # revision(). revision() will fast path if there is a cache
2540 2550 # hit. So, we tell _addrevision() to always cache in this case.
2541 2551 # We're only using addgroup() in the context of changegroup
2542 2552 # generation so the revision data can always be handled as raw
2543 2553 # by the flagprocessor.
2544 2554 rev = self._addrevision(
2545 2555 node,
2546 2556 None,
2547 2557 transaction,
2548 2558 link,
2549 2559 p1,
2550 2560 p2,
2551 2561 flags,
2552 2562 (baserev, delta),
2553 2563 ifh,
2554 2564 dfh,
2555 2565 alwayscache=alwayscache,
2556 2566 deltacomputer=deltacomputer,
2557 2567 sidedata=sidedata,
2558 2568 )
2559 2569
2560 2570 if addrevisioncb:
2561 2571 addrevisioncb(self, rev)
2562 2572 empty = False
2563 2573
2564 2574 if not dfh and not self._inline:
2565 2575 # addrevision switched from inline to conventional
2566 2576 # reopen the index
2567 2577 ifh.close()
2568 2578 dfh = self._datafp(b"a+")
2569 2579 ifh = self._indexfp(b"a+")
2570 2580 self._writinghandles = (ifh, dfh)
2571 2581 finally:
2572 2582 self._writinghandles = None
2573 2583
2574 2584 if dfh:
2575 2585 dfh.close()
2576 2586 ifh.close()
2577 2587 return not empty
2578 2588
2579 2589 def iscensored(self, rev):
2580 2590 """Check if a file revision is censored."""
2581 2591 if not self._censorable:
2582 2592 return False
2583 2593
2584 2594 return self.flags(rev) & REVIDX_ISCENSORED
2585 2595
2586 2596 def _peek_iscensored(self, baserev, delta, flush):
2587 2597 """Quickly check if a delta produces a censored revision."""
2588 2598 if not self._censorable:
2589 2599 return False
2590 2600
2591 2601 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2592 2602
2593 2603 def getstrippoint(self, minlink):
2594 2604 """find the minimum rev that must be stripped to strip the linkrev
2595 2605
2596 2606 Returns a tuple containing the minimum rev and a set of all revs that
2597 2607 have linkrevs that will be broken by this strip.
2598 2608 """
2599 2609 return storageutil.resolvestripinfo(
2600 2610 minlink,
2601 2611 len(self) - 1,
2602 2612 self.headrevs(),
2603 2613 self.linkrev,
2604 2614 self.parentrevs,
2605 2615 )
2606 2616
2607 2617 def strip(self, minlink, transaction):
2608 2618 """truncate the revlog on the first revision with a linkrev >= minlink
2609 2619
2610 2620 This function is called when we're stripping revision minlink and
2611 2621 its descendants from the repository.
2612 2622
2613 2623 We have to remove all revisions with linkrev >= minlink, because
2614 2624 the equivalent changelog revisions will be renumbered after the
2615 2625 strip.
2616 2626
2617 2627 So we truncate the revlog on the first of these revisions, and
2618 2628 trust that the caller has saved the revisions that shouldn't be
2619 2629 removed and that it'll re-add them after this truncation.
2620 2630 """
2621 2631 if len(self) == 0:
2622 2632 return
2623 2633
2624 2634 rev, _ = self.getstrippoint(minlink)
2625 2635 if rev == len(self):
2626 2636 return
2627 2637
2628 2638 # first truncate the files on disk
2629 2639 end = self.start(rev)
2630 2640 if not self._inline:
2631 2641 transaction.add(self.datafile, end)
2632 2642 end = rev * self.index.entry_size
2633 2643 else:
2634 2644 end += rev * self.index.entry_size
2635 2645
2636 2646 transaction.add(self.indexfile, end)
2637 2647
2638 2648 # then reset internal state in memory to forget those revisions
2639 2649 self._revisioncache = None
2640 2650 self._chaininfocache = util.lrucachedict(500)
2641 2651 self._chunkclear()
2642 2652
2643 2653 del self.index[rev:-1]
2644 2654
2645 2655 def checksize(self):
2646 2656 """Check size of index and data files
2647 2657
2648 2658 return a (dd, di) tuple.
2649 2659 - dd: extra bytes for the "data" file
2650 2660 - di: extra bytes for the "index" file
2651 2661
2652 2662 A healthy revlog will return (0, 0).
2653 2663 """
2654 2664 expected = 0
2655 2665 if len(self):
2656 2666 expected = max(0, self.end(len(self) - 1))
2657 2667
2658 2668 try:
2659 2669 with self._datafp() as f:
2660 2670 f.seek(0, io.SEEK_END)
2661 2671 actual = f.tell()
2662 2672 dd = actual - expected
2663 2673 except IOError as inst:
2664 2674 if inst.errno != errno.ENOENT:
2665 2675 raise
2666 2676 dd = 0
2667 2677
2668 2678 try:
2669 2679 f = self.opener(self.indexfile)
2670 2680 f.seek(0, io.SEEK_END)
2671 2681 actual = f.tell()
2672 2682 f.close()
2673 2683 s = self.index.entry_size
2674 2684 i = max(0, actual // s)
2675 2685 di = actual - (i * s)
2676 2686 if self._inline:
2677 2687 databytes = 0
2678 2688 for r in self:
2679 2689 databytes += max(0, self.length(r))
2680 2690 dd = 0
2681 2691 di = actual - len(self) * s - databytes
2682 2692 except IOError as inst:
2683 2693 if inst.errno != errno.ENOENT:
2684 2694 raise
2685 2695 di = 0
2686 2696
2687 2697 return (dd, di)
2688 2698
2689 2699 def files(self):
2690 2700 res = [self.indexfile]
2691 2701 if not self._inline:
2692 2702 res.append(self.datafile)
2693 2703 return res
2694 2704
2695 2705 def emitrevisions(
2696 2706 self,
2697 2707 nodes,
2698 2708 nodesorder=None,
2699 2709 revisiondata=False,
2700 2710 assumehaveparentrevisions=False,
2701 2711 deltamode=repository.CG_DELTAMODE_STD,
2702 2712 sidedata_helpers=None,
2703 2713 ):
2704 2714 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2705 2715 raise error.ProgrammingError(
2706 2716 b'unhandled value for nodesorder: %s' % nodesorder
2707 2717 )
2708 2718
2709 2719 if nodesorder is None and not self._generaldelta:
2710 2720 nodesorder = b'storage'
2711 2721
2712 2722 if (
2713 2723 not self._storedeltachains
2714 2724 and deltamode != repository.CG_DELTAMODE_PREV
2715 2725 ):
2716 2726 deltamode = repository.CG_DELTAMODE_FULL
2717 2727
2718 2728 return storageutil.emitrevisions(
2719 2729 self,
2720 2730 nodes,
2721 2731 nodesorder,
2722 2732 revlogrevisiondelta,
2723 2733 deltaparentfn=self.deltaparent,
2724 2734 candeltafn=self.candelta,
2725 2735 rawsizefn=self.rawsize,
2726 2736 revdifffn=self.revdiff,
2727 2737 flagsfn=self.flags,
2728 2738 deltamode=deltamode,
2729 2739 revisiondata=revisiondata,
2730 2740 assumehaveparentrevisions=assumehaveparentrevisions,
2731 2741 sidedata_helpers=sidedata_helpers,
2732 2742 )
2733 2743
2734 2744 DELTAREUSEALWAYS = b'always'
2735 2745 DELTAREUSESAMEREVS = b'samerevs'
2736 2746 DELTAREUSENEVER = b'never'
2737 2747
2738 2748 DELTAREUSEFULLADD = b'fulladd'
2739 2749
2740 2750 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2741 2751
2742 2752 def clone(
2743 2753 self,
2744 2754 tr,
2745 2755 destrevlog,
2746 2756 addrevisioncb=None,
2747 2757 deltareuse=DELTAREUSESAMEREVS,
2748 2758 forcedeltabothparents=None,
2749 2759 sidedatacompanion=None,
2750 2760 ):
2751 2761 """Copy this revlog to another, possibly with format changes.
2752 2762
2753 2763 The destination revlog will contain the same revisions and nodes.
2754 2764 However, it may not be bit-for-bit identical due to e.g. delta encoding
2755 2765 differences.
2756 2766
2757 2767 The ``deltareuse`` argument control how deltas from the existing revlog
2758 2768 are preserved in the destination revlog. The argument can have the
2759 2769 following values:
2760 2770
2761 2771 DELTAREUSEALWAYS
2762 2772 Deltas will always be reused (if possible), even if the destination
2763 2773 revlog would not select the same revisions for the delta. This is the
2764 2774 fastest mode of operation.
2765 2775 DELTAREUSESAMEREVS
2766 2776 Deltas will be reused if the destination revlog would pick the same
2767 2777 revisions for the delta. This mode strikes a balance between speed
2768 2778 and optimization.
2769 2779 DELTAREUSENEVER
2770 2780 Deltas will never be reused. This is the slowest mode of execution.
2771 2781 This mode can be used to recompute deltas (e.g. if the diff/delta
2772 2782 algorithm changes).
2773 2783 DELTAREUSEFULLADD
2774 2784 Revision will be re-added as if their were new content. This is
2775 2785 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2776 2786 eg: large file detection and handling.
2777 2787
2778 2788 Delta computation can be slow, so the choice of delta reuse policy can
2779 2789 significantly affect run time.
2780 2790
2781 2791 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2782 2792 two extremes. Deltas will be reused if they are appropriate. But if the
2783 2793 delta could choose a better revision, it will do so. This means if you
2784 2794 are converting a non-generaldelta revlog to a generaldelta revlog,
2785 2795 deltas will be recomputed if the delta's parent isn't a parent of the
2786 2796 revision.
2787 2797
2788 2798 In addition to the delta policy, the ``forcedeltabothparents``
2789 2799 argument controls whether to force compute deltas against both parents
2790 2800 for merges. By default, the current default is used.
2791 2801
2792 2802 If not None, the `sidedatacompanion` is callable that accept two
2793 2803 arguments:
2794 2804
2795 2805 (srcrevlog, rev)
2796 2806
2797 2807 and return a quintet that control changes to sidedata content from the
2798 2808 old revision to the new clone result:
2799 2809
2800 2810 (dropall, filterout, update, new_flags, dropped_flags)
2801 2811
2802 2812 * if `dropall` is True, all sidedata should be dropped
2803 2813 * `filterout` is a set of sidedata keys that should be dropped
2804 2814 * `update` is a mapping of additionnal/new key -> value
2805 2815 * new_flags is a bitfields of new flags that the revision should get
2806 2816 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2807 2817 """
2808 2818 if deltareuse not in self.DELTAREUSEALL:
2809 2819 raise ValueError(
2810 2820 _(b'value for deltareuse invalid: %s') % deltareuse
2811 2821 )
2812 2822
2813 2823 if len(destrevlog):
2814 2824 raise ValueError(_(b'destination revlog is not empty'))
2815 2825
2816 2826 if getattr(self, 'filteredrevs', None):
2817 2827 raise ValueError(_(b'source revlog has filtered revisions'))
2818 2828 if getattr(destrevlog, 'filteredrevs', None):
2819 2829 raise ValueError(_(b'destination revlog has filtered revisions'))
2820 2830
2821 2831 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2822 2832 # if possible.
2823 2833 oldlazydelta = destrevlog._lazydelta
2824 2834 oldlazydeltabase = destrevlog._lazydeltabase
2825 2835 oldamd = destrevlog._deltabothparents
2826 2836
2827 2837 try:
2828 2838 if deltareuse == self.DELTAREUSEALWAYS:
2829 2839 destrevlog._lazydeltabase = True
2830 2840 destrevlog._lazydelta = True
2831 2841 elif deltareuse == self.DELTAREUSESAMEREVS:
2832 2842 destrevlog._lazydeltabase = False
2833 2843 destrevlog._lazydelta = True
2834 2844 elif deltareuse == self.DELTAREUSENEVER:
2835 2845 destrevlog._lazydeltabase = False
2836 2846 destrevlog._lazydelta = False
2837 2847
2838 2848 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2839 2849
2840 2850 self._clone(
2841 2851 tr,
2842 2852 destrevlog,
2843 2853 addrevisioncb,
2844 2854 deltareuse,
2845 2855 forcedeltabothparents,
2846 2856 sidedatacompanion,
2847 2857 )
2848 2858
2849 2859 finally:
2850 2860 destrevlog._lazydelta = oldlazydelta
2851 2861 destrevlog._lazydeltabase = oldlazydeltabase
2852 2862 destrevlog._deltabothparents = oldamd
2853 2863
2854 2864 def _clone(
2855 2865 self,
2856 2866 tr,
2857 2867 destrevlog,
2858 2868 addrevisioncb,
2859 2869 deltareuse,
2860 2870 forcedeltabothparents,
2861 2871 sidedatacompanion,
2862 2872 ):
2863 2873 """perform the core duty of `revlog.clone` after parameter processing"""
2864 2874 deltacomputer = deltautil.deltacomputer(destrevlog)
2865 2875 index = self.index
2866 2876 for rev in self:
2867 2877 entry = index[rev]
2868 2878
2869 2879 # Some classes override linkrev to take filtered revs into
2870 2880 # account. Use raw entry from index.
2871 2881 flags = entry[0] & 0xFFFF
2872 2882 linkrev = entry[4]
2873 2883 p1 = index[entry[5]][7]
2874 2884 p2 = index[entry[6]][7]
2875 2885 node = entry[7]
2876 2886
2877 2887 sidedataactions = (False, [], {}, 0, 0)
2878 2888 if sidedatacompanion is not None:
2879 2889 sidedataactions = sidedatacompanion(self, rev)
2880 2890
2881 2891 # (Possibly) reuse the delta from the revlog if allowed and
2882 2892 # the revlog chunk is a delta.
2883 2893 cachedelta = None
2884 2894 rawtext = None
2885 2895 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2886 2896 dropall = sidedataactions[0]
2887 2897 filterout = sidedataactions[1]
2888 2898 update = sidedataactions[2]
2889 2899 new_flags = sidedataactions[3]
2890 2900 dropped_flags = sidedataactions[4]
2891 2901 text, sidedata = self._revisiondata(rev)
2892 2902 if dropall:
2893 2903 sidedata = {}
2894 2904 for key in filterout:
2895 2905 sidedata.pop(key, None)
2896 2906 sidedata.update(update)
2897 2907 if not sidedata:
2898 2908 sidedata = None
2899 2909
2900 2910 flags |= new_flags
2901 2911 flags &= ~dropped_flags
2902 2912
2903 2913 destrevlog.addrevision(
2904 2914 text,
2905 2915 tr,
2906 2916 linkrev,
2907 2917 p1,
2908 2918 p2,
2909 2919 cachedelta=cachedelta,
2910 2920 node=node,
2911 2921 flags=flags,
2912 2922 deltacomputer=deltacomputer,
2913 2923 sidedata=sidedata,
2914 2924 )
2915 2925 else:
2916 2926 if destrevlog._lazydelta:
2917 2927 dp = self.deltaparent(rev)
2918 2928 if dp != nullrev:
2919 2929 cachedelta = (dp, bytes(self._chunk(rev)))
2920 2930
2921 2931 if not cachedelta:
2922 2932 rawtext = self.rawdata(rev)
2923 2933
2924 2934 ifh = destrevlog.opener(
2925 2935 destrevlog.indexfile, b'a+', checkambig=False
2926 2936 )
2927 2937 dfh = None
2928 2938 if not destrevlog._inline:
2929 2939 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2930 2940 try:
2931 2941 destrevlog._addrevision(
2932 2942 node,
2933 2943 rawtext,
2934 2944 tr,
2935 2945 linkrev,
2936 2946 p1,
2937 2947 p2,
2938 2948 flags,
2939 2949 cachedelta,
2940 2950 ifh,
2941 2951 dfh,
2942 2952 deltacomputer=deltacomputer,
2943 2953 )
2944 2954 finally:
2945 2955 if dfh:
2946 2956 dfh.close()
2947 2957 ifh.close()
2948 2958
2949 2959 if addrevisioncb:
2950 2960 addrevisioncb(self, rev, node)
2951 2961
2952 2962 def censorrevision(self, tr, censornode, tombstone=b''):
2953 2963 if (self.version & 0xFFFF) == REVLOGV0:
2954 2964 raise error.RevlogError(
2955 2965 _(b'cannot censor with version %d revlogs') % self.version
2956 2966 )
2957 2967
2958 2968 censorrev = self.rev(censornode)
2959 2969 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2960 2970
2961 2971 if len(tombstone) > self.rawsize(censorrev):
2962 2972 raise error.Abort(
2963 2973 _(b'censor tombstone must be no longer than censored data')
2964 2974 )
2965 2975
2966 2976 # Rewriting the revlog in place is hard. Our strategy for censoring is
2967 2977 # to create a new revlog, copy all revisions to it, then replace the
2968 2978 # revlogs on transaction close.
2969 2979
2970 2980 newindexfile = self.indexfile + b'.tmpcensored'
2971 2981 newdatafile = self.datafile + b'.tmpcensored'
2972 2982
2973 2983 # This is a bit dangerous. We could easily have a mismatch of state.
2974 2984 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2975 2985 newrl.version = self.version
2976 2986 newrl._generaldelta = self._generaldelta
2977 2987 newrl._parse_index = self._parse_index
2978 2988
2979 2989 for rev in self.revs():
2980 2990 node = self.node(rev)
2981 2991 p1, p2 = self.parents(node)
2982 2992
2983 2993 if rev == censorrev:
2984 2994 newrl.addrawrevision(
2985 2995 tombstone,
2986 2996 tr,
2987 2997 self.linkrev(censorrev),
2988 2998 p1,
2989 2999 p2,
2990 3000 censornode,
2991 3001 REVIDX_ISCENSORED,
2992 3002 )
2993 3003
2994 3004 if newrl.deltaparent(rev) != nullrev:
2995 3005 raise error.Abort(
2996 3006 _(
2997 3007 b'censored revision stored as delta; '
2998 3008 b'cannot censor'
2999 3009 ),
3000 3010 hint=_(
3001 3011 b'censoring of revlogs is not '
3002 3012 b'fully implemented; please report '
3003 3013 b'this bug'
3004 3014 ),
3005 3015 )
3006 3016 continue
3007 3017
3008 3018 if self.iscensored(rev):
3009 3019 if self.deltaparent(rev) != nullrev:
3010 3020 raise error.Abort(
3011 3021 _(
3012 3022 b'cannot censor due to censored '
3013 3023 b'revision having delta stored'
3014 3024 )
3015 3025 )
3016 3026 rawtext = self._chunk(rev)
3017 3027 else:
3018 3028 rawtext = self.rawdata(rev)
3019 3029
3020 3030 newrl.addrawrevision(
3021 3031 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3022 3032 )
3023 3033
3024 3034 tr.addbackup(self.indexfile, location=b'store')
3025 3035 if not self._inline:
3026 3036 tr.addbackup(self.datafile, location=b'store')
3027 3037
3028 3038 self.opener.rename(newrl.indexfile, self.indexfile)
3029 3039 if not self._inline:
3030 3040 self.opener.rename(newrl.datafile, self.datafile)
3031 3041
3032 3042 self.clearcaches()
3033 3043 self._loadindex()
3034 3044
3035 3045 def verifyintegrity(self, state):
3036 3046 """Verifies the integrity of the revlog.
3037 3047
3038 3048 Yields ``revlogproblem`` instances describing problems that are
3039 3049 found.
3040 3050 """
3041 3051 dd, di = self.checksize()
3042 3052 if dd:
3043 3053 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3044 3054 if di:
3045 3055 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3046 3056
3047 3057 version = self.version & 0xFFFF
3048 3058
3049 3059 # The verifier tells us what version revlog we should be.
3050 3060 if version != state[b'expectedversion']:
3051 3061 yield revlogproblem(
3052 3062 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3053 3063 % (self.indexfile, version, state[b'expectedversion'])
3054 3064 )
3055 3065
3056 3066 state[b'skipread'] = set()
3057 3067 state[b'safe_renamed'] = set()
3058 3068
3059 3069 for rev in self:
3060 3070 node = self.node(rev)
3061 3071
3062 3072 # Verify contents. 4 cases to care about:
3063 3073 #
3064 3074 # common: the most common case
3065 3075 # rename: with a rename
3066 3076 # meta: file content starts with b'\1\n', the metadata
3067 3077 # header defined in filelog.py, but without a rename
3068 3078 # ext: content stored externally
3069 3079 #
3070 3080 # More formally, their differences are shown below:
3071 3081 #
3072 3082 # | common | rename | meta | ext
3073 3083 # -------------------------------------------------------
3074 3084 # flags() | 0 | 0 | 0 | not 0
3075 3085 # renamed() | False | True | False | ?
3076 3086 # rawtext[0:2]=='\1\n'| False | True | True | ?
3077 3087 #
3078 3088 # "rawtext" means the raw text stored in revlog data, which
3079 3089 # could be retrieved by "rawdata(rev)". "text"
3080 3090 # mentioned below is "revision(rev)".
3081 3091 #
3082 3092 # There are 3 different lengths stored physically:
3083 3093 # 1. L1: rawsize, stored in revlog index
3084 3094 # 2. L2: len(rawtext), stored in revlog data
3085 3095 # 3. L3: len(text), stored in revlog data if flags==0, or
3086 3096 # possibly somewhere else if flags!=0
3087 3097 #
3088 3098 # L1 should be equal to L2. L3 could be different from them.
3089 3099 # "text" may or may not affect commit hash depending on flag
3090 3100 # processors (see flagutil.addflagprocessor).
3091 3101 #
3092 3102 # | common | rename | meta | ext
3093 3103 # -------------------------------------------------
3094 3104 # rawsize() | L1 | L1 | L1 | L1
3095 3105 # size() | L1 | L2-LM | L1(*) | L1 (?)
3096 3106 # len(rawtext) | L2 | L2 | L2 | L2
3097 3107 # len(text) | L2 | L2 | L2 | L3
3098 3108 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3099 3109 #
3100 3110 # LM: length of metadata, depending on rawtext
3101 3111 # (*): not ideal, see comment in filelog.size
3102 3112 # (?): could be "- len(meta)" if the resolved content has
3103 3113 # rename metadata
3104 3114 #
3105 3115 # Checks needed to be done:
3106 3116 # 1. length check: L1 == L2, in all cases.
3107 3117 # 2. hash check: depending on flag processor, we may need to
3108 3118 # use either "text" (external), or "rawtext" (in revlog).
3109 3119
3110 3120 try:
3111 3121 skipflags = state.get(b'skipflags', 0)
3112 3122 if skipflags:
3113 3123 skipflags &= self.flags(rev)
3114 3124
3115 3125 _verify_revision(self, skipflags, state, node)
3116 3126
3117 3127 l1 = self.rawsize(rev)
3118 3128 l2 = len(self.rawdata(node))
3119 3129
3120 3130 if l1 != l2:
3121 3131 yield revlogproblem(
3122 3132 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3123 3133 node=node,
3124 3134 )
3125 3135
3126 3136 except error.CensoredNodeError:
3127 3137 if state[b'erroroncensored']:
3128 3138 yield revlogproblem(
3129 3139 error=_(b'censored file data'), node=node
3130 3140 )
3131 3141 state[b'skipread'].add(node)
3132 3142 except Exception as e:
3133 3143 yield revlogproblem(
3134 3144 error=_(b'unpacking %s: %s')
3135 3145 % (short(node), stringutil.forcebytestr(e)),
3136 3146 node=node,
3137 3147 )
3138 3148 state[b'skipread'].add(node)
3139 3149
3140 3150 def storageinfo(
3141 3151 self,
3142 3152 exclusivefiles=False,
3143 3153 sharedfiles=False,
3144 3154 revisionscount=False,
3145 3155 trackedsize=False,
3146 3156 storedsize=False,
3147 3157 ):
3148 3158 d = {}
3149 3159
3150 3160 if exclusivefiles:
3151 3161 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3152 3162 if not self._inline:
3153 3163 d[b'exclusivefiles'].append((self.opener, self.datafile))
3154 3164
3155 3165 if sharedfiles:
3156 3166 d[b'sharedfiles'] = []
3157 3167
3158 3168 if revisionscount:
3159 3169 d[b'revisionscount'] = len(self)
3160 3170
3161 3171 if trackedsize:
3162 3172 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3163 3173
3164 3174 if storedsize:
3165 3175 d[b'storedsize'] = sum(
3166 3176 self.opener.stat(path).st_size for path in self.files()
3167 3177 )
3168 3178
3169 3179 return d
3170 3180
3171 3181 def rewrite_sidedata(self, helpers, startrev, endrev):
3172 3182 if self.version & 0xFFFF != REVLOGV2:
3173 3183 return
3174 3184 # inline are not yet supported because they suffer from an issue when
3175 3185 # rewriting them (since it's not an append-only operation).
3176 3186 # See issue6485.
3177 3187 assert not self._inline
3178 3188 if not helpers[1] and not helpers[2]:
3179 3189 # Nothing to generate or remove
3180 3190 return
3181 3191
3182 3192 new_entries = []
3183 3193 # append the new sidedata
3184 3194 with self._datafp(b'a+') as fp:
3185 3195 # Maybe this bug still exists, see revlog._writeentry
3186 3196 fp.seek(0, os.SEEK_END)
3187 3197 current_offset = fp.tell()
3188 3198 for rev in range(startrev, endrev + 1):
3189 3199 entry = self.index[rev]
3190 3200 new_sidedata = storageutil.run_sidedata_helpers(
3191 3201 store=self,
3192 3202 sidedata_helpers=helpers,
3193 3203 sidedata={},
3194 3204 rev=rev,
3195 3205 )
3196 3206
3197 3207 serialized_sidedata = sidedatautil.serialize_sidedata(
3198 3208 new_sidedata
3199 3209 )
3200 3210 if entry[8] != 0 or entry[9] != 0:
3201 3211 # rewriting entries that already have sidedata is not
3202 3212 # supported yet, because it introduces garbage data in the
3203 3213 # revlog.
3204 3214 msg = b"Rewriting existing sidedata is not supported yet"
3205 3215 raise error.Abort(msg)
3206 3216 entry = entry[:8]
3207 3217 entry += (current_offset, len(serialized_sidedata))
3208 3218
3209 3219 fp.write(serialized_sidedata)
3210 3220 new_entries.append(entry)
3211 3221 current_offset += len(serialized_sidedata)
3212 3222
3213 3223 # rewrite the new index entries
3214 3224 with self._indexfp(b'w+') as fp:
3215 3225 fp.seek(startrev * self.index.entry_size)
3216 3226 for i, entry in enumerate(new_entries):
3217 3227 rev = startrev + i
3218 3228 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3219 packed = self.index.entry_binary(rev, self.version)
3229 packed = self.index.entry_binary(rev)
3230 if rev == 0:
3231 header = self.index.pack_header(self.version)
3232 packed = header + packed
3220 3233 fp.write(packed)
@@ -1,499 +1,504 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 buffer::{Element, PyBuffer},
14 14 exc::{IndexError, ValueError},
15 15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 17 };
18 18 use hg::{
19 19 nodemap::{Block, NodeMapError, NodeTree},
20 20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 21 Revision,
22 22 };
23 23 use std::cell::RefCell;
24 24
25 25 /// Return a Struct implementing the Graph trait
26 26 pub(crate) fn pyindex_to_graph(
27 27 py: Python,
28 28 index: PyObject,
29 29 ) -> PyResult<cindex::Index> {
30 30 match index.extract::<MixedIndex>(py) {
31 31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 32 Err(_) => cindex::Index::new(py, index),
33 33 }
34 34 }
35 35
// Python class exposing the C revlog index together with a Rust nodemap.
// Methods either implement the nodemap API directly or re-forward to the
// wrapped C index (see `call_cindex`).
py_class!(pub class MixedIndex |py| {
    data cindex: RefCell<cindex::Index>;
    // Lazily-built node->rev trie; None until first use (see `get_nodetree`)
    data nt: RefCell<Option<NodeTree>>;
    // Last docket seen by `update_nodemap_data`, for incremental persistence
    data docket: RefCell<Option<PyObject>>;
    // Holds a reference to the mmap'ed persistent nodemap data
    data mmap: RefCell<Option<PyBuffer>>;

    def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
        Self::new(py, cindex)
    }

    /// Compatibility layer used for Python consumers needing access to the C index
    ///
    /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
    /// that may need to build a custom `nodetree`, based on a specified revset.
    /// With a Rust implementation of the nodemap, we will be able to get rid of
    /// this, by exposing our own standalone nodemap class,
    /// ready to accept `MixedIndex`.
    def get_cindex(&self) -> PyResult<PyObject> {
        Ok(self.cindex(py).borrow().inner().clone_ref(py))
    }

    // Index API involving nodemap, as defined in mercurial/pure/parsers.py

    /// Return Revision if found, raises a bare `error.RevlogError`
    /// in case of ambiguity, same as C version does
    def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let idx = &*self.cindex(py).borrow();
        let node = node_from_py_bytes(py, &node)?;
        nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
    }

    /// same as `get_rev()` but raises a bare `error.RevlogError` if node
    /// is not found.
    ///
    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
    /// will catch and rewrap with it
    def rev(&self, node: PyBytes) -> PyResult<Revision> {
        self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
    }

    /// return True if the node exist in the index
    def has_node(&self, node: PyBytes) -> PyResult<bool> {
        self.get_rev(py, node).map(|opt| opt.is_some())
    }

    /// find length of shortest hex nodeid of a binary ID
    def shortest(&self, node: PyBytes) -> PyResult<usize> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let idx = &*self.cindex(py).borrow();
        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
        {
            Ok(Some(l)) => Ok(l),
            Ok(None) => Err(revlog_error(py)),
            Err(e) => Err(nodemap_error(py, e)),
        }
    }

    /// Resolve a (possibly partial) hex node id to its full binary node.
    // Accepts str under python3-sys and bytes otherwise; both paths end up
    // as a hex string handed to `NodePrefix::from_hex`.
    def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
        let opt = self.get_nodetree(py)?.borrow();
        let nt = opt.as_ref().unwrap();
        let idx = &*self.cindex(py).borrow();

        let node_as_string = if cfg!(feature = "python3-sys") {
            node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
        }
        else {
            let node = node.extract::<PyBytes>(py)?;
            String::from_utf8_lossy(node.data(py)).to_string()
        };

        let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;

        nt.find_bin(idx, prefix)
            // TODO make an inner API returning the node directly
            .map(|opt| opt.map(
                |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
            .map_err(|e| nodemap_error(py, e))

    }

    /// append an index entry
    // The binary node is at tuple position 7; it is inserted in the
    // nodemap with the revision number the new entry will get.
    def append(&self, tup: PyTuple) -> PyResult<PyObject> {
        if tup.len(py) < 8 {
            // this is better than the panic promised by tup.get_item()
            return Err(
                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
        }
        let node_bytes = tup.get_item(py, 7).extract(py)?;
        let node = node_from_py_object(py, &node_bytes)?;

        let mut idx = self.cindex(py).borrow_mut();
        let rev = idx.len() as Revision;

        idx.append(py, tup)?;
        self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
            .insert(&*idx, &node, rev)
            .map_err(|e| nodemap_error(py, e))?;
        Ok(py.None())
    }

    def __delitem__(&self, key: PyObject) -> PyResult<()> {
        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
        self.cindex(py).borrow().inner().del_item(py, key)?;
        // The nodemap cannot be truncated incrementally here: invalidate
        // it and rebuild from the (now shorter) index.
        let mut opt = self.get_nodetree(py)?.borrow_mut();
        let mut nt = opt.as_mut().unwrap();
        nt.invalidate_all();
        self.fill_nodemap(py, &mut nt)?;
        Ok(())
    }

    //
    // Reforwarded C index API
    //

    // index_methods (tp_methods). Same ordering as in revlog.c

    /// return the gca set of the given revs
    def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "ancestors", args, kw)
    }

    /// return the heads of the common ancestors of the given revs
    def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "commonancestorsheads", args, kw)
    }

    /// Clear the index caches and inner py_class data.
    /// It is Python's responsibility to call `update_nodemap_data` again.
    def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
        self.nt(py).borrow_mut().take();
        self.docket(py).borrow_mut().take();
        self.mmap(py).borrow_mut().take();
        self.call_cindex(py, "clearcaches", args, kw)
    }

    /// return the raw binary string representing a revision
    def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "entry_binary", args, kw)
    }

    /// return a binary packed version of the header
    def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "pack_header", args, kw)
    }

    /// get an index entry
    def get(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "get", args, kw)
    }

    /// compute phases
    def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "computephasesmapsets", args, kw)
    }

    /// reachableroots
    def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "reachableroots2", args, kw)
    }

    /// get head revisions
    def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "headrevs", args, kw)
    }

    /// get filtered head revisions
    def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "headrevsfiltered", args, kw)
    }

    /// True if the object is a snapshot
    def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "issnapshot", args, kw)
    }

    /// Gather snapshot data in a cache dict
    def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "findsnapshots", args, kw)
    }

    /// determine revisions with deltas to reconstruct fulltext
    def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "deltachain", args, kw)
    }

    /// slice planned chunk read to reach a density threshold
    def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "slicechunktodensity", args, kw)
    }

    /// stats for the index
    def stats(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "stats", args, kw)
    }

    // index_sequence_methods and index_mapping_methods.
    //
    // Since we call back through the high level Python API,
    // there's no point making a distinction between index_get
    // and index_getitem.

    def __len__(&self) -> PyResult<usize> {
        self.cindex(py).borrow().inner().len(py)
    }

    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
        // this conversion seems needless, but that's actually because
        // `index_getitem` does not handle conversion from PyLong,
        // which expressions such as [e for e in index] internally use.
        // Note that we don't seem to have a direct way to call
        // PySequence_GetItem (does the job), which would possibly be better
        // for performance
        let key = match key.extract::<Revision>(py) {
            Ok(rev) => rev.to_py_object(py).into_object(),
            Err(_) => key,
        };
        self.cindex(py).borrow().inner().get_item(py, key)
    }

    def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
        self.cindex(py).borrow().inner().set_item(py, key, value)
    }

    def __contains__(&self, item: PyObject) -> PyResult<bool> {
        // ObjectProtocol does not seem to provide contains(), so
        // this is an equivalent implementation of the index_contains()
        // defined in revlog.c
        let cindex = self.cindex(py).borrow();
        match item.extract::<Revision>(py) {
            Ok(rev) => {
                // -1 is the conventional "null revision", hence accepted
                Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
            }
            Err(_) => {
                // not an integer: treat the item as a node id
                cindex.inner().call_method(
                    py,
                    "has_node",
                    PyTuple::new(py, &[item]),
                    None)?
                .extract(py)
            }
        }
    }

    /// Full nodemap bytes, ready to be written to disk as-is
    def nodemap_data_all(&self) -> PyResult<PyBytes> {
        self.inner_nodemap_data_all(py)
    }

    /// Docket plus changed-block count and changed bytes, or None
    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
        self.inner_nodemap_data_incremental(py)
    }
    /// Reload the nodemap from freshly mmap'ed persistent data
    def update_nodemap_data(
        &self,
        docket: PyObject,
        nm_data: PyObject
    ) -> PyResult<PyObject> {
        self.inner_update_nodemap_data(py, docket, nm_data)
    }

    @property
    def entry_size(&self) -> PyResult<PyInt> {
        // simply forwarded from the C index attribute of the same name
        self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
    }

});
299 304
impl MixedIndex {
    /// Wrap a Python index object; all caches start empty.
    fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
        Self::create_instance(
            py,
            RefCell::new(cindex::Index::new(py, cindex)?),
            RefCell::new(None),
            RefCell::new(None),
            RefCell::new(None),
        )
    }

    /// This is scaffolding at this point, but it could also become
    /// a way to start a persistent nodemap or perform a
    /// vacuum / repack operation
    fn fill_nodemap(
        &self,
        py: Python,
        nt: &mut NodeTree,
    ) -> PyResult<PyObject> {
        let index = self.cindex(py).borrow();
        for r in 0..index.len() {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*index, index.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }
        Ok(py.None())
    }

    /// Return the nodetree cell, building the tree from the index first
    /// if it has not been initialized (or was cleared) yet.
    fn get_nodetree<'a>(
        &'a self,
        py: Python<'a>,
    ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
        if self.nt(py).borrow().is_none() {
            let readonly = Box::new(Vec::new());
            let mut nt = NodeTree::load_bytes(readonly, 0);
            self.fill_nodemap(py, &mut nt)?;
            self.nt(py).borrow_mut().replace(nt);
        }
        Ok(self.nt(py))
    }

    /// forward a method call to the underlying C index
    fn call_cindex(
        &self,
        py: Python,
        name: &str,
        args: &PyTuple,
        kwargs: Option<&PyDict>,
    ) -> PyResult<PyObject> {
        self.cindex(py)
            .borrow()
            .inner()
            .call_method(py, name, args, kwargs)
    }

    /// Cheap clone of the wrapped C index (shares the underlying PyObject).
    pub fn clone_cindex(&self, py: Python) -> cindex::Index {
        self.cindex(py).borrow().clone_ref(py)
    }

    /// Returns the full nodemap bytes to be written as-is to disk
    // NOTE: this consumes the cached nodetree (`take()`); callers are
    // expected not to need it afterwards, or to let it be rebuilt lazily.
    fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
        let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
        let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();

        // If there's anything readonly, we need to build the data again from
        // scratch
        let bytes = if readonly.len() > 0 {
            let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
            self.fill_nodemap(py, &mut nt)?;

            let (readonly, bytes) = nt.into_readonly_and_added_bytes();
            assert_eq!(readonly.len(), 0);

            bytes
        } else {
            bytes
        };

        let bytes = PyBytes::new(py, &bytes);
        Ok(bytes)
    }

    /// Returns the last saved docket along with the size of any changed data
    /// (in number of blocks), and said data as bytes.
    fn inner_nodemap_data_incremental(
        &self,
        py: Python,
    ) -> PyResult<PyObject> {
        let docket = self.docket(py).borrow();
        let docket = match docket.as_ref() {
            Some(d) => d,
            // no docket recorded yet: nothing incremental to report
            None => return Ok(py.None()),
        };

        // consumes the cached nodetree, as in `inner_nodemap_data_all`
        let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
        let masked_blocks = node_tree.masked_readonly_blocks();
        let (_, data) = node_tree.into_readonly_and_added_bytes();
        let changed = masked_blocks * std::mem::size_of::<Block>();

        Ok((docket, changed, PyBytes::new(py, &data))
            .to_py_object(py)
            .into_object())
    }

    /// Update the nodemap from the new (mmaped) data.
    /// The docket is kept as a reference for later incremental calls.
    fn inner_update_nodemap_data(
        &self,
        py: Python,
        docket: PyObject,
        nm_data: PyObject,
    ) -> PyResult<PyObject> {
        let buf = PyBuffer::get(py, &nm_data)?;
        let len = buf.item_count();

        // Build a slice from the mmap'ed buffer data
        let cbuf = buf.buf_ptr();
        // SAFETY(review): the buffer is checked just below to be C-contiguous
        // u8 data of `len` items, and `buf` is stored in `self.mmap` further
        // down so the backing memory outlives the slice — confirm the slice
        // is never used after a later `clearcaches`/mmap replacement.
        let bytes = if std::mem::size_of::<u8>() == buf.item_size()
            && buf.is_c_contiguous()
            && u8::is_compatible_format(buf.format())
        {
            unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
        } else {
            return Err(PyErr::new::<ValueError, _>(
                py,
                "Nodemap data buffer has an invalid memory representation"
                    .to_string(),
            ));
        };

        // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
        // pointer.
        self.mmap(py).borrow_mut().replace(buf);

        let mut nt = NodeTree::load_bytes(Box::new(bytes), len);

        let data_tip =
            docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
        self.docket(py).borrow_mut().replace(docket.clone_ref(py));
        let idx = self.cindex(py).borrow();
        let current_tip = idx.len();

        // catch up with revisions added since the persistent data was written
        for r in (data_tip + 1)..current_tip as Revision {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }

        *self.nt(py).borrow_mut() = Some(nt);

        Ok(py.None())
    }
}
455 460
456 461 fn revlog_error(py: Python) -> PyErr {
457 462 match py
458 463 .import("mercurial.error")
459 464 .and_then(|m| m.get(py, "RevlogError"))
460 465 {
461 466 Err(e) => e,
462 467 Ok(cls) => PyErr::from_instance(py, cls),
463 468 }
464 469 }
465 470
466 471 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
467 472 PyErr::new::<ValueError, _>(
468 473 py,
469 474 format!(
470 475 "Inconsistency: Revision {} found in nodemap \
471 476 is not in revlog index",
472 477 rev
473 478 ),
474 479 )
475 480 }
476 481
477 482 /// Standard treatment of NodeMapError
478 483 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
479 484 match err {
480 485 NodeMapError::MultipleResults => revlog_error(py),
481 486 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
482 487 }
483 488 }
484 489
485 490 /// Create the module, with __package__ given from parent
486 491 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
487 492 let dotted_name = &format!("{}.revlog", package);
488 493 let m = PyModule::new(py, dotted_name)?;
489 494 m.add(py, "__package__", package)?;
490 495 m.add(py, "__doc__", "RevLog - Rust implementations")?;
491 496
492 497 m.add_class::<MixedIndex>(py)?;
493 498
494 499 let sys = PyModule::import(py, "sys")?;
495 500 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
496 501 sys_modules.set_item(py, dotted_name, &m)?;
497 502
498 503 Ok(m)
499 504 }
General Comments 0
You need to be logged in to leave comments. Login now