revlog: finer computation of "issnapshot"...
marmoute
r50371:5846bc8a default
@@ -1,3300 +1,3315 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 typedef struct indexObjectStruct indexObject;
27 27
28 28 typedef struct {
29 29 int children[16];
30 30 } nodetreenode;
31 31
32 32 typedef struct {
33 33 int abi_version;
34 34 Py_ssize_t (*index_length)(const indexObject *);
35 35 const char *(*index_node)(indexObject *, Py_ssize_t);
36 36 int (*fast_rank)(indexObject *, Py_ssize_t);
37 37 int (*index_parents)(PyObject *, int, int *);
38 38 } Revlog_CAPI;
39 39
40 40 /*
41 41 * A base-16 trie for fast node->rev mapping.
42 42 *
43 43 * Positive value is index of the next node in the trie
44 44 * Negative value is a leaf: -(rev + 2)
45 45 * Zero is empty
46 46 */
47 47 typedef struct {
48 48 indexObject *index;
49 49 nodetreenode *nodes;
50 50 Py_ssize_t nodelen;
51 51 size_t length; /* # nodes in use */
52 52 size_t capacity; /* # nodes allocated */
53 53 int depth; /* maximum depth of tree */
54 54 int splits; /* # splits performed */
55 55 } nodetree;
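A minimal sketch of the child-slot encoding described in the comment above; these helpers are illustrative only and do not exist in parsers.c, which inlines the arithmetic wherever the trie is searched or updated:

/* Illustrative sketch: meaning of a value v found in nodetreenode.children.
 * v > 0  : index of the next nodetreenode in nodes[]
 * v == 0 : empty slot
 * v < 0  : leaf holding a revision number */
static inline int example_leaf_from_rev(int rev)
{
	return -(rev + 2); /* e.g. rev 0 -> -2, rev 5 -> -7 */
}
static inline int example_rev_from_leaf(int v)
{
	return -(v + 2); /* inverse: -2 -> 0, -7 -> 5 */
}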
56 56
57 57 typedef struct {
58 58 PyObject_HEAD /* ; */
59 59 nodetree nt;
60 60 } nodetreeObject;
61 61
62 62 /*
63 63 * This class has two behaviors.
64 64 *
65 65 * When used in a list-like way (with integer keys), we decode an
66 66 * entry in a RevlogNG index file on demand. We have limited support for
67 67 * integer-keyed insert and delete, only at elements right before the
68 68 * end.
69 69 *
70 70 * With string keys, we lazily perform a reverse mapping from node to
71 71 * rev, using a base-16 trie.
72 72 */
73 73 struct indexObjectStruct {
74 74 PyObject_HEAD
75 75 /* Type-specific fields go here. */
76 76 PyObject *data; /* raw bytes of index */
77 77 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
78 78 PyObject *nullentry; /* fast path for references to null */
79 79 Py_buffer buf; /* buffer of data */
80 80 const char **offsets; /* populated on demand */
81 81 Py_ssize_t length; /* current on-disk number of elements */
82 82 unsigned new_length; /* number of added elements */
83 83 unsigned added_length; /* space reserved for added elements */
84 84 char *added; /* populated on demand */
85 85 PyObject *headrevs; /* cache, invalidated on changes */
86 86 PyObject *filteredrevs; /* filtered revs set */
87 87 nodetree nt; /* base-16 trie */
88 88 int ntinitialized; /* 0 or 1 */
89 89 int ntrev; /* last rev scanned */
90 90 int ntlookups; /* # lookups */
91 91 int ntmisses; /* # lookups that miss the cache */
92 92 int inlined;
93 93 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
94 94 */
95 95 long rust_ext_compat; /* compatibility with being used in rust
96 96 extensions */
97 97 long format_version; /* format version selector (format_*) */
98 98 };
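A rough, hypothetical illustration of how the two behaviors described above compose; index_find_node and index_get are real functions elsewhere in this file, but the wrapper itself is invented for the example and glosses over the distinct not-found/error return codes:

/* Hypothetical helper, not part of the file: node -> rev via the trie,
 * then rev -> decoded entry tuple. */
static PyObject *example_entry_for_node(indexObject *self, const char *node)
{
	int rev = index_find_node(self, node); /* "string key" behavior */
	if (rev < 0)
		return NULL; /* not found, or an error was raised */
	return index_get(self, (Py_ssize_t)rev); /* "integer key" behavior */
}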
99 99
100 100 static Py_ssize_t index_length(const indexObject *self)
101 101 {
102 102 return self->length + self->new_length;
103 103 }
104 104
105 105 static const char nullid[32] = {0};
106 106 static const Py_ssize_t nullrev = -1;
107 107
108 108 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
109 109
110 110 static int index_find_node(indexObject *self, const char *node);
111 111
112 112 #if LONG_MAX == 0x7fffffffL
113 113 static const char *const tuple_format = "Kiiiiiiy#KiBBi";
114 114 #else
115 115 static const char *const tuple_format = "kiiiiiiy#kiBBi";
116 116 #endif
117 117
118 118 /* A RevlogNG v1 index entry is 64 bytes long. */
119 119 static const long v1_entry_size = 64;
120 120
121 121 /* A Revlogv2 index entry is 96 bytes long. */
122 122 static const long v2_entry_size = 96;
123 123
124 124 /* A Changelogv2 index entry is 96 bytes long. */
125 125 static const long cl2_entry_size = 96;
126 126
127 127 /* Internal format version.
128 128 * Must match their counterparts in revlogutils/constants.py */
129 129 static const long format_v1 = 1; /* constants.py: REVLOGV1 */
130 130 static const long format_v2 = 0xDEAD; /* constants.py: REVLOGV2 */
131 131 static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */
132 132
133 133 static const long entry_v1_offset_high = 0;
134 134 static const long entry_v1_offset_offset_flags = 4;
135 135 static const long entry_v1_offset_comp_len = 8;
136 136 static const long entry_v1_offset_uncomp_len = 12;
137 137 static const long entry_v1_offset_base_rev = 16;
138 138 static const long entry_v1_offset_link_rev = 20;
139 139 static const long entry_v1_offset_parent_1 = 24;
140 140 static const long entry_v1_offset_parent_2 = 28;
141 141 static const long entry_v1_offset_node_id = 32;
142 142
143 143 static const long entry_v2_offset_high = 0;
144 144 static const long entry_v2_offset_offset_flags = 4;
145 145 static const long entry_v2_offset_comp_len = 8;
146 146 static const long entry_v2_offset_uncomp_len = 12;
147 147 static const long entry_v2_offset_base_rev = 16;
148 148 static const long entry_v2_offset_link_rev = 20;
149 149 static const long entry_v2_offset_parent_1 = 24;
150 150 static const long entry_v2_offset_parent_2 = 28;
151 151 static const long entry_v2_offset_node_id = 32;
152 152 static const long entry_v2_offset_sidedata_offset = 64;
153 153 static const long entry_v2_offset_sidedata_comp_len = 72;
154 154 static const long entry_v2_offset_all_comp_mode = 76;
155 155 /* next free offset: 77 */
156 156
157 157 static const long entry_cl2_offset_high = 0;
158 158 static const long entry_cl2_offset_offset_flags = 4;
159 159 static const long entry_cl2_offset_comp_len = 8;
160 160 static const long entry_cl2_offset_uncomp_len = 12;
161 161 static const long entry_cl2_offset_parent_1 = 16;
162 162 static const long entry_cl2_offset_parent_2 = 20;
163 163 static const long entry_cl2_offset_node_id = 24;
164 164 static const long entry_cl2_offset_sidedata_offset = 56;
165 165 static const long entry_cl2_offset_sidedata_comp_len = 64;
166 166 static const long entry_cl2_offset_all_comp_mode = 68;
167 167 static const long entry_cl2_offset_rank = 69;
168 168 /* next free offset: 73 */
169 169
170 170 static const char comp_mode_inline = 2;
171 171 static const int rank_unknown = -1;
172 172
173 173 static void raise_revlog_error(void)
174 174 {
175 175 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
176 176
177 177 mod = PyImport_ImportModule("mercurial.error");
178 178 if (mod == NULL) {
179 179 goto cleanup;
180 180 }
181 181
182 182 dict = PyModule_GetDict(mod);
183 183 if (dict == NULL) {
184 184 goto cleanup;
185 185 }
186 186 Py_INCREF(dict);
187 187
188 188 errclass = PyDict_GetItemString(dict, "RevlogError");
189 189 if (errclass == NULL) {
190 190 PyErr_SetString(PyExc_SystemError,
191 191 "could not find RevlogError");
192 192 goto cleanup;
193 193 }
194 194
195 195 /* value of exception is ignored by callers */
196 196 PyErr_SetString(errclass, "RevlogError");
197 197
198 198 cleanup:
199 199 Py_XDECREF(dict);
200 200 Py_XDECREF(mod);
201 201 }
202 202
203 203 /*
204 204 * Return a pointer to the beginning of a RevlogNG record.
205 205 */
206 206 static const char *index_deref(indexObject *self, Py_ssize_t pos)
207 207 {
208 208 if (pos >= self->length)
209 209 return self->added + (pos - self->length) * self->entry_size;
210 210
211 211 if (self->inlined && pos > 0) {
212 212 if (self->offsets == NULL) {
213 213 Py_ssize_t ret;
214 214 self->offsets =
215 215 PyMem_Malloc(self->length * sizeof(*self->offsets));
216 216 if (self->offsets == NULL)
217 217 return (const char *)PyErr_NoMemory();
218 218 ret = inline_scan(self, self->offsets);
219 219 if (ret == -1) {
220 220 return NULL;
221 221 };
222 222 }
223 223 return self->offsets[pos];
224 224 }
225 225
226 226 return (const char *)(self->buf.buf) + pos * self->entry_size;
227 227 }
228 228
229 229 /*
230 230 * Get parents of the given rev.
231 231 *
232 232 * The specified rev must be valid and must not be nullrev. A returned
233 233 * parent revision may be nullrev, but is guaranteed to be in valid range.
234 234 */
235 235 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
236 236 int maxrev)
237 237 {
238 238 const char *data = index_deref(self, rev);
239 239
240 240 if (self->format_version == format_v1) {
241 241 ps[0] = getbe32(data + entry_v1_offset_parent_1);
242 242 ps[1] = getbe32(data + entry_v1_offset_parent_2);
243 243 } else if (self->format_version == format_v2) {
244 244 ps[0] = getbe32(data + entry_v2_offset_parent_1);
245 245 ps[1] = getbe32(data + entry_v2_offset_parent_2);
246 246 } else if (self->format_version == format_cl2) {
247 247 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
248 248 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
249 249 } else {
250 250 raise_revlog_error();
251 251 return -1;
252 252 }
253 253
254 254 /* If index file is corrupted, ps[] may point to invalid revisions. So
255 255 * there is a risk of buffer overflow to trust them unconditionally. */
256 256 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
257 257 PyErr_SetString(PyExc_ValueError, "parent out of range");
258 258 return -1;
259 259 }
260 260 return 0;
261 261 }
262 262
263 263 /*
264 264 * Get parents of the given rev.
265 265 *
266 266 * If the specified rev is out of range, IndexError will be raised. If the
267 267 * revlog entry is corrupted, ValueError may be raised.
268 268 *
269 269 * Returns 0 on success or -1 on failure.
270 270 */
271 271 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
272 272 {
273 273 int tiprev;
274 274 if (!op || !HgRevlogIndex_Check(op) || !ps) {
275 275 PyErr_BadInternalCall();
276 276 return -1;
277 277 }
278 278 tiprev = (int)index_length((indexObject *)op) - 1;
279 279 if (rev < -1 || rev > tiprev) {
280 280 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
281 281 return -1;
282 282 } else if (rev == -1) {
283 283 ps[0] = ps[1] = -1;
284 284 return 0;
285 285 } else {
286 286 return index_get_parents((indexObject *)op, rev, ps, tiprev);
287 287 }
288 288 }
289 289
290 290 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
291 291 {
292 292 const char *data;
293 293 uint64_t offset;
294 294
295 295 if (rev == nullrev)
296 296 return 0;
297 297
298 298 data = index_deref(self, rev);
299 299
300 300 if (self->format_version == format_v1) {
301 301 offset = getbe32(data + entry_v1_offset_offset_flags);
302 302 if (rev == 0) {
303 303 /* mask out version number for the first entry */
304 304 offset &= 0xFFFF;
305 305 } else {
306 306 uint32_t offset_high =
307 307 getbe32(data + entry_v1_offset_high);
308 308 offset |= ((uint64_t)offset_high) << 32;
309 309 }
310 310 } else if (self->format_version == format_v2) {
311 311 offset = getbe32(data + entry_v2_offset_offset_flags);
312 312 if (rev == 0) {
313 313 /* mask out version number for the first entry */
314 314 offset &= 0xFFFF;
315 315 } else {
316 316 uint32_t offset_high =
317 317 getbe32(data + entry_v2_offset_high);
318 318 offset |= ((uint64_t)offset_high) << 32;
319 319 }
320 320 } else if (self->format_version == format_cl2) {
321 321 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
322 322 offset = getbe32(data + entry_cl2_offset_offset_flags);
323 323 offset |= ((uint64_t)offset_high) << 32;
324 324 } else {
325 325 raise_revlog_error();
326 326 return -1;
327 327 }
328 328
329 329 return (int64_t)(offset >> 16);
330 330 }
331 331
332 332 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
333 333 {
334 334 const char *data;
335 335 int tmp;
336 336
337 337 if (rev == nullrev)
338 338 return 0;
339 339
340 340 data = index_deref(self, rev);
341 341
342 342 if (self->format_version == format_v1) {
343 343 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
344 344 } else if (self->format_version == format_v2) {
345 345 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
346 346 } else if (self->format_version == format_cl2) {
347 347 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
348 348 } else {
349 349 raise_revlog_error();
350 350 return -1;
351 351 }
352 352 if (tmp < 0) {
353 353 PyErr_Format(PyExc_OverflowError,
354 354 "revlog entry size out of bound (%d)", tmp);
355 355 return -1;
356 356 }
357 357 return tmp;
358 358 }
359 359
360 360 /*
361 361 * RevlogNG format (all in big endian, data may be inlined):
362 362 * 6 bytes: offset
363 363 * 2 bytes: flags
364 364 * 4 bytes: compressed length
365 365 * 4 bytes: uncompressed length
366 366 * 4 bytes: base revision
367 367 * 4 bytes: link revision
368 368 * 4 bytes: parent 1 revision
369 369 * 4 bytes: parent 2 revision
370 370 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
371 371 */
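The 6-byte offset and 2-byte flags above travel together as a single 64-bit offset_flags value; splitting them back out is a shift and a mask. The helpers below are illustrative only; the file does this inline (for instance index_get_start returns offset >> 16):

/* Illustrative only: recovering the two pieces of offset_flags. */
static inline uint64_t example_entry_offset(uint64_t offset_flags)
{
	return offset_flags >> 16; /* upper 6 bytes: position of the data chunk */
}
static inline int example_entry_flags(uint64_t offset_flags)
{
	return (int)(offset_flags & 0xFFFF); /* lower 2 bytes: revision flags */
}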
372 372 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
373 373 {
374 374 uint64_t offset_flags, sidedata_offset;
375 375 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
376 376 sidedata_comp_len, rank = rank_unknown;
377 377 char data_comp_mode, sidedata_comp_mode;
378 378 const char *c_node_id;
379 379 const char *data;
380 380 Py_ssize_t length = index_length(self);
381 381
382 382 if (pos == nullrev) {
383 383 Py_INCREF(self->nullentry);
384 384 return self->nullentry;
385 385 }
386 386
387 387 if (pos < 0 || pos >= length) {
388 388 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
389 389 return NULL;
390 390 }
391 391
392 392 data = index_deref(self, pos);
393 393 if (data == NULL)
394 394 return NULL;
395 395
396 396 if (self->format_version == format_v1) {
397 397 offset_flags = getbe32(data + entry_v1_offset_offset_flags);
398 398 /*
399 399 * The first entry on-disk needs the version number masked out,
400 400 * but this doesn't apply if entries are added to an empty
401 401 * index.
402 402 */
403 403 if (self->length && pos == 0)
404 404 offset_flags &= 0xFFFF;
405 405 else {
406 406 uint32_t offset_high =
407 407 getbe32(data + entry_v1_offset_high);
408 408 offset_flags |= ((uint64_t)offset_high) << 32;
409 409 }
410 410
411 411 comp_len = getbe32(data + entry_v1_offset_comp_len);
412 412 uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
413 413 base_rev = getbe32(data + entry_v1_offset_base_rev);
414 414 link_rev = getbe32(data + entry_v1_offset_link_rev);
415 415 parent_1 = getbe32(data + entry_v1_offset_parent_1);
416 416 parent_2 = getbe32(data + entry_v1_offset_parent_2);
417 417 c_node_id = data + entry_v1_offset_node_id;
418 418
419 419 sidedata_offset = 0;
420 420 sidedata_comp_len = 0;
421 421 data_comp_mode = comp_mode_inline;
422 422 sidedata_comp_mode = comp_mode_inline;
423 423 } else if (self->format_version == format_v2) {
424 424 offset_flags = getbe32(data + entry_v2_offset_offset_flags);
425 425 /*
426 426 * The first entry on-disk needs the version number masked out,
427 427 * but this doesn't apply if entries are added to an empty
428 428 * index.
429 429 */
430 430 if (self->length && pos == 0)
431 431 offset_flags &= 0xFFFF;
432 432 else {
433 433 uint32_t offset_high =
434 434 getbe32(data + entry_v2_offset_high);
435 435 offset_flags |= ((uint64_t)offset_high) << 32;
436 436 }
437 437
438 438 comp_len = getbe32(data + entry_v2_offset_comp_len);
439 439 uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
440 440 base_rev = getbe32(data + entry_v2_offset_base_rev);
441 441 link_rev = getbe32(data + entry_v2_offset_link_rev);
442 442 parent_1 = getbe32(data + entry_v2_offset_parent_1);
443 443 parent_2 = getbe32(data + entry_v2_offset_parent_2);
444 444 c_node_id = data + entry_v2_offset_node_id;
445 445
446 446 sidedata_offset =
447 447 getbe64(data + entry_v2_offset_sidedata_offset);
448 448 sidedata_comp_len =
449 449 getbe32(data + entry_v2_offset_sidedata_comp_len);
450 450 data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
451 451 sidedata_comp_mode =
452 452 ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
453 453 } else if (self->format_version == format_cl2) {
454 454 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
455 455 offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
456 456 offset_flags |= ((uint64_t)offset_high) << 32;
457 457 comp_len = getbe32(data + entry_cl2_offset_comp_len);
458 458 uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
459 459 /* base_rev and link_rev are not stored in changelogv2, but are
460 460 still used by some functions shared with the other revlogs.
461 461 They are supposed to contain links to other revisions,
462 462 but they always point to themselves in the case of a changelog.
463 463 */
464 464 base_rev = pos;
465 465 link_rev = pos;
466 466 parent_1 = getbe32(data + entry_cl2_offset_parent_1);
467 467 parent_2 = getbe32(data + entry_cl2_offset_parent_2);
468 468 c_node_id = data + entry_cl2_offset_node_id;
469 469 sidedata_offset =
470 470 getbe64(data + entry_cl2_offset_sidedata_offset);
471 471 sidedata_comp_len =
472 472 getbe32(data + entry_cl2_offset_sidedata_comp_len);
473 473 data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
474 474 sidedata_comp_mode =
475 475 ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
476 476 rank = getbe32(data + entry_cl2_offset_rank);
477 477 } else {
478 478 raise_revlog_error();
479 479 return NULL;
480 480 }
481 481
482 482 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
483 483 base_rev, link_rev, parent_1, parent_2, c_node_id,
484 484 self->nodelen, sidedata_offset, sidedata_comp_len,
485 485 data_comp_mode, sidedata_comp_mode, rank);
486 486 }
487 487 /*
488 488 * Pack header information in binary
489 489 */
490 490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
491 491 {
492 492 int header;
493 493 char out[4];
494 494 if (!PyArg_ParseTuple(args, "i", &header)) {
495 495 return NULL;
496 496 }
497 497 if (self->format_version != format_v1) {
498 498 PyErr_Format(PyExc_RuntimeError,
499 499 "version header should go in the docket, not the "
500 500 "index: %d",
501 501 header);
502 502 return NULL;
503 503 }
504 504 putbe32(header, out);
505 505 return PyBytes_FromStringAndSize(out, 4);
506 506 }
507 507 /*
508 508 * Return the raw binary string representing a revision
509 509 */
510 510 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
511 511 {
512 512 long rev;
513 513 const char *data;
514 514 Py_ssize_t length = index_length(self);
515 515
516 516 if (!pylong_to_long(value, &rev)) {
517 517 return NULL;
518 518 }
519 519 if (rev < 0 || rev >= length) {
520 520 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
521 521 rev);
522 522 return NULL;
523 523 };
524 524
525 525 data = index_deref(self, rev);
526 526 if (data == NULL)
527 527 return NULL;
528 528 if (rev == 0 && self->format_version == format_v1) {
529 529 /* the header is eating the start of the first entry */
530 530 return PyBytes_FromStringAndSize(data + 4,
531 531 self->entry_size - 4);
532 532 }
533 533 return PyBytes_FromStringAndSize(data, self->entry_size);
534 534 }
535 535
536 536 /*
537 537 * Return the hash of node corresponding to the given rev.
538 538 */
539 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 540 {
541 541 Py_ssize_t length = index_length(self);
542 542 const char *data;
543 543 const char *node_id;
544 544
545 545 if (pos == nullrev)
546 546 return nullid;
547 547
548 548 if (pos >= length)
549 549 return NULL;
550 550
551 551 data = index_deref(self, pos);
552 552
553 553 if (self->format_version == format_v1) {
554 554 node_id = data + entry_v1_offset_node_id;
555 555 } else if (self->format_version == format_v2) {
556 556 node_id = data + entry_v2_offset_node_id;
557 557 } else if (self->format_version == format_cl2) {
558 558 node_id = data + entry_cl2_offset_node_id;
559 559 } else {
560 560 raise_revlog_error();
561 561 return NULL;
562 562 }
563 563
564 564 return data ? node_id : NULL;
565 565 }
566 566
567 567 /*
568 568 * Return the stored rank of a given revision if known, or rank_unknown
569 569 * otherwise.
570 570 *
571 571 * The rank of a revision is the size of the sub-graph it defines as a head.
572 572 * Equivalently, the rank of a revision `r` is the size of the set
573 573 * `ancestors(r)`, `r` included.
574 574 *
575 575 * This method returns the rank retrieved from the revlog in constant time. It
576 576 * makes no attempt at computing unknown values for versions of the revlog
577 577 * which do not persist the rank.
578 578 */
579 579 static int index_fast_rank(indexObject *self, Py_ssize_t pos)
580 580 {
581 581 Py_ssize_t length = index_length(self);
582 582
583 583 if (self->format_version != format_cl2 || pos >= length) {
584 584 return rank_unknown;
585 585 }
586 586
587 587 if (pos == nullrev) {
588 588 return 0; /* convention */
589 589 }
590 590
591 591 return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
592 592 }
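For intuition (numbers invented for the example): in a linear history 0 -> 1 -> 2, ancestors(2) = {0, 1, 2}, so the persisted rank of revision 2 is 3; for a merge whose parents have disjoint ancestor sets of sizes 4 and 3, the merge's rank is 4 + 3 + 1 = 8. Only the changelog-v2 format stores this value, which is why the other formats fall through to rank_unknown above.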
593 593
594 594 /*
595 595 * Return the hash of the node corresponding to the given rev. The
596 596 * rev is assumed to be existing. If not, an exception is set.
597 597 */
598 598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
599 599 {
600 600 const char *node = index_node(self, pos);
601 601 if (node == NULL) {
602 602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
603 603 (int)pos);
604 604 }
605 605 return node;
606 606 }
607 607
608 608 static int nt_insert(nodetree *self, const char *node, int rev);
609 609
610 610 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
611 611 {
612 612 Py_ssize_t thisnodelen;
613 613 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
614 614 return -1;
615 615 if (nodelen == thisnodelen)
616 616 return 0;
617 617 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
618 618 thisnodelen, nodelen);
619 619 return -1;
620 620 }
621 621
622 622 static PyObject *index_append(indexObject *self, PyObject *obj)
623 623 {
624 624 uint64_t offset_flags, sidedata_offset;
625 625 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
626 626 sidedata_comp_len, rank;
627 627 char data_comp_mode, sidedata_comp_mode;
628 628 Py_ssize_t c_node_id_len;
629 629 const char *c_node_id;
630 630 char comp_field;
631 631 char *data;
632 632
633 633 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
634 634 &uncomp_len, &base_rev, &link_rev, &parent_1,
635 635 &parent_2, &c_node_id, &c_node_id_len,
636 636 &sidedata_offset, &sidedata_comp_len,
637 637 &data_comp_mode, &sidedata_comp_mode, &rank)) {
638 638 PyErr_SetString(PyExc_TypeError, "12-tuple required");
639 639 return NULL;
640 640 }
641 641
642 642 if (c_node_id_len != self->nodelen) {
643 643 PyErr_SetString(PyExc_TypeError, "invalid node");
644 644 return NULL;
645 645 }
646 646 if (self->format_version == format_v1) {
647 647
648 648 if (data_comp_mode != comp_mode_inline) {
649 649 PyErr_Format(PyExc_ValueError,
650 650 "invalid data compression mode: %i",
651 651 data_comp_mode);
652 652 return NULL;
653 653 }
654 654 if (sidedata_comp_mode != comp_mode_inline) {
655 655 PyErr_Format(PyExc_ValueError,
656 656 "invalid sidedata compression mode: %i",
657 657 sidedata_comp_mode);
658 658 return NULL;
659 659 }
660 660 }
661 661
662 662 if (self->new_length == self->added_length) {
663 663 size_t new_added_length =
664 664 self->added_length ? self->added_length * 2 : 4096;
665 665 void *new_added = PyMem_Realloc(
666 666 self->added, new_added_length * self->entry_size);
667 667 if (!new_added)
668 668 return PyErr_NoMemory();
669 669 self->added = new_added;
670 670 self->added_length = new_added_length;
671 671 }
672 672 rev = self->length + self->new_length;
673 673 data = self->added + self->entry_size * self->new_length++;
674 674
675 675 memset(data, 0, self->entry_size);
676 676
677 677 if (self->format_version == format_v1) {
678 678 putbe32(offset_flags >> 32, data + entry_v1_offset_high);
679 679 putbe32(offset_flags & 0xffffffffU,
680 680 data + entry_v1_offset_offset_flags);
681 681 putbe32(comp_len, data + entry_v1_offset_comp_len);
682 682 putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
683 683 putbe32(base_rev, data + entry_v1_offset_base_rev);
684 684 putbe32(link_rev, data + entry_v1_offset_link_rev);
685 685 putbe32(parent_1, data + entry_v1_offset_parent_1);
686 686 putbe32(parent_2, data + entry_v1_offset_parent_2);
687 687 memcpy(data + entry_v1_offset_node_id, c_node_id,
688 688 c_node_id_len);
689 689 } else if (self->format_version == format_v2) {
690 690 putbe32(offset_flags >> 32, data + entry_v2_offset_high);
691 691 putbe32(offset_flags & 0xffffffffU,
692 692 data + entry_v2_offset_offset_flags);
693 693 putbe32(comp_len, data + entry_v2_offset_comp_len);
694 694 putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
695 695 putbe32(base_rev, data + entry_v2_offset_base_rev);
696 696 putbe32(link_rev, data + entry_v2_offset_link_rev);
697 697 putbe32(parent_1, data + entry_v2_offset_parent_1);
698 698 putbe32(parent_2, data + entry_v2_offset_parent_2);
699 699 memcpy(data + entry_v2_offset_node_id, c_node_id,
700 700 c_node_id_len);
701 701 putbe64(sidedata_offset,
702 702 data + entry_v2_offset_sidedata_offset);
703 703 putbe32(sidedata_comp_len,
704 704 data + entry_v2_offset_sidedata_comp_len);
705 705 comp_field = data_comp_mode & 3;
706 706 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
707 707 data[entry_v2_offset_all_comp_mode] = comp_field;
708 708 } else if (self->format_version == format_cl2) {
709 709 putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
710 710 putbe32(offset_flags & 0xffffffffU,
711 711 data + entry_cl2_offset_offset_flags);
712 712 putbe32(comp_len, data + entry_cl2_offset_comp_len);
713 713 putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
714 714 putbe32(parent_1, data + entry_cl2_offset_parent_1);
715 715 putbe32(parent_2, data + entry_cl2_offset_parent_2);
716 716 memcpy(data + entry_cl2_offset_node_id, c_node_id,
717 717 c_node_id_len);
718 718 putbe64(sidedata_offset,
719 719 data + entry_cl2_offset_sidedata_offset);
720 720 putbe32(sidedata_comp_len,
721 721 data + entry_cl2_offset_sidedata_comp_len);
722 722 comp_field = data_comp_mode & 3;
723 723 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
724 724 data[entry_cl2_offset_all_comp_mode] = comp_field;
725 725 putbe32(rank, data + entry_cl2_offset_rank);
726 726 } else {
727 727 raise_revlog_error();
728 728 return NULL;
729 729 }
730 730
731 731 if (self->ntinitialized)
732 732 nt_insert(&self->nt, c_node_id, rev);
733 733
734 734 Py_CLEAR(self->headrevs);
735 735 Py_RETURN_NONE;
736 736 }
737 737
738 738 /* Replace an existing index entry's sidedata offset and length with new ones.
739 739 This cannot be used outside of the context of sidedata rewriting,
740 740 inside the transaction that creates the given revision. */
741 741 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
742 742 {
743 743 uint64_t offset_flags, sidedata_offset;
744 744 Py_ssize_t rev;
745 745 int sidedata_comp_len;
746 746 char comp_mode;
747 747 char *data;
748 748 #if LONG_MAX == 0x7fffffffL
749 749 const char *const sidedata_format = "nKiKB";
750 750 #else
751 751 const char *const sidedata_format = "nkikB";
752 752 #endif
753 753
754 754 if (self->entry_size == v1_entry_size || self->inlined) {
755 755 /*
756 756 There is a bug in the transaction handling when going from an
757 757 inline revlog to a separate index and data file. Turn it off until
758 758 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
759 759 See issue6485.
760 760 */
761 761 raise_revlog_error();
762 762 return NULL;
763 763 }
764 764
765 765 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
766 766 &sidedata_comp_len, &offset_flags, &comp_mode))
767 767 return NULL;
768 768
769 769 if (rev < 0 || rev >= index_length(self)) {
770 770 PyErr_SetString(PyExc_IndexError, "revision outside index");
771 771 return NULL;
772 772 }
773 773 if (rev < self->length) {
774 774 PyErr_SetString(
775 775 PyExc_IndexError,
776 776 "cannot rewrite entries outside of this transaction");
777 777 return NULL;
778 778 }
779 779
780 780 /* Find the newly added node, offset from the "already on-disk" length
781 781 */
782 782 data = self->added + self->entry_size * (rev - self->length);
783 783 if (self->format_version == format_v2) {
784 784 putbe64(offset_flags, data + entry_v2_offset_high);
785 785 putbe64(sidedata_offset,
786 786 data + entry_v2_offset_sidedata_offset);
787 787 putbe32(sidedata_comp_len,
788 788 data + entry_v2_offset_sidedata_comp_len);
789 789 data[entry_v2_offset_all_comp_mode] =
790 790 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
791 791 ((comp_mode & 3) << 2);
792 792 } else if (self->format_version == format_cl2) {
793 793 putbe64(offset_flags, data + entry_cl2_offset_high);
794 794 putbe64(sidedata_offset,
795 795 data + entry_cl2_offset_sidedata_offset);
796 796 putbe32(sidedata_comp_len,
797 797 data + entry_cl2_offset_sidedata_comp_len);
798 798 data[entry_cl2_offset_all_comp_mode] =
799 799 (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
800 800 ((comp_mode & 3) << 2);
801 801 } else {
802 802 raise_revlog_error();
803 803 return NULL;
804 804 }
805 805
806 806 Py_RETURN_NONE;
807 807 }
808 808
809 809 static PyObject *index_stats(indexObject *self)
810 810 {
811 811 PyObject *obj = PyDict_New();
812 812 PyObject *s = NULL;
813 813 PyObject *t = NULL;
814 814
815 815 if (obj == NULL)
816 816 return NULL;
817 817
818 818 #define istat(__n, __d) \
819 819 do { \
820 820 s = PyBytes_FromString(__d); \
821 821 t = PyLong_FromSsize_t(self->__n); \
822 822 if (!s || !t) \
823 823 goto bail; \
824 824 if (PyDict_SetItem(obj, s, t) == -1) \
825 825 goto bail; \
826 826 Py_CLEAR(s); \
827 827 Py_CLEAR(t); \
828 828 } while (0)
829 829
830 830 if (self->added_length)
831 831 istat(new_length, "index entries added");
832 832 istat(length, "revs in memory");
833 833 istat(ntlookups, "node trie lookups");
834 834 istat(ntmisses, "node trie misses");
835 835 istat(ntrev, "node trie last rev scanned");
836 836 if (self->ntinitialized) {
837 837 istat(nt.capacity, "node trie capacity");
838 838 istat(nt.depth, "node trie depth");
839 839 istat(nt.length, "node trie count");
840 840 istat(nt.splits, "node trie splits");
841 841 }
842 842
843 843 #undef istat
844 844
845 845 return obj;
846 846
847 847 bail:
848 848 Py_XDECREF(obj);
849 849 Py_XDECREF(s);
850 850 Py_XDECREF(t);
851 851 return NULL;
852 852 }
853 853
854 854 /*
855 855 * When we cache a list, we want to be sure the caller can't mutate
856 856 * the cached copy.
857 857 */
858 858 static PyObject *list_copy(PyObject *list)
859 859 {
860 860 Py_ssize_t len = PyList_GET_SIZE(list);
861 861 PyObject *newlist = PyList_New(len);
862 862 Py_ssize_t i;
863 863
864 864 if (newlist == NULL)
865 865 return NULL;
866 866
867 867 for (i = 0; i < len; i++) {
868 868 PyObject *obj = PyList_GET_ITEM(list, i);
869 869 Py_INCREF(obj);
870 870 PyList_SET_ITEM(newlist, i, obj);
871 871 }
872 872
873 873 return newlist;
874 874 }
875 875
876 876 static int check_filter(PyObject *filter, Py_ssize_t arg)
877 877 {
878 878 if (filter) {
879 879 PyObject *arglist, *result;
880 880 int isfiltered;
881 881
882 882 arglist = Py_BuildValue("(n)", arg);
883 883 if (!arglist) {
884 884 return -1;
885 885 }
886 886
887 887 result = PyObject_Call(filter, arglist, NULL);
888 888 Py_DECREF(arglist);
889 889 if (!result) {
890 890 return -1;
891 891 }
892 892
893 893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
894 894 * same as this function, so we can just return it directly.*/
895 895 isfiltered = PyObject_IsTrue(result);
896 896 Py_DECREF(result);
897 897 return isfiltered;
898 898 } else {
899 899 return 0;
900 900 }
901 901 }
902 902
903 903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 904 int parent_2, Py_ssize_t i)
905 905 {
906 906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 907 phases[i] = phases[parent_1];
908 908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 909 phases[i] = phases[parent_2];
910 910 }
911 911
912 912 static PyObject *reachableroots2(indexObject *self, PyObject *args)
913 913 {
914 914
915 915 /* Input */
916 916 long minroot;
917 917 PyObject *includepatharg = NULL;
918 918 int includepath = 0;
919 919 /* heads and roots are lists */
920 920 PyObject *heads = NULL;
921 921 PyObject *roots = NULL;
922 922 PyObject *reachable = NULL;
923 923
924 924 PyObject *val;
925 925 Py_ssize_t len = index_length(self);
926 926 long revnum;
927 927 Py_ssize_t k;
928 928 Py_ssize_t i;
929 929 Py_ssize_t l;
930 930 int r;
931 931 int parents[2];
932 932
933 933 /* Internal data structure:
934 934 * tovisit: array of length len+1 (all revs + nullrev), filled upto
935 935 * lentovisit
936 936 *
937 937 * revstates: array of length len+1 (all revs + nullrev) */
938 938 int *tovisit = NULL;
939 939 long lentovisit = 0;
940 940 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
941 941 char *revstates = NULL;
942 942
943 943 /* Get arguments */
944 944 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
945 945 &PyList_Type, &roots, &PyBool_Type,
946 946 &includepatharg))
947 947 goto bail;
948 948
949 949 if (includepatharg == Py_True)
950 950 includepath = 1;
951 951
952 952 /* Initialize return set */
953 953 reachable = PyList_New(0);
954 954 if (reachable == NULL)
955 955 goto bail;
956 956
957 957 /* Initialize internal datastructures */
958 958 tovisit = (int *)malloc((len + 1) * sizeof(int));
959 959 if (tovisit == NULL) {
960 960 PyErr_NoMemory();
961 961 goto bail;
962 962 }
963 963
964 964 revstates = (char *)calloc(len + 1, 1);
965 965 if (revstates == NULL) {
966 966 PyErr_NoMemory();
967 967 goto bail;
968 968 }
969 969
970 970 l = PyList_GET_SIZE(roots);
971 971 for (i = 0; i < l; i++) {
972 972 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
973 973 if (revnum == -1 && PyErr_Occurred())
974 974 goto bail;
975 975 /* If root is out of range, e.g. wdir(), it must be unreachable
976 976 * from heads. So we can just ignore it. */
977 977 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
978 978 continue;
979 979 revstates[revnum + 1] |= RS_ROOT;
980 980 }
981 981
982 982 /* Populate tovisit with all the heads */
983 983 l = PyList_GET_SIZE(heads);
984 984 for (i = 0; i < l; i++) {
985 985 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
986 986 if (revnum == -1 && PyErr_Occurred())
987 987 goto bail;
988 988 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
989 989 PyErr_SetString(PyExc_IndexError, "head out of range");
990 990 goto bail;
991 991 }
992 992 if (!(revstates[revnum + 1] & RS_SEEN)) {
993 993 tovisit[lentovisit++] = (int)revnum;
994 994 revstates[revnum + 1] |= RS_SEEN;
995 995 }
996 996 }
997 997
998 998 /* Visit the tovisit list and find the reachable roots */
999 999 k = 0;
1000 1000 while (k < lentovisit) {
1001 1001 /* Add the node to reachable if it is a root*/
1002 1002 revnum = tovisit[k++];
1003 1003 if (revstates[revnum + 1] & RS_ROOT) {
1004 1004 revstates[revnum + 1] |= RS_REACHABLE;
1005 1005 val = PyLong_FromLong(revnum);
1006 1006 if (val == NULL)
1007 1007 goto bail;
1008 1008 r = PyList_Append(reachable, val);
1009 1009 Py_DECREF(val);
1010 1010 if (r < 0)
1011 1011 goto bail;
1012 1012 if (includepath == 0)
1013 1013 continue;
1014 1014 }
1015 1015
1016 1016 /* Add its parents to the list of nodes to visit */
1017 1017 if (revnum == nullrev)
1018 1018 continue;
1019 1019 r = index_get_parents(self, revnum, parents, (int)len - 1);
1020 1020 if (r < 0)
1021 1021 goto bail;
1022 1022 for (i = 0; i < 2; i++) {
1023 1023 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
1024 1024 parents[i] >= minroot) {
1025 1025 tovisit[lentovisit++] = parents[i];
1026 1026 revstates[parents[i] + 1] |= RS_SEEN;
1027 1027 }
1028 1028 }
1029 1029 }
1030 1030
1031 1031 /* Find all the nodes in between the roots we found and the heads
1032 1032 * and add them to the reachable set */
1033 1033 if (includepath == 1) {
1034 1034 long minidx = minroot;
1035 1035 if (minidx < 0)
1036 1036 minidx = 0;
1037 1037 for (i = minidx; i < len; i++) {
1038 1038 if (!(revstates[i + 1] & RS_SEEN))
1039 1039 continue;
1040 1040 r = index_get_parents(self, i, parents, (int)len - 1);
1041 1041 /* Corrupted index file, error is set from
1042 1042 * index_get_parents */
1043 1043 if (r < 0)
1044 1044 goto bail;
1045 1045 if (((revstates[parents[0] + 1] |
1046 1046 revstates[parents[1] + 1]) &
1047 1047 RS_REACHABLE) &&
1048 1048 !(revstates[i + 1] & RS_REACHABLE)) {
1049 1049 revstates[i + 1] |= RS_REACHABLE;
1050 1050 val = PyLong_FromSsize_t(i);
1051 1051 if (val == NULL)
1052 1052 goto bail;
1053 1053 r = PyList_Append(reachable, val);
1054 1054 Py_DECREF(val);
1055 1055 if (r < 0)
1056 1056 goto bail;
1057 1057 }
1058 1058 }
1059 1059 }
1060 1060
1061 1061 free(revstates);
1062 1062 free(tovisit);
1063 1063 return reachable;
1064 1064 bail:
1065 1065 Py_XDECREF(reachable);
1066 1066 free(revstates);
1067 1067 free(tovisit);
1068 1068 return NULL;
1069 1069 }
1070 1070
1071 1071 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1072 1072 char phase)
1073 1073 {
1074 1074 Py_ssize_t len = index_length(self);
1075 1075 PyObject *item;
1076 1076 PyObject *iterator;
1077 1077 int rev, minrev = -1;
1078 1078 char *node;
1079 1079
1080 1080 if (!PySet_Check(roots)) {
1081 1081 PyErr_SetString(PyExc_TypeError,
1082 1082 "roots must be a set of nodes");
1083 1083 return -2;
1084 1084 }
1085 1085 iterator = PyObject_GetIter(roots);
1086 1086 if (iterator == NULL)
1087 1087 return -2;
1088 1088 while ((item = PyIter_Next(iterator))) {
1089 1089 if (node_check(self->nodelen, item, &node) == -1)
1090 1090 goto failed;
1091 1091 rev = index_find_node(self, node);
1092 1092 /* null is implicitly public, so negative is invalid */
1093 1093 if (rev < 0 || rev >= len)
1094 1094 goto failed;
1095 1095 phases[rev] = phase;
1096 1096 if (minrev == -1 || minrev > rev)
1097 1097 minrev = rev;
1098 1098 Py_DECREF(item);
1099 1099 }
1100 1100 Py_DECREF(iterator);
1101 1101 return minrev;
1102 1102 failed:
1103 1103 Py_DECREF(iterator);
1104 1104 Py_DECREF(item);
1105 1105 return -2;
1106 1106 }
1107 1107
1108 1108 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1109 1109 {
1110 1110 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1111 1111 96: internal */
1112 1112 static const char trackedphases[] = {1, 2, 32, 96};
1113 1113 PyObject *roots = Py_None;
1114 1114 PyObject *phasesetsdict = NULL;
1115 1115 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1116 1116 Py_ssize_t len = index_length(self);
1117 1117 char *phases = NULL;
1118 1118 int minphaserev = -1, rev, i;
1119 1119 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1120 1120
1121 1121 if (!PyArg_ParseTuple(args, "O", &roots))
1122 1122 return NULL;
1123 1123 if (roots == NULL || !PyDict_Check(roots)) {
1124 1124 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1125 1125 return NULL;
1126 1126 }
1127 1127
1128 1128 phases = calloc(len, 1);
1129 1129 if (phases == NULL) {
1130 1130 PyErr_NoMemory();
1131 1131 return NULL;
1132 1132 }
1133 1133
1134 1134 for (i = 0; i < numphases; ++i) {
1135 1135 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1136 1136 PyObject *phaseroots = NULL;
1137 1137 if (pyphase == NULL)
1138 1138 goto release;
1139 1139 phaseroots = PyDict_GetItem(roots, pyphase);
1140 1140 Py_DECREF(pyphase);
1141 1141 if (phaseroots == NULL)
1142 1142 continue;
1143 1143 rev = add_roots_get_min(self, phaseroots, phases,
1144 1144 trackedphases[i]);
1145 1145 if (rev == -2)
1146 1146 goto release;
1147 1147 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1148 1148 minphaserev = rev;
1149 1149 }
1150 1150
1151 1151 for (i = 0; i < numphases; ++i) {
1152 1152 phasesets[i] = PySet_New(NULL);
1153 1153 if (phasesets[i] == NULL)
1154 1154 goto release;
1155 1155 }
1156 1156
1157 1157 if (minphaserev == -1)
1158 1158 minphaserev = len;
1159 1159 for (rev = minphaserev; rev < len; ++rev) {
1160 1160 PyObject *pyphase = NULL;
1161 1161 PyObject *pyrev = NULL;
1162 1162 int parents[2];
1163 1163 /*
1164 1164 * The parent lookup could be skipped for phaseroots, but
1165 1165 * phase --force would historically not recompute them
1166 1166 * correctly, leaving descendents with a lower phase around.
1167 1167 * As such, unconditionally recompute the phase.
1168 1168 */
1169 1169 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1170 1170 goto release;
1171 1171 set_phase_from_parents(phases, parents[0], parents[1], rev);
1172 1172 switch (phases[rev]) {
1173 1173 case 0:
1174 1174 continue;
1175 1175 case 1:
1176 1176 pyphase = phasesets[0];
1177 1177 break;
1178 1178 case 2:
1179 1179 pyphase = phasesets[1];
1180 1180 break;
1181 1181 case 32:
1182 1182 pyphase = phasesets[2];
1183 1183 break;
1184 1184 case 96:
1185 1185 pyphase = phasesets[3];
1186 1186 break;
1187 1187 default:
1188 1188 /* this should never happen since the phase number is
1189 1189 * specified by this function. */
1190 1190 PyErr_SetString(PyExc_SystemError,
1191 1191 "bad phase number in internal list");
1192 1192 goto release;
1193 1193 }
1194 1194 pyrev = PyLong_FromLong(rev);
1195 1195 if (pyrev == NULL)
1196 1196 goto release;
1197 1197 if (PySet_Add(pyphase, pyrev) == -1) {
1198 1198 Py_DECREF(pyrev);
1199 1199 goto release;
1200 1200 }
1201 1201 Py_DECREF(pyrev);
1202 1202 }
1203 1203
1204 1204 phasesetsdict = _dict_new_presized(numphases);
1205 1205 if (phasesetsdict == NULL)
1206 1206 goto release;
1207 1207 for (i = 0; i < numphases; ++i) {
1208 1208 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1209 1209 if (pyphase == NULL)
1210 1210 goto release;
1211 1211 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1212 1212 -1) {
1213 1213 Py_DECREF(pyphase);
1214 1214 goto release;
1215 1215 }
1216 1216 Py_DECREF(phasesets[i]);
1217 1217 phasesets[i] = NULL;
1218 1218 }
1219 1219
1220 1220 free(phases);
1221 1221 return Py_BuildValue("nN", len, phasesetsdict);
1222 1222
1223 1223 release:
1224 1224 for (i = 0; i < numphases; ++i)
1225 1225 Py_XDECREF(phasesets[i]);
1226 1226 Py_XDECREF(phasesetsdict);
1227 1227
1228 1228 free(phases);
1229 1229 return NULL;
1230 1230 }
1231 1231
1232 1232 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1233 1233 {
1234 1234 Py_ssize_t i, j, len;
1235 1235 char *nothead = NULL;
1236 1236 PyObject *heads = NULL;
1237 1237 PyObject *filter = NULL;
1238 1238 PyObject *filteredrevs = Py_None;
1239 1239
1240 1240 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1241 1241 return NULL;
1242 1242 }
1243 1243
1244 1244 if (self->headrevs && filteredrevs == self->filteredrevs)
1245 1245 return list_copy(self->headrevs);
1246 1246
1247 1247 Py_DECREF(self->filteredrevs);
1248 1248 self->filteredrevs = filteredrevs;
1249 1249 Py_INCREF(filteredrevs);
1250 1250
1251 1251 if (filteredrevs != Py_None) {
1252 1252 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1253 1253 if (!filter) {
1254 1254 PyErr_SetString(
1255 1255 PyExc_TypeError,
1256 1256 "filteredrevs has no attribute __contains__");
1257 1257 goto bail;
1258 1258 }
1259 1259 }
1260 1260
1261 1261 len = index_length(self);
1262 1262 heads = PyList_New(0);
1263 1263 if (heads == NULL)
1264 1264 goto bail;
1265 1265 if (len == 0) {
1266 1266 PyObject *nullid = PyLong_FromLong(-1);
1267 1267 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1268 1268 Py_XDECREF(nullid);
1269 1269 goto bail;
1270 1270 }
1271 1271 goto done;
1272 1272 }
1273 1273
1274 1274 nothead = calloc(len, 1);
1275 1275 if (nothead == NULL) {
1276 1276 PyErr_NoMemory();
1277 1277 goto bail;
1278 1278 }
1279 1279
1280 1280 for (i = len - 1; i >= 0; i--) {
1281 1281 int isfiltered;
1282 1282 int parents[2];
1283 1283
1284 1284 /* If nothead[i] == 1, it means we've seen an unfiltered child
1285 1285 * of this node already, and therefore this node is not
1286 1286 * filtered. So we can skip the expensive check_filter step.
1287 1287 */
1288 1288 if (nothead[i] != 1) {
1289 1289 isfiltered = check_filter(filter, i);
1290 1290 if (isfiltered == -1) {
1291 1291 PyErr_SetString(PyExc_TypeError,
1292 1292 "unable to check filter");
1293 1293 goto bail;
1294 1294 }
1295 1295
1296 1296 if (isfiltered) {
1297 1297 nothead[i] = 1;
1298 1298 continue;
1299 1299 }
1300 1300 }
1301 1301
1302 1302 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1303 1303 goto bail;
1304 1304 for (j = 0; j < 2; j++) {
1305 1305 if (parents[j] >= 0)
1306 1306 nothead[parents[j]] = 1;
1307 1307 }
1308 1308 }
1309 1309
1310 1310 for (i = 0; i < len; i++) {
1311 1311 PyObject *head;
1312 1312
1313 1313 if (nothead[i])
1314 1314 continue;
1315 1315 head = PyLong_FromSsize_t(i);
1316 1316 if (head == NULL || PyList_Append(heads, head) == -1) {
1317 1317 Py_XDECREF(head);
1318 1318 goto bail;
1319 1319 }
1320 1320 }
1321 1321
1322 1322 done:
1323 1323 self->headrevs = heads;
1324 1324 Py_XDECREF(filter);
1325 1325 free(nothead);
1326 1326 return list_copy(self->headrevs);
1327 1327 bail:
1328 1328 Py_XDECREF(filter);
1329 1329 Py_XDECREF(heads);
1330 1330 free(nothead);
1331 1331 return NULL;
1332 1332 }
1333 1333
1334 1334 /**
1335 1335 * Obtain the base revision index entry.
1336 1336 *
1337 1337 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1338 1338 */
1339 1339 static inline int index_baserev(indexObject *self, int rev)
1340 1340 {
1341 1341 const char *data;
1342 1342 int result;
1343 1343
1344 1344 data = index_deref(self, rev);
1345 1345 if (data == NULL)
1346 1346 return -2;
1347 1347
1348 1348 if (self->format_version == format_v1) {
1349 1349 result = getbe32(data + entry_v1_offset_base_rev);
1350 1350 } else if (self->format_version == format_v2) {
1351 1351 result = getbe32(data + entry_v2_offset_base_rev);
1352 1352 } else if (self->format_version == format_cl2) {
1353 1353 return rev;
1354 1354 } else {
1355 1355 raise_revlog_error();
1356 1356 return -1;
1357 1357 }
1358 1358
1359 1359 if (result > rev) {
1360 1360 PyErr_Format(
1361 1361 PyExc_ValueError,
1362 1362 "corrupted revlog, revision base above revision: %d, %d",
1363 1363 rev, result);
1364 1364 return -2;
1365 1365 }
1366 1366 if (result < -1) {
1367 1367 PyErr_Format(
1368 1368 PyExc_ValueError,
1369 1369 "corrupted revlog, revision base out of range: %d, %d", rev,
1370 1370 result);
1371 1371 return -2;
1372 1372 }
1373 1373 return result;
1374 1374 }
1375 1375
1376 1376 /**
1377 1377 * Find if a revision is a snapshot or not
1378 1378 *
1379 1379 * Only relevant for sparse-revlog case.
1380 1380 * Callers must ensure that rev is in a valid range.
1381 1381 */
1382 1382 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1383 1383 {
1384 1384 int ps[2];
1385 int b;
1385 1386 Py_ssize_t base;
1386 1387 while (rev >= 0) {
1387 1388 base = (Py_ssize_t)index_baserev(self, rev);
1388 1389 if (base == rev) {
1389 1390 base = -1;
1390 1391 }
1391 1392 if (base == -2) {
1392 1393 assert(PyErr_Occurred());
1393 1394 return -1;
1394 1395 }
1395 1396 if (base == -1) {
1396 1397 return 1;
1397 1398 }
1398 1399 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1399 1400 assert(PyErr_Occurred());
1400 1401 return -1;
1401 1402 };
1403 while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
1404 b = index_baserev(self, ps[0]);
1405 if (b == ps[0]) {
1406 break;
1407 }
1408 ps[0] = b;
1409 }
1410 while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
1411 b = index_baserev(self, ps[1]);
1412 if (b == ps[1]) {
1413 break;
1414 }
1415 ps[1] = b;
1416 }
1402 1417 if (base == ps[0] || base == ps[1]) {
1403 1418 return 0;
1404 1419 }
1405 1420 rev = base;
1406 1421 }
1407 1422 return rev == -1;
1408 1423 }
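The loops added above refine the parent comparison: a parent whose delta is empty (index_get_length == 0) has exactly the content of its own delta base, so that base is what the snapshot test should compare against. Hypothetical example: rev 12 has delta base 7 and p1 = 10, and rev 10 is an empty delta on top of 7; the new loop rewrites ps[0] from 10 to 7, base == ps[0] then holds, and rev 12 is reported as not a snapshot, whereas the previous code would have kept walking the base chain and could have classified it as one.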
1409 1424
1410 1425 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1411 1426 {
1412 1427 long rev;
1413 1428 int issnap;
1414 1429 Py_ssize_t length = index_length(self);
1415 1430
1416 1431 if (!pylong_to_long(value, &rev)) {
1417 1432 return NULL;
1418 1433 }
1419 1434 if (rev < -1 || rev >= length) {
1420 1435 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1421 1436 rev);
1422 1437 return NULL;
1423 1438 };
1424 1439 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1425 1440 if (issnap < 0) {
1426 1441 return NULL;
1427 1442 };
1428 1443 return PyBool_FromLong((long)issnap);
1429 1444 }
1430 1445
1431 1446 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1432 1447 {
1433 1448 Py_ssize_t start_rev;
1434 1449 PyObject *cache;
1435 1450 Py_ssize_t base;
1436 1451 Py_ssize_t rev;
1437 1452 PyObject *key = NULL;
1438 1453 PyObject *value = NULL;
1439 1454 const Py_ssize_t length = index_length(self);
1440 1455 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1441 1456 return NULL;
1442 1457 }
1443 1458 for (rev = start_rev; rev < length; rev++) {
1444 1459 int issnap;
1445 1460 PyObject *allvalues = NULL;
1446 1461 issnap = index_issnapshotrev(self, rev);
1447 1462 if (issnap < 0) {
1448 1463 goto bail;
1449 1464 }
1450 1465 if (issnap == 0) {
1451 1466 continue;
1452 1467 }
1453 1468 base = (Py_ssize_t)index_baserev(self, rev);
1454 1469 if (base == rev) {
1455 1470 base = -1;
1456 1471 }
1457 1472 if (base == -2) {
1458 1473 assert(PyErr_Occurred());
1459 1474 goto bail;
1460 1475 }
1461 1476 key = PyLong_FromSsize_t(base);
1462 1477 allvalues = PyDict_GetItem(cache, key);
1463 1478 if (allvalues == NULL && PyErr_Occurred()) {
1464 1479 goto bail;
1465 1480 }
1466 1481 if (allvalues == NULL) {
1467 1482 int r;
1468 1483 allvalues = PyList_New(0);
1469 1484 if (!allvalues) {
1470 1485 goto bail;
1471 1486 }
1472 1487 r = PyDict_SetItem(cache, key, allvalues);
1473 1488 Py_DECREF(allvalues);
1474 1489 if (r < 0) {
1475 1490 goto bail;
1476 1491 }
1477 1492 }
1478 1493 value = PyLong_FromSsize_t(rev);
1479 1494 if (PyList_Append(allvalues, value)) {
1480 1495 goto bail;
1481 1496 }
1482 1497 Py_CLEAR(key);
1483 1498 Py_CLEAR(value);
1484 1499 }
1485 1500 Py_RETURN_NONE;
1486 1501 bail:
1487 1502 Py_XDECREF(key);
1488 1503 Py_XDECREF(value);
1489 1504 return NULL;
1490 1505 }
1491 1506
1492 1507 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1493 1508 {
1494 1509 int rev, generaldelta;
1495 1510 PyObject *stoparg;
1496 1511 int stoprev, iterrev, baserev = -1;
1497 1512 int stopped;
1498 1513 PyObject *chain = NULL, *result = NULL;
1499 1514 const Py_ssize_t length = index_length(self);
1500 1515
1501 1516 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1502 1517 return NULL;
1503 1518 }
1504 1519
1505 1520 if (PyLong_Check(stoparg)) {
1506 1521 stoprev = (int)PyLong_AsLong(stoparg);
1507 1522 if (stoprev == -1 && PyErr_Occurred()) {
1508 1523 return NULL;
1509 1524 }
1510 1525 } else if (stoparg == Py_None) {
1511 1526 stoprev = -2;
1512 1527 } else {
1513 1528 PyErr_SetString(PyExc_ValueError,
1514 1529 "stoprev must be integer or None");
1515 1530 return NULL;
1516 1531 }
1517 1532
1518 1533 if (rev < 0 || rev >= length) {
1519 1534 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1520 1535 return NULL;
1521 1536 }
1522 1537
1523 1538 chain = PyList_New(0);
1524 1539 if (chain == NULL) {
1525 1540 return NULL;
1526 1541 }
1527 1542
1528 1543 baserev = index_baserev(self, rev);
1529 1544
1530 1545 /* This should never happen. */
1531 1546 if (baserev <= -2) {
1532 1547 /* Error should be set by index_deref() */
1533 1548 assert(PyErr_Occurred());
1534 1549 goto bail;
1535 1550 }
1536 1551
1537 1552 iterrev = rev;
1538 1553
1539 1554 while (iterrev != baserev && iterrev != stoprev) {
1540 1555 PyObject *value = PyLong_FromLong(iterrev);
1541 1556 if (value == NULL) {
1542 1557 goto bail;
1543 1558 }
1544 1559 if (PyList_Append(chain, value)) {
1545 1560 Py_DECREF(value);
1546 1561 goto bail;
1547 1562 }
1548 1563 Py_DECREF(value);
1549 1564
1550 1565 if (generaldelta) {
1551 1566 iterrev = baserev;
1552 1567 } else {
1553 1568 iterrev--;
1554 1569 }
1555 1570
1556 1571 if (iterrev < 0) {
1557 1572 break;
1558 1573 }
1559 1574
1560 1575 if (iterrev >= length) {
1561 1576 PyErr_SetString(PyExc_IndexError,
1562 1577 "revision outside index");
1563 1578 return NULL;
1564 1579 }
1565 1580
1566 1581 baserev = index_baserev(self, iterrev);
1567 1582
1568 1583 /* This should never happen. */
1569 1584 if (baserev <= -2) {
1570 1585 /* Error should be set by index_deref() */
1571 1586 assert(PyErr_Occurred());
1572 1587 goto bail;
1573 1588 }
1574 1589 }
1575 1590
1576 1591 if (iterrev == stoprev) {
1577 1592 stopped = 1;
1578 1593 } else {
1579 1594 PyObject *value = PyLong_FromLong(iterrev);
1580 1595 if (value == NULL) {
1581 1596 goto bail;
1582 1597 }
1583 1598 if (PyList_Append(chain, value)) {
1584 1599 Py_DECREF(value);
1585 1600 goto bail;
1586 1601 }
1587 1602 Py_DECREF(value);
1588 1603
1589 1604 stopped = 0;
1590 1605 }
1591 1606
1592 1607 if (PyList_Reverse(chain)) {
1593 1608 goto bail;
1594 1609 }
1595 1610
1596 1611 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1597 1612 Py_DECREF(chain);
1598 1613 return result;
1599 1614
1600 1615 bail:
1601 1616 Py_DECREF(chain);
1602 1617 return NULL;
1603 1618 }
1604 1619
1605 1620 static inline int64_t
1606 1621 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1607 1622 {
1608 1623 int64_t start_offset;
1609 1624 int64_t end_offset;
1610 1625 int end_size;
1611 1626 start_offset = index_get_start(self, start_rev);
1612 1627 if (start_offset < 0) {
1613 1628 return -1;
1614 1629 }
1615 1630 end_offset = index_get_start(self, end_rev);
1616 1631 if (end_offset < 0) {
1617 1632 return -1;
1618 1633 }
1619 1634 end_size = index_get_length(self, end_rev);
1620 1635 if (end_size < 0) {
1621 1636 return -1;
1622 1637 }
1623 1638 if (end_offset < start_offset) {
1624 1639 PyErr_Format(PyExc_ValueError,
1625 1640 "corrupted revlog index: inconsistent offset "
1626 1641 "between revisions (%zd) and (%zd)",
1627 1642 start_rev, end_rev);
1628 1643 return -1;
1629 1644 }
1630 1645 return (end_offset - start_offset) + (int64_t)end_size;
1631 1646 }
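Quick arithmetic check (values invented for illustration): if start_rev begins at byte 1000, end_rev begins at byte 4000 and end_rev's compressed length is 500 bytes, the span is (4000 - 1000) + 500 = 3500 bytes, i.e. the total amount of data that has to be read to cover the whole range.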
1632 1647
1633 1648 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1634 1649 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1635 1650 Py_ssize_t startidx, Py_ssize_t endidx)
1636 1651 {
1637 1652 int length;
1638 1653 while (endidx > 1 && endidx > startidx) {
1639 1654 length = index_get_length(self, revs[endidx - 1]);
1640 1655 if (length < 0) {
1641 1656 return -1;
1642 1657 }
1643 1658 if (length != 0) {
1644 1659 break;
1645 1660 }
1646 1661 endidx -= 1;
1647 1662 }
1648 1663 return endidx;
1649 1664 }
1650 1665
1651 1666 struct Gap {
1652 1667 int64_t size;
1653 1668 Py_ssize_t idx;
1654 1669 };
1655 1670
1656 1671 static int gap_compare(const void *left, const void *right)
1657 1672 {
1658 1673 const struct Gap *l_left = ((const struct Gap *)left);
1659 1674 const struct Gap *l_right = ((const struct Gap *)right);
1660 1675 if (l_left->size < l_right->size) {
1661 1676 return -1;
1662 1677 } else if (l_left->size > l_right->size) {
1663 1678 return 1;
1664 1679 }
1665 1680 return 0;
1666 1681 }
1667 1682 static int Py_ssize_t_compare(const void *left, const void *right)
1668 1683 {
1669 1684 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1670 1685 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1671 1686 if (l_left < l_right) {
1672 1687 return -1;
1673 1688 } else if (l_left > l_right) {
1674 1689 return 1;
1675 1690 }
1676 1691 return 0;
1677 1692 }
1678 1693
1679 1694 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1680 1695 {
1681 1696 /* method arguments */
1682 1697 PyObject *list_revs = NULL; /* revisions in the chain */
1683 1698 double targetdensity = 0; /* min density to achieve */
1684 1699 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1685 1700
1686 1701 /* other core variables */
1687 1702 Py_ssize_t idxlen = index_length(self);
1688 1703 Py_ssize_t i; /* used for various iteration */
1689 1704 PyObject *result = NULL; /* the final return of the function */
1690 1705
1691 1706 	/* generic information about the delta chain being sliced */
1692 1707 	Py_ssize_t num_revs = 0; /* size of the full delta chain */
1693 1708 	Py_ssize_t *revs = NULL; /* native array of revisions in the chain */
1694 1709 	int64_t chainpayload = 0; /* sum of all deltas in the chain */
1695 1710 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1696 1711
1697 1712 /* variable used for slicing the delta chain */
1698 1713 int64_t readdata = 0; /* amount of data currently planned to be read */
1699 1714 	double density = 0; /* ratio of payload data to data actually read */
1700 1715 int64_t previous_end;
1701 1716 	struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1702 1717 	Py_ssize_t num_gaps =
1703 1718 	    0; /* total number of notable gaps recorded so far */
1704 1719 	Py_ssize_t *selected_indices = NULL; /* indices of gaps skipped over */
1705 1720 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1706 1721 PyObject *chunk = NULL; /* individual slice */
1707 1722 PyObject *allchunks = NULL; /* all slices */
1708 1723 Py_ssize_t previdx;
1709 1724
1710 1725 /* parsing argument */
1711 1726 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1712 1727 &targetdensity, &mingapsize)) {
1713 1728 goto bail;
1714 1729 }
1715 1730
1716 1731 /* If the delta chain contains a single element, we do not need slicing
1717 1732 */
1718 1733 num_revs = PyList_GET_SIZE(list_revs);
1719 1734 if (num_revs <= 1) {
1720 1735 result = PyTuple_Pack(1, list_revs);
1721 1736 goto done;
1722 1737 }
1723 1738
1724 1739 /* Turn the python list into a native integer array (for efficiency) */
1725 1740 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1726 1741 if (revs == NULL) {
1727 1742 PyErr_NoMemory();
1728 1743 goto bail;
1729 1744 }
1730 1745 for (i = 0; i < num_revs; i++) {
1731 1746 Py_ssize_t revnum =
1732 1747 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1733 1748 if (revnum == -1 && PyErr_Occurred()) {
1734 1749 goto bail;
1735 1750 }
1736 1751 if (revnum < nullrev || revnum >= idxlen) {
1737 1752 PyErr_Format(PyExc_IndexError,
1738 1753 "index out of range: %zd", revnum);
1739 1754 goto bail;
1740 1755 }
1741 1756 revs[i] = revnum;
1742 1757 }
1743 1758
1744 1759 	/* Compute and check various properties of the unsliced delta chain */
1745 1760 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1746 1761 if (deltachainspan < 0) {
1747 1762 goto bail;
1748 1763 }
1749 1764
1750 1765 if (deltachainspan <= mingapsize) {
1751 1766 result = PyTuple_Pack(1, list_revs);
1752 1767 goto done;
1753 1768 }
1754 1769 chainpayload = 0;
1755 1770 for (i = 0; i < num_revs; i++) {
1756 1771 int tmp = index_get_length(self, revs[i]);
1757 1772 if (tmp < 0) {
1758 1773 goto bail;
1759 1774 }
1760 1775 chainpayload += tmp;
1761 1776 }
1762 1777
1763 1778 readdata = deltachainspan;
1764 1779 density = 1.0;
1765 1780
1766 1781 if (0 < deltachainspan) {
1767 1782 density = (double)chainpayload / (double)deltachainspan;
1768 1783 }
1769 1784
1770 1785 if (density >= targetdensity) {
1771 1786 result = PyTuple_Pack(1, list_revs);
1772 1787 goto done;
1773 1788 }
1774 1789
1775 1790 /* if chain is too sparse, look for relevant gaps */
1776 1791 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1777 1792 if (gaps == NULL) {
1778 1793 PyErr_NoMemory();
1779 1794 goto bail;
1780 1795 }
1781 1796
1782 1797 previous_end = -1;
1783 1798 for (i = 0; i < num_revs; i++) {
1784 1799 int64_t revstart;
1785 1800 int revsize;
1786 1801 revstart = index_get_start(self, revs[i]);
1787 1802 if (revstart < 0) {
1788 1803 goto bail;
1789 1804 };
1790 1805 revsize = index_get_length(self, revs[i]);
1791 1806 if (revsize < 0) {
1792 1807 goto bail;
1793 1808 };
1794 1809 if (revsize == 0) {
1795 1810 continue;
1796 1811 }
1797 1812 if (previous_end >= 0) {
1798 1813 int64_t gapsize = revstart - previous_end;
1799 1814 if (gapsize > mingapsize) {
1800 1815 gaps[num_gaps].size = gapsize;
1801 1816 gaps[num_gaps].idx = i;
1802 1817 num_gaps += 1;
1803 1818 }
1804 1819 }
1805 1820 previous_end = revstart + revsize;
1806 1821 }
1807 1822 if (num_gaps == 0) {
1808 1823 result = PyTuple_Pack(1, list_revs);
1809 1824 goto done;
1810 1825 }
1811 1826 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1812 1827
1813 1828 	/* Slice the largest gaps first, as they improve the density the most */
1814 1829 selected_indices =
1815 1830 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1816 1831 if (selected_indices == NULL) {
1817 1832 PyErr_NoMemory();
1818 1833 goto bail;
1819 1834 }
1820 1835
1821 1836 for (i = num_gaps - 1; i >= 0; i--) {
1822 1837 selected_indices[num_selected] = gaps[i].idx;
1823 1838 readdata -= gaps[i].size;
1824 1839 num_selected += 1;
1825 1840 if (readdata <= 0) {
1826 1841 density = 1.0;
1827 1842 } else {
1828 1843 density = (double)chainpayload / (double)readdata;
1829 1844 }
1830 1845 if (density >= targetdensity) {
1831 1846 break;
1832 1847 }
1833 1848 }
1834 1849 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1835 1850 &Py_ssize_t_compare);
1836 1851
1837 1852 /* create the resulting slice */
1838 1853 allchunks = PyList_New(0);
1839 1854 if (allchunks == NULL) {
1840 1855 goto bail;
1841 1856 }
1842 1857 previdx = 0;
1843 1858 selected_indices[num_selected] = num_revs;
1844 1859 for (i = 0; i <= num_selected; i++) {
1845 1860 Py_ssize_t idx = selected_indices[i];
1846 1861 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1847 1862 if (endidx < 0) {
1848 1863 goto bail;
1849 1864 }
1850 1865 if (previdx < endidx) {
1851 1866 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1852 1867 if (chunk == NULL) {
1853 1868 goto bail;
1854 1869 }
1855 1870 if (PyList_Append(allchunks, chunk) == -1) {
1856 1871 goto bail;
1857 1872 }
1858 1873 Py_DECREF(chunk);
1859 1874 chunk = NULL;
1860 1875 }
1861 1876 previdx = idx;
1862 1877 }
1863 1878 result = allchunks;
1864 1879 goto done;
1865 1880
1866 1881 bail:
1867 1882 Py_XDECREF(allchunks);
1868 1883 Py_XDECREF(chunk);
1869 1884 done:
1870 1885 free(revs);
1871 1886 free(gaps);
1872 1887 free(selected_indices);
1873 1888 return result;
1874 1889 }
1875 1890
1876 1891 static inline int nt_level(const char *node, Py_ssize_t level)
1877 1892 {
1878 1893 int v = node[level >> 1];
1879 1894 if (!(level & 1))
1880 1895 v >>= 4;
1881 1896 return v & 0xf;
1882 1897 }
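/*
 * Example: for a node whose first byte is 0xab, nt_level(node, 0)
 * yields 0xa and nt_level(node, 1) yields 0xb, so successive levels
 * walk the node id one hex digit (nybble) at a time.
 */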
1883 1898
1884 1899 /*
1885 1900 * Return values:
1886 1901 *
1887 1902  * -4: match is ambiguous (multiple candidates)
1888 1903  * -2: not found
1889 1904  * rest: valid rev (-1 stands for the nullid)
1890 1905 */
1891 1906 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1892 1907 int hex)
1893 1908 {
1894 1909 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1895 1910 int level, maxlevel, off;
1896 1911
1897 1912 /* If the input is binary, do a fast check for the nullid first. */
1898 1913 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1899 1914 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1900 1915 return -1;
1901 1916
1902 1917 if (hex)
1903 1918 maxlevel = nodelen;
1904 1919 else
1905 1920 maxlevel = 2 * nodelen;
1906 1921 if (maxlevel > 2 * self->nodelen)
1907 1922 maxlevel = 2 * self->nodelen;
1908 1923
1909 1924 for (level = off = 0; level < maxlevel; level++) {
1910 1925 int k = getnybble(node, level);
1911 1926 nodetreenode *n = &self->nodes[off];
1912 1927 int v = n->children[k];
1913 1928
1914 1929 if (v < 0) {
1915 1930 const char *n;
1916 1931 Py_ssize_t i;
1917 1932
1918 1933 v = -(v + 2);
1919 1934 n = index_node(self->index, v);
1920 1935 if (n == NULL)
1921 1936 return -2;
1922 1937 for (i = level; i < maxlevel; i++)
1923 1938 if (getnybble(node, i) != nt_level(n, i))
1924 1939 return -2;
1925 1940 return v;
1926 1941 }
1927 1942 if (v == 0)
1928 1943 return -2;
1929 1944 off = v;
1930 1945 }
1931 1946 /* multiple matches against an ambiguous prefix */
1932 1947 return -4;
1933 1948 }
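/*
 * For instance, nt_find(nt, "1234ab", 6, 1) walks one trie level per
 * hex digit of the prefix "1234ab": reaching a leaf returns its
 * revision (after checking the remaining digits against the stored
 * node), running out of children returns -2, and exhausting the prefix
 * while still on an inner node returns -4 (ambiguous).
 */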
1934 1949
1935 1950 static int nt_new(nodetree *self)
1936 1951 {
1937 1952 if (self->length == self->capacity) {
1938 1953 size_t newcapacity;
1939 1954 nodetreenode *newnodes;
1940 1955 newcapacity = self->capacity * 2;
1941 1956 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1942 1957 PyErr_SetString(PyExc_MemoryError,
1943 1958 "overflow in nt_new");
1944 1959 return -1;
1945 1960 }
1946 1961 newnodes =
1947 1962 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1948 1963 if (newnodes == NULL) {
1949 1964 PyErr_SetString(PyExc_MemoryError, "out of memory");
1950 1965 return -1;
1951 1966 }
1952 1967 self->capacity = newcapacity;
1953 1968 self->nodes = newnodes;
1954 1969 memset(&self->nodes[self->length], 0,
1955 1970 sizeof(nodetreenode) * (self->capacity - self->length));
1956 1971 }
1957 1972 return self->length++;
1958 1973 }
1959 1974
1960 1975 static int nt_insert(nodetree *self, const char *node, int rev)
1961 1976 {
1962 1977 int level = 0;
1963 1978 int off = 0;
1964 1979
1965 1980 while (level < 2 * self->nodelen) {
1966 1981 int k = nt_level(node, level);
1967 1982 nodetreenode *n;
1968 1983 int v;
1969 1984
1970 1985 n = &self->nodes[off];
1971 1986 v = n->children[k];
1972 1987
1973 1988 if (v == 0) {
1974 1989 n->children[k] = -rev - 2;
1975 1990 return 0;
1976 1991 }
1977 1992 if (v < 0) {
1978 1993 const char *oldnode =
1979 1994 index_node_existing(self->index, -(v + 2));
1980 1995 int noff;
1981 1996
1982 1997 if (oldnode == NULL)
1983 1998 return -1;
1984 1999 if (!memcmp(oldnode, node, self->nodelen)) {
1985 2000 n->children[k] = -rev - 2;
1986 2001 return 0;
1987 2002 }
1988 2003 noff = nt_new(self);
1989 2004 if (noff == -1)
1990 2005 return -1;
1991 2006 /* self->nodes may have been changed by realloc */
1992 2007 self->nodes[off].children[k] = noff;
1993 2008 off = noff;
1994 2009 n = &self->nodes[off];
1995 2010 n->children[nt_level(oldnode, ++level)] = v;
1996 2011 if (level > self->depth)
1997 2012 self->depth = level;
1998 2013 self->splits += 1;
1999 2014 } else {
2000 2015 level += 1;
2001 2016 off = v;
2002 2017 }
2003 2018 }
2004 2019
2005 2020 return -1;
2006 2021 }
2007 2022
2008 2023 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2009 2024 {
2010 2025 Py_ssize_t rev;
2011 2026 const char *node;
2012 2027 Py_ssize_t length;
2013 2028 if (!PyArg_ParseTuple(args, "n", &rev))
2014 2029 return NULL;
2015 2030 length = index_length(self->nt.index);
2016 2031 if (rev < 0 || rev >= length) {
2017 2032 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2018 2033 return NULL;
2019 2034 }
2020 2035 node = index_node_existing(self->nt.index, rev);
2021 2036 if (nt_insert(&self->nt, node, (int)rev) == -1)
2022 2037 return NULL;
2023 2038 Py_RETURN_NONE;
2024 2039 }
2025 2040
2026 2041 static int nt_delete_node(nodetree *self, const char *node)
2027 2042 {
2028 2043 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2029 2044 */
2030 2045 return nt_insert(self, node, -2);
2031 2046 }
2032 2047
2033 2048 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2034 2049 {
2035 2050 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2036 2051 self->nodes = NULL;
2037 2052
2038 2053 self->index = index;
2039 2054 /* The input capacity is in terms of revisions, while the field is in
2040 2055 * terms of nodetree nodes. */
2041 2056 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2042 2057 self->nodelen = index->nodelen;
2043 2058 self->depth = 0;
2044 2059 self->splits = 0;
2045 2060 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2046 2061 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2047 2062 return -1;
2048 2063 }
2049 2064 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2050 2065 if (self->nodes == NULL) {
2051 2066 PyErr_NoMemory();
2052 2067 return -1;
2053 2068 }
2054 2069 self->length = 1;
2055 2070 return 0;
2056 2071 }
2057 2072
2058 2073 static int ntobj_init(nodetreeObject *self, PyObject *args)
2059 2074 {
2060 2075 PyObject *index;
2061 2076 unsigned capacity;
2062 2077 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2063 2078 &capacity))
2064 2079 return -1;
2065 2080 Py_INCREF(index);
2066 2081 return nt_init(&self->nt, (indexObject *)index, capacity);
2067 2082 }
2068 2083
2069 2084 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2070 2085 {
2071 2086 return nt_find(self, node, nodelen, 1);
2072 2087 }
2073 2088
2074 2089 /*
2075 2090 * Find the length of the shortest unique prefix of node.
2076 2091 *
2077 2092 * Return values:
2078 2093 *
2079 2094 * -3: error (exception set)
2080 2095 * -2: not found (no exception set)
2081 2096 * rest: length of shortest prefix
2082 2097 */
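/*
 * Example: if the index contains nodes starting with cc8a... and
 * cc93..., nt_shortest() for the first one returns 3, because "cc8" is
 * the shortest hex prefix that is unambiguous. (The hashes above are
 * made up for illustration.)
 */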
2083 2098 static int nt_shortest(nodetree *self, const char *node)
2084 2099 {
2085 2100 int level, off;
2086 2101
2087 2102 for (level = off = 0; level < 2 * self->nodelen; level++) {
2088 2103 int k, v;
2089 2104 nodetreenode *n = &self->nodes[off];
2090 2105 k = nt_level(node, level);
2091 2106 v = n->children[k];
2092 2107 if (v < 0) {
2093 2108 const char *n;
2094 2109 v = -(v + 2);
2095 2110 n = index_node_existing(self->index, v);
2096 2111 if (n == NULL)
2097 2112 return -3;
2098 2113 if (memcmp(node, n, self->nodelen) != 0)
2099 2114 /*
2100 2115 * Found a unique prefix, but it wasn't for the
2101 2116 				 * requested node (i.e. the requested node does
2102 2117 * not exist).
2103 2118 */
2104 2119 return -2;
2105 2120 return level + 1;
2106 2121 }
2107 2122 if (v == 0)
2108 2123 return -2;
2109 2124 off = v;
2110 2125 }
2111 2126 /*
2112 2127 * The node was still not unique after 40 hex digits, so this won't
2113 2128 * happen. Also, if we get here, then there's a programming error in
2114 2129 * this file that made us insert a node longer than 40 hex digits.
2115 2130 */
2116 2131 PyErr_SetString(PyExc_Exception, "broken node tree");
2117 2132 return -3;
2118 2133 }
2119 2134
2120 2135 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2121 2136 {
2122 2137 PyObject *val;
2123 2138 char *node;
2124 2139 int length;
2125 2140
2126 2141 if (!PyArg_ParseTuple(args, "O", &val))
2127 2142 return NULL;
2128 2143 if (node_check(self->nt.nodelen, val, &node) == -1)
2129 2144 return NULL;
2130 2145
2131 2146 length = nt_shortest(&self->nt, node);
2132 2147 if (length == -3)
2133 2148 return NULL;
2134 2149 if (length == -2) {
2135 2150 raise_revlog_error();
2136 2151 return NULL;
2137 2152 }
2138 2153 return PyLong_FromLong(length);
2139 2154 }
2140 2155
2141 2156 static void nt_dealloc(nodetree *self)
2142 2157 {
2143 2158 free(self->nodes);
2144 2159 self->nodes = NULL;
2145 2160 }
2146 2161
2147 2162 static void ntobj_dealloc(nodetreeObject *self)
2148 2163 {
2149 2164 Py_XDECREF(self->nt.index);
2150 2165 nt_dealloc(&self->nt);
2151 2166 PyObject_Del(self);
2152 2167 }
2153 2168
2154 2169 static PyMethodDef ntobj_methods[] = {
2155 2170 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2156 2171 "insert an index entry"},
2157 2172 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2158 2173 "find length of shortest hex nodeid of a binary ID"},
2159 2174 {NULL} /* Sentinel */
2160 2175 };
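/*
 * This type is exposed to Python as parsers.nodetree(index, capacity).
 * Revisions already present in the index are registered one by one via
 * insert(rev), after which shortest(node) returns the length of the
 * shortest unambiguous hex prefix for a binary node id.
 */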
2161 2176
2162 2177 static PyTypeObject nodetreeType = {
2163 2178 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2164 2179 "parsers.nodetree", /* tp_name */
2165 2180 sizeof(nodetreeObject), /* tp_basicsize */
2166 2181 0, /* tp_itemsize */
2167 2182 (destructor)ntobj_dealloc, /* tp_dealloc */
2168 2183 0, /* tp_print */
2169 2184 0, /* tp_getattr */
2170 2185 0, /* tp_setattr */
2171 2186 0, /* tp_compare */
2172 2187 0, /* tp_repr */
2173 2188 0, /* tp_as_number */
2174 2189 0, /* tp_as_sequence */
2175 2190 0, /* tp_as_mapping */
2176 2191 0, /* tp_hash */
2177 2192 0, /* tp_call */
2178 2193 0, /* tp_str */
2179 2194 0, /* tp_getattro */
2180 2195 0, /* tp_setattro */
2181 2196 0, /* tp_as_buffer */
2182 2197 Py_TPFLAGS_DEFAULT, /* tp_flags */
2183 2198 "nodetree", /* tp_doc */
2184 2199 0, /* tp_traverse */
2185 2200 0, /* tp_clear */
2186 2201 0, /* tp_richcompare */
2187 2202 0, /* tp_weaklistoffset */
2188 2203 0, /* tp_iter */
2189 2204 0, /* tp_iternext */
2190 2205 ntobj_methods, /* tp_methods */
2191 2206 0, /* tp_members */
2192 2207 0, /* tp_getset */
2193 2208 0, /* tp_base */
2194 2209 0, /* tp_dict */
2195 2210 0, /* tp_descr_get */
2196 2211 0, /* tp_descr_set */
2197 2212 0, /* tp_dictoffset */
2198 2213 (initproc)ntobj_init, /* tp_init */
2199 2214 0, /* tp_alloc */
2200 2215 };
2201 2216
2202 2217 static int index_init_nt(indexObject *self)
2203 2218 {
2204 2219 if (!self->ntinitialized) {
2205 2220 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2206 2221 nt_dealloc(&self->nt);
2207 2222 return -1;
2208 2223 }
2209 2224 if (nt_insert(&self->nt, nullid, -1) == -1) {
2210 2225 nt_dealloc(&self->nt);
2211 2226 return -1;
2212 2227 }
2213 2228 self->ntinitialized = 1;
2214 2229 self->ntrev = (int)index_length(self);
2215 2230 self->ntlookups = 1;
2216 2231 self->ntmisses = 0;
2217 2232 }
2218 2233 return 0;
2219 2234 }
2220 2235
2221 2236 /*
2222 2237 * Return values:
2223 2238 *
2224 2239 * -3: error (exception set)
2225 2240 * -2: not found (no exception set)
2226 2241 * rest: valid rev
2227 2242 */
2228 2243 static int index_find_node(indexObject *self, const char *node)
2229 2244 {
2230 2245 int rev;
2231 2246
2232 2247 if (index_init_nt(self) == -1)
2233 2248 return -3;
2234 2249
2235 2250 self->ntlookups++;
2236 2251 rev = nt_find(&self->nt, node, self->nodelen, 0);
2237 2252 if (rev >= -1)
2238 2253 return rev;
2239 2254
2240 2255 /*
2241 2256 * For the first handful of lookups, we scan the entire index,
2242 2257 * and cache only the matching nodes. This optimizes for cases
2243 2258 * like "hg tip", where only a few nodes are accessed.
2244 2259 *
2245 2260 * After that, we cache every node we visit, using a single
2246 2261 * scan amortized over multiple lookups. This gives the best
2247 2262 * bulk performance, e.g. for "hg log".
2248 2263 */
2249 2264 if (self->ntmisses++ < 4) {
2250 2265 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2251 2266 const char *n = index_node_existing(self, rev);
2252 2267 if (n == NULL)
2253 2268 return -3;
2254 2269 if (memcmp(node, n, self->nodelen) == 0) {
2255 2270 if (nt_insert(&self->nt, n, rev) == -1)
2256 2271 return -3;
2257 2272 break;
2258 2273 }
2259 2274 }
2260 2275 } else {
2261 2276 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2262 2277 const char *n = index_node_existing(self, rev);
2263 2278 if (n == NULL)
2264 2279 return -3;
2265 2280 if (nt_insert(&self->nt, n, rev) == -1) {
2266 2281 self->ntrev = rev + 1;
2267 2282 return -3;
2268 2283 }
2269 2284 if (memcmp(node, n, self->nodelen) == 0) {
2270 2285 break;
2271 2286 }
2272 2287 }
2273 2288 self->ntrev = rev;
2274 2289 }
2275 2290
2276 2291 if (rev >= 0)
2277 2292 return rev;
2278 2293 return -2;
2279 2294 }
2280 2295
2281 2296 static PyObject *index_getitem(indexObject *self, PyObject *value)
2282 2297 {
2283 2298 char *node;
2284 2299 int rev;
2285 2300
2286 2301 if (PyLong_Check(value)) {
2287 2302 long idx;
2288 2303 if (!pylong_to_long(value, &idx)) {
2289 2304 return NULL;
2290 2305 }
2291 2306 return index_get(self, idx);
2292 2307 }
2293 2308
2294 2309 if (node_check(self->nodelen, value, &node) == -1)
2295 2310 return NULL;
2296 2311 rev = index_find_node(self, node);
2297 2312 if (rev >= -1)
2298 2313 return PyLong_FromLong(rev);
2299 2314 if (rev == -2)
2300 2315 raise_revlog_error();
2301 2316 return NULL;
2302 2317 }
2303 2318
2304 2319 /*
2305 2320 * Fully populate the radix tree.
2306 2321 */
2307 2322 static int index_populate_nt(indexObject *self)
2308 2323 {
2309 2324 int rev;
2310 2325 if (self->ntrev > 0) {
2311 2326 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2312 2327 const char *n = index_node_existing(self, rev);
2313 2328 if (n == NULL)
2314 2329 return -1;
2315 2330 if (nt_insert(&self->nt, n, rev) == -1)
2316 2331 return -1;
2317 2332 }
2318 2333 self->ntrev = -1;
2319 2334 }
2320 2335 return 0;
2321 2336 }
2322 2337
2323 2338 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2324 2339 {
2325 2340 const char *fullnode;
2326 2341 Py_ssize_t nodelen;
2327 2342 char *node;
2328 2343 int rev, i;
2329 2344
2330 2345 if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
2331 2346 return NULL;
2332 2347
2333 2348 if (nodelen < 1) {
2334 2349 PyErr_SetString(PyExc_ValueError, "key too short");
2335 2350 return NULL;
2336 2351 }
2337 2352
2338 2353 if (nodelen > 2 * self->nodelen) {
2339 2354 PyErr_SetString(PyExc_ValueError, "key too long");
2340 2355 return NULL;
2341 2356 }
2342 2357
2343 2358 for (i = 0; i < nodelen; i++)
2344 2359 hexdigit(node, i);
2345 2360 if (PyErr_Occurred()) {
2346 2361 /* input contains non-hex characters */
2347 2362 PyErr_Clear();
2348 2363 Py_RETURN_NONE;
2349 2364 }
2350 2365
2351 2366 if (index_init_nt(self) == -1)
2352 2367 return NULL;
2353 2368 if (index_populate_nt(self) == -1)
2354 2369 return NULL;
2355 2370 rev = nt_partialmatch(&self->nt, node, nodelen);
2356 2371
2357 2372 switch (rev) {
2358 2373 case -4:
2359 2374 raise_revlog_error();
2360 2375 return NULL;
2361 2376 case -2:
2362 2377 Py_RETURN_NONE;
2363 2378 case -1:
2364 2379 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2365 2380 }
2366 2381
2367 2382 fullnode = index_node_existing(self, rev);
2368 2383 if (fullnode == NULL) {
2369 2384 return NULL;
2370 2385 }
2371 2386 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2372 2387 }
2373 2388
2374 2389 static PyObject *index_shortest(indexObject *self, PyObject *args)
2375 2390 {
2376 2391 PyObject *val;
2377 2392 char *node;
2378 2393 int length;
2379 2394
2380 2395 if (!PyArg_ParseTuple(args, "O", &val))
2381 2396 return NULL;
2382 2397 if (node_check(self->nodelen, val, &node) == -1)
2383 2398 return NULL;
2384 2399
2385 2400 self->ntlookups++;
2386 2401 if (index_init_nt(self) == -1)
2387 2402 return NULL;
2388 2403 if (index_populate_nt(self) == -1)
2389 2404 return NULL;
2390 2405 length = nt_shortest(&self->nt, node);
2391 2406 if (length == -3)
2392 2407 return NULL;
2393 2408 if (length == -2) {
2394 2409 raise_revlog_error();
2395 2410 return NULL;
2396 2411 }
2397 2412 return PyLong_FromLong(length);
2398 2413 }
2399 2414
2400 2415 static PyObject *index_m_get(indexObject *self, PyObject *args)
2401 2416 {
2402 2417 PyObject *val;
2403 2418 char *node;
2404 2419 int rev;
2405 2420
2406 2421 if (!PyArg_ParseTuple(args, "O", &val))
2407 2422 return NULL;
2408 2423 if (node_check(self->nodelen, val, &node) == -1)
2409 2424 return NULL;
2410 2425 rev = index_find_node(self, node);
2411 2426 if (rev == -3)
2412 2427 return NULL;
2413 2428 if (rev == -2)
2414 2429 Py_RETURN_NONE;
2415 2430 return PyLong_FromLong(rev);
2416 2431 }
2417 2432
2418 2433 static int index_contains(indexObject *self, PyObject *value)
2419 2434 {
2420 2435 char *node;
2421 2436
2422 2437 if (PyLong_Check(value)) {
2423 2438 long rev;
2424 2439 if (!pylong_to_long(value, &rev)) {
2425 2440 return -1;
2426 2441 }
2427 2442 return rev >= -1 && rev < index_length(self);
2428 2443 }
2429 2444
2430 2445 if (node_check(self->nodelen, value, &node) == -1)
2431 2446 return -1;
2432 2447
2433 2448 switch (index_find_node(self, node)) {
2434 2449 case -3:
2435 2450 return -1;
2436 2451 case -2:
2437 2452 return 0;
2438 2453 default:
2439 2454 return 1;
2440 2455 }
2441 2456 }
2442 2457
2443 2458 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2444 2459 {
2445 2460 int ret = index_contains(self, args);
2446 2461 if (ret < 0)
2447 2462 return NULL;
2448 2463 return PyBool_FromLong((long)ret);
2449 2464 }
2450 2465
2451 2466 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2452 2467 {
2453 2468 char *node;
2454 2469 int rev;
2455 2470
2456 2471 if (node_check(self->nodelen, val, &node) == -1)
2457 2472 return NULL;
2458 2473 rev = index_find_node(self, node);
2459 2474 if (rev >= -1)
2460 2475 return PyLong_FromLong(rev);
2461 2476 if (rev == -2)
2462 2477 raise_revlog_error();
2463 2478 return NULL;
2464 2479 }
2465 2480
2466 2481 typedef uint64_t bitmask;
2467 2482
2468 2483 /*
2469 2484 * Given a disjoint set of revs, return all candidates for the
2470 2485 * greatest common ancestor. In revset notation, this is the set
2471 2486 * "heads(::a and ::b and ...)"
2472 2487 */
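/*
 * Each input rev i is tagged with bit (1 << i) in the seen[] array and
 * the bits are propagated to parents while walking from the highest rev
 * down.  E.g. with two inputs, allseen is 0b11 and poison is 0b100: a
 * revision whose seen value becomes 0b11 is reachable from both inputs
 * and is reported, then poisoned so that its own ancestors are skipped.
 */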
2473 2488 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2474 2489 int revcount)
2475 2490 {
2476 2491 const bitmask allseen = (1ull << revcount) - 1;
2477 2492 const bitmask poison = 1ull << revcount;
2478 2493 PyObject *gca = PyList_New(0);
2479 2494 int i, v, interesting;
2480 2495 int maxrev = -1;
2481 2496 bitmask sp;
2482 2497 bitmask *seen;
2483 2498
2484 2499 if (gca == NULL)
2485 2500 return PyErr_NoMemory();
2486 2501
2487 2502 for (i = 0; i < revcount; i++) {
2488 2503 if (revs[i] > maxrev)
2489 2504 maxrev = revs[i];
2490 2505 }
2491 2506
2492 2507 seen = calloc(sizeof(*seen), maxrev + 1);
2493 2508 if (seen == NULL) {
2494 2509 Py_DECREF(gca);
2495 2510 return PyErr_NoMemory();
2496 2511 }
2497 2512
2498 2513 for (i = 0; i < revcount; i++)
2499 2514 seen[revs[i]] = 1ull << i;
2500 2515
2501 2516 interesting = revcount;
2502 2517
2503 2518 for (v = maxrev; v >= 0 && interesting; v--) {
2504 2519 bitmask sv = seen[v];
2505 2520 int parents[2];
2506 2521
2507 2522 if (!sv)
2508 2523 continue;
2509 2524
2510 2525 if (sv < poison) {
2511 2526 interesting -= 1;
2512 2527 if (sv == allseen) {
2513 2528 PyObject *obj = PyLong_FromLong(v);
2514 2529 if (obj == NULL)
2515 2530 goto bail;
2516 2531 if (PyList_Append(gca, obj) == -1) {
2517 2532 Py_DECREF(obj);
2518 2533 goto bail;
2519 2534 }
2520 2535 sv |= poison;
2521 2536 for (i = 0; i < revcount; i++) {
2522 2537 if (revs[i] == v)
2523 2538 goto done;
2524 2539 }
2525 2540 }
2526 2541 }
2527 2542 if (index_get_parents(self, v, parents, maxrev) < 0)
2528 2543 goto bail;
2529 2544
2530 2545 for (i = 0; i < 2; i++) {
2531 2546 int p = parents[i];
2532 2547 if (p == -1)
2533 2548 continue;
2534 2549 sp = seen[p];
2535 2550 if (sv < poison) {
2536 2551 if (sp == 0) {
2537 2552 seen[p] = sv;
2538 2553 interesting++;
2539 2554 } else if (sp != sv)
2540 2555 seen[p] |= sv;
2541 2556 } else {
2542 2557 if (sp && sp < poison)
2543 2558 interesting--;
2544 2559 seen[p] = sv;
2545 2560 }
2546 2561 }
2547 2562 }
2548 2563
2549 2564 done:
2550 2565 free(seen);
2551 2566 return gca;
2552 2567 bail:
2553 2568 free(seen);
2554 2569 Py_XDECREF(gca);
2555 2570 return NULL;
2556 2571 }
2557 2572
2558 2573 /*
2559 2574 * Given a disjoint set of revs, return the subset with the longest
2560 2575 * path to the root.
2561 2576 */
2562 2577 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2563 2578 {
2564 2579 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2565 2580 static const Py_ssize_t capacity = 24;
2566 2581 int *depth, *interesting = NULL;
2567 2582 int i, j, v, ninteresting;
2568 2583 PyObject *dict = NULL, *keys = NULL;
2569 2584 long *seen = NULL;
2570 2585 int maxrev = -1;
2571 2586 long final;
2572 2587
2573 2588 if (revcount > capacity) {
2574 2589 PyErr_Format(PyExc_OverflowError,
2575 2590 "bitset size (%ld) > capacity (%ld)",
2576 2591 (long)revcount, (long)capacity);
2577 2592 return NULL;
2578 2593 }
2579 2594
2580 2595 for (i = 0; i < revcount; i++) {
2581 2596 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2582 2597 if (n > maxrev)
2583 2598 maxrev = n;
2584 2599 }
2585 2600
2586 2601 depth = calloc(sizeof(*depth), maxrev + 1);
2587 2602 if (depth == NULL)
2588 2603 return PyErr_NoMemory();
2589 2604
2590 2605 seen = calloc(sizeof(*seen), maxrev + 1);
2591 2606 if (seen == NULL) {
2592 2607 PyErr_NoMemory();
2593 2608 goto bail;
2594 2609 }
2595 2610
2596 2611 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2597 2612 if (interesting == NULL) {
2598 2613 PyErr_NoMemory();
2599 2614 goto bail;
2600 2615 }
2601 2616
2602 2617 if (PyList_Sort(revs) == -1)
2603 2618 goto bail;
2604 2619
2605 2620 for (i = 0; i < revcount; i++) {
2606 2621 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2607 2622 long b = 1l << i;
2608 2623 depth[n] = 1;
2609 2624 seen[n] = b;
2610 2625 interesting[b] = 1;
2611 2626 }
2612 2627
2613 2628 /* invariant: ninteresting is the number of non-zero entries in
2614 2629 * interesting. */
2615 2630 ninteresting = (int)revcount;
2616 2631
2617 2632 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2618 2633 int dv = depth[v];
2619 2634 int parents[2];
2620 2635 long sv;
2621 2636
2622 2637 if (dv == 0)
2623 2638 continue;
2624 2639
2625 2640 sv = seen[v];
2626 2641 if (index_get_parents(self, v, parents, maxrev) < 0)
2627 2642 goto bail;
2628 2643
2629 2644 for (i = 0; i < 2; i++) {
2630 2645 int p = parents[i];
2631 2646 long sp;
2632 2647 int dp;
2633 2648
2634 2649 if (p == -1)
2635 2650 continue;
2636 2651
2637 2652 dp = depth[p];
2638 2653 sp = seen[p];
2639 2654 if (dp <= dv) {
2640 2655 depth[p] = dv + 1;
2641 2656 if (sp != sv) {
2642 2657 interesting[sv] += 1;
2643 2658 seen[p] = sv;
2644 2659 if (sp) {
2645 2660 interesting[sp] -= 1;
2646 2661 if (interesting[sp] == 0)
2647 2662 ninteresting -= 1;
2648 2663 }
2649 2664 }
2650 2665 } else if (dv == dp - 1) {
2651 2666 long nsp = sp | sv;
2652 2667 if (nsp == sp)
2653 2668 continue;
2654 2669 seen[p] = nsp;
2655 2670 interesting[sp] -= 1;
2656 2671 if (interesting[sp] == 0)
2657 2672 ninteresting -= 1;
2658 2673 if (interesting[nsp] == 0)
2659 2674 ninteresting += 1;
2660 2675 interesting[nsp] += 1;
2661 2676 }
2662 2677 }
2663 2678 interesting[sv] -= 1;
2664 2679 if (interesting[sv] == 0)
2665 2680 ninteresting -= 1;
2666 2681 }
2667 2682
2668 2683 final = 0;
2669 2684 j = ninteresting;
2670 2685 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2671 2686 if (interesting[i] == 0)
2672 2687 continue;
2673 2688 final |= i;
2674 2689 j -= 1;
2675 2690 }
2676 2691 if (final == 0) {
2677 2692 keys = PyList_New(0);
2678 2693 goto bail;
2679 2694 }
2680 2695
2681 2696 dict = PyDict_New();
2682 2697 if (dict == NULL)
2683 2698 goto bail;
2684 2699
2685 2700 for (i = 0; i < revcount; i++) {
2686 2701 PyObject *key;
2687 2702
2688 2703 if ((final & (1 << i)) == 0)
2689 2704 continue;
2690 2705
2691 2706 key = PyList_GET_ITEM(revs, i);
2692 2707 Py_INCREF(key);
2693 2708 Py_INCREF(Py_None);
2694 2709 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2695 2710 Py_DECREF(key);
2696 2711 Py_DECREF(Py_None);
2697 2712 goto bail;
2698 2713 }
2699 2714 }
2700 2715
2701 2716 keys = PyDict_Keys(dict);
2702 2717
2703 2718 bail:
2704 2719 free(depth);
2705 2720 free(seen);
2706 2721 free(interesting);
2707 2722 Py_XDECREF(dict);
2708 2723
2709 2724 return keys;
2710 2725 }
2711 2726
2712 2727 /*
2713 2728 * Given a (possibly overlapping) set of revs, return all the
2714 2729 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2715 2730 */
2716 2731 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2717 2732 {
2718 2733 PyObject *ret = NULL;
2719 2734 Py_ssize_t argcount, i, len;
2720 2735 bitmask repeat = 0;
2721 2736 int revcount = 0;
2722 2737 int *revs;
2723 2738
2724 2739 argcount = PySequence_Length(args);
2725 2740 revs = PyMem_Malloc(argcount * sizeof(*revs));
2726 2741 if (argcount > 0 && revs == NULL)
2727 2742 return PyErr_NoMemory();
2728 2743 len = index_length(self);
2729 2744
2730 2745 for (i = 0; i < argcount; i++) {
2731 2746 static const int capacity = 24;
2732 2747 PyObject *obj = PySequence_GetItem(args, i);
2733 2748 bitmask x;
2734 2749 long val;
2735 2750
2736 2751 if (!PyLong_Check(obj)) {
2737 2752 PyErr_SetString(PyExc_TypeError,
2738 2753 "arguments must all be ints");
2739 2754 Py_DECREF(obj);
2740 2755 goto bail;
2741 2756 }
2742 2757 val = PyLong_AsLong(obj);
2743 2758 Py_DECREF(obj);
2744 2759 if (val == -1) {
2745 2760 ret = PyList_New(0);
2746 2761 goto done;
2747 2762 }
2748 2763 if (val < 0 || val >= len) {
2749 2764 PyErr_SetString(PyExc_IndexError, "index out of range");
2750 2765 goto bail;
2751 2766 }
2752 2767 /* this cheesy bloom filter lets us avoid some more
2753 2768 * expensive duplicate checks in the common set-is-disjoint
2754 2769 * case */
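		/* e.g. revs 5 and 69 hash to the same bit (69 & 0x3f == 5),
		 * so a collision merely falls back to the exact scan below;
		 * it can never produce a wrong result. */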
2755 2770 x = 1ull << (val & 0x3f);
2756 2771 if (repeat & x) {
2757 2772 int k;
2758 2773 for (k = 0; k < revcount; k++) {
2759 2774 if (val == revs[k])
2760 2775 goto duplicate;
2761 2776 }
2762 2777 } else
2763 2778 repeat |= x;
2764 2779 if (revcount >= capacity) {
2765 2780 PyErr_Format(PyExc_OverflowError,
2766 2781 "bitset size (%d) > capacity (%d)",
2767 2782 revcount, capacity);
2768 2783 goto bail;
2769 2784 }
2770 2785 revs[revcount++] = (int)val;
2771 2786 duplicate:;
2772 2787 }
2773 2788
2774 2789 if (revcount == 0) {
2775 2790 ret = PyList_New(0);
2776 2791 goto done;
2777 2792 }
2778 2793 if (revcount == 1) {
2779 2794 PyObject *obj;
2780 2795 ret = PyList_New(1);
2781 2796 if (ret == NULL)
2782 2797 goto bail;
2783 2798 obj = PyLong_FromLong(revs[0]);
2784 2799 if (obj == NULL)
2785 2800 goto bail;
2786 2801 PyList_SET_ITEM(ret, 0, obj);
2787 2802 goto done;
2788 2803 }
2789 2804
2790 2805 ret = find_gca_candidates(self, revs, revcount);
2791 2806 if (ret == NULL)
2792 2807 goto bail;
2793 2808
2794 2809 done:
2795 2810 PyMem_Free(revs);
2796 2811 return ret;
2797 2812
2798 2813 bail:
2799 2814 PyMem_Free(revs);
2800 2815 Py_XDECREF(ret);
2801 2816 return NULL;
2802 2817 }
2803 2818
2804 2819 /*
2805 2820 * Given a (possibly overlapping) set of revs, return the greatest
2806 2821 * common ancestors: those with the longest path to the root.
2807 2822 */
2808 2823 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2809 2824 {
2810 2825 PyObject *ret;
2811 2826 PyObject *gca = index_commonancestorsheads(self, args);
2812 2827 if (gca == NULL)
2813 2828 return NULL;
2814 2829
2815 2830 if (PyList_GET_SIZE(gca) <= 1) {
2816 2831 return gca;
2817 2832 }
2818 2833
2819 2834 ret = find_deepest(self, gca);
2820 2835 Py_DECREF(gca);
2821 2836 return ret;
2822 2837 }
2823 2838
2824 2839 /*
2825 2840 * Invalidate any trie entries introduced by added revs.
2826 2841 */
2827 2842 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2828 2843 {
2829 2844 Py_ssize_t i, len;
2830 2845
2831 2846 len = self->length + self->new_length;
2832 2847 i = start - self->length;
2833 2848 if (i < 0)
2834 2849 return;
2835 2850
2836 2851 for (i = start; i < len; i++) {
2837 2852 const char *node = index_node(self, i);
2838 2853 nt_delete_node(&self->nt, node);
2839 2854 }
2840 2855
2841 2856 self->new_length = start - self->length;
2842 2857 }
2843 2858
2844 2859 /*
2845 2860  * Delete a numeric range of revs, which must reach the end of the
2846 2861  * index.
2847 2862 */
2848 2863 static int index_slice_del(indexObject *self, PyObject *item)
2849 2864 {
2850 2865 Py_ssize_t start, stop, step, slicelength;
2851 2866 Py_ssize_t length = index_length(self) + 1;
2852 2867 int ret = 0;
2853 2868
2854 2869 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2855 2870 &slicelength) < 0)
2856 2871 return -1;
2857 2872
2858 2873 if (slicelength <= 0)
2859 2874 return 0;
2860 2875
2861 2876 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2862 2877 stop = start;
2863 2878
2864 2879 if (step < 0) {
2865 2880 stop = start + 1;
2866 2881 start = stop + step * (slicelength - 1) - 1;
2867 2882 step = -step;
2868 2883 }
2869 2884
2870 2885 if (step != 1) {
2871 2886 PyErr_SetString(PyExc_ValueError,
2872 2887 "revlog index delete requires step size of 1");
2873 2888 return -1;
2874 2889 }
2875 2890
2876 2891 if (stop != length - 1) {
2877 2892 PyErr_SetString(PyExc_IndexError,
2878 2893 "revlog index deletion indices are invalid");
2879 2894 return -1;
2880 2895 }
2881 2896
2882 2897 if (start < self->length) {
2883 2898 if (self->ntinitialized) {
2884 2899 Py_ssize_t i;
2885 2900
2886 2901 for (i = start; i < self->length; i++) {
2887 2902 const char *node = index_node_existing(self, i);
2888 2903 if (node == NULL)
2889 2904 return -1;
2890 2905
2891 2906 nt_delete_node(&self->nt, node);
2892 2907 }
2893 2908 if (self->new_length)
2894 2909 index_invalidate_added(self, self->length);
2895 2910 if (self->ntrev > start)
2896 2911 self->ntrev = (int)start;
2897 2912 } else if (self->new_length) {
2898 2913 self->new_length = 0;
2899 2914 }
2900 2915
2901 2916 self->length = start;
2902 2917 goto done;
2903 2918 }
2904 2919
2905 2920 if (self->ntinitialized) {
2906 2921 index_invalidate_added(self, start);
2907 2922 if (self->ntrev > start)
2908 2923 self->ntrev = (int)start;
2909 2924 } else {
2910 2925 self->new_length = start - self->length;
2911 2926 }
2912 2927 done:
2913 2928 Py_CLEAR(self->headrevs);
2914 2929 return ret;
2915 2930 }
2916 2931
2917 2932 /*
2918 2933 * Supported ops:
2919 2934 *
2920 2935 * slice deletion
2921 2936 * string assignment (extend node->rev mapping)
2922 2937 * string deletion (shrink node->rev mapping)
2923 2938 */
2924 2939 static int index_assign_subscript(indexObject *self, PyObject *item,
2925 2940 PyObject *value)
2926 2941 {
2927 2942 char *node;
2928 2943 long rev;
2929 2944
2930 2945 if (PySlice_Check(item) && value == NULL)
2931 2946 return index_slice_del(self, item);
2932 2947
2933 2948 if (node_check(self->nodelen, item, &node) == -1)
2934 2949 return -1;
2935 2950
2936 2951 if (value == NULL)
2937 2952 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2938 2953 : 0;
2939 2954 rev = PyLong_AsLong(value);
2940 2955 if (rev > INT_MAX || rev < 0) {
2941 2956 if (!PyErr_Occurred())
2942 2957 PyErr_SetString(PyExc_ValueError, "rev out of range");
2943 2958 return -1;
2944 2959 }
2945 2960
2946 2961 if (index_init_nt(self) == -1)
2947 2962 return -1;
2948 2963 return nt_insert(&self->nt, node, (int)rev);
2949 2964 }
2950 2965
2951 2966 /*
2952 2967 * Find all RevlogNG entries in an index that has inline data. Update
2953 2968 * the optional "offsets" table with those entries.
2954 2969 */
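/*
 * In an inline revlog the compressed data of each revision directly
 * follows its index entry, so (for the 64-byte v1 entries) an entry
 * whose comp_len field reads 100 is followed by 100 bytes of data and
 * the next entry starts 164 bytes later.  The sizes above are only an
 * illustration of the arithmetic in the loop below.
 */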
2955 2970 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2956 2971 {
2957 2972 const char *data = (const char *)self->buf.buf;
2958 2973 Py_ssize_t pos = 0;
2959 2974 Py_ssize_t end = self->buf.len;
2960 2975 long incr = self->entry_size;
2961 2976 Py_ssize_t len = 0;
2962 2977
2963 2978 while (pos + self->entry_size <= end && pos >= 0) {
2964 2979 uint32_t comp_len, sidedata_comp_len = 0;
2965 2980 /* 3rd element of header is length of compressed inline data */
2966 2981 if (self->format_version == format_v1) {
2967 2982 comp_len =
2968 2983 getbe32(data + pos + entry_v1_offset_comp_len);
2969 2984 sidedata_comp_len = 0;
2970 2985 } else if (self->format_version == format_v2) {
2971 2986 comp_len =
2972 2987 getbe32(data + pos + entry_v2_offset_comp_len);
2973 2988 sidedata_comp_len = getbe32(
2974 2989 data + pos + entry_v2_offset_sidedata_comp_len);
2975 2990 } else {
2976 2991 raise_revlog_error();
2977 2992 return -1;
2978 2993 }
2979 2994 incr = self->entry_size + comp_len + sidedata_comp_len;
2980 2995 if (offsets)
2981 2996 offsets[len] = data + pos;
2982 2997 len++;
2983 2998 pos += incr;
2984 2999 }
2985 3000
2986 3001 if (pos != end) {
2987 3002 if (!PyErr_Occurred())
2988 3003 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2989 3004 return -1;
2990 3005 }
2991 3006
2992 3007 return len;
2993 3008 }
2994 3009
2995 3010 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2996 3011 {
2997 3012 PyObject *data_obj, *inlined_obj;
2998 3013 Py_ssize_t size;
2999 3014
3000 3015 static char *kwlist[] = {"data", "inlined", "format", NULL};
3001 3016
3002 3017 /* Initialize before argument-checking to avoid index_dealloc() crash.
3003 3018 */
3004 3019 self->added = NULL;
3005 3020 self->new_length = 0;
3006 3021 self->added_length = 0;
3007 3022 self->data = NULL;
3008 3023 memset(&self->buf, 0, sizeof(self->buf));
3009 3024 self->headrevs = NULL;
3010 3025 self->filteredrevs = Py_None;
3011 3026 Py_INCREF(Py_None);
3012 3027 self->ntinitialized = 0;
3013 3028 self->offsets = NULL;
3014 3029 self->nodelen = 20;
3015 3030 self->nullentry = NULL;
3016 3031 self->rust_ext_compat = 1;
3017 3032 self->format_version = format_v1;
3018 3033
3019 3034 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3020 3035 &data_obj, &inlined_obj,
3021 3036 &(self->format_version)))
3022 3037 return -1;
3023 3038 if (!PyObject_CheckBuffer(data_obj)) {
3024 3039 PyErr_SetString(PyExc_TypeError,
3025 3040 "data does not support buffer interface");
3026 3041 return -1;
3027 3042 }
3028 3043 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3029 3044 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3030 3045 return -1;
3031 3046 }
3032 3047
3033 3048 if (self->format_version == format_v1) {
3034 3049 self->entry_size = v1_entry_size;
3035 3050 } else if (self->format_version == format_v2) {
3036 3051 self->entry_size = v2_entry_size;
3037 3052 } else if (self->format_version == format_cl2) {
3038 3053 self->entry_size = cl2_entry_size;
3039 3054 }
3040 3055
3041 3056 self->nullentry = Py_BuildValue(
3042 3057 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3043 3058 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3044 3059
3045 3060 if (!self->nullentry)
3046 3061 return -1;
3047 3062 PyObject_GC_UnTrack(self->nullentry);
3048 3063
3049 3064 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3050 3065 return -1;
3051 3066 size = self->buf.len;
3052 3067
3053 3068 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3054 3069 self->data = data_obj;
3055 3070
3056 3071 self->ntlookups = self->ntmisses = 0;
3057 3072 self->ntrev = -1;
3058 3073 Py_INCREF(self->data);
3059 3074
3060 3075 if (self->inlined) {
3061 3076 Py_ssize_t len = inline_scan(self, NULL);
3062 3077 if (len == -1)
3063 3078 goto bail;
3064 3079 self->length = len;
3065 3080 } else {
3066 3081 if (size % self->entry_size) {
3067 3082 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3068 3083 goto bail;
3069 3084 }
3070 3085 self->length = size / self->entry_size;
3071 3086 }
3072 3087
3073 3088 return 0;
3074 3089 bail:
3075 3090 return -1;
3076 3091 }
3077 3092
3078 3093 static PyObject *index_nodemap(indexObject *self)
3079 3094 {
3080 3095 Py_INCREF(self);
3081 3096 return (PyObject *)self;
3082 3097 }
3083 3098
3084 3099 static void _index_clearcaches(indexObject *self)
3085 3100 {
3086 3101 if (self->offsets) {
3087 3102 PyMem_Free((void *)self->offsets);
3088 3103 self->offsets = NULL;
3089 3104 }
3090 3105 if (self->ntinitialized) {
3091 3106 nt_dealloc(&self->nt);
3092 3107 }
3093 3108 self->ntinitialized = 0;
3094 3109 Py_CLEAR(self->headrevs);
3095 3110 }
3096 3111
3097 3112 static PyObject *index_clearcaches(indexObject *self)
3098 3113 {
3099 3114 _index_clearcaches(self);
3100 3115 self->ntrev = -1;
3101 3116 self->ntlookups = self->ntmisses = 0;
3102 3117 Py_RETURN_NONE;
3103 3118 }
3104 3119
3105 3120 static void index_dealloc(indexObject *self)
3106 3121 {
3107 3122 _index_clearcaches(self);
3108 3123 Py_XDECREF(self->filteredrevs);
3109 3124 if (self->buf.buf) {
3110 3125 PyBuffer_Release(&self->buf);
3111 3126 memset(&self->buf, 0, sizeof(self->buf));
3112 3127 }
3113 3128 Py_XDECREF(self->data);
3114 3129 PyMem_Free(self->added);
3115 3130 Py_XDECREF(self->nullentry);
3116 3131 PyObject_Del(self);
3117 3132 }
3118 3133
3119 3134 static PySequenceMethods index_sequence_methods = {
3120 3135 (lenfunc)index_length, /* sq_length */
3121 3136 0, /* sq_concat */
3122 3137 0, /* sq_repeat */
3123 3138 (ssizeargfunc)index_get, /* sq_item */
3124 3139 0, /* sq_slice */
3125 3140 0, /* sq_ass_item */
3126 3141 0, /* sq_ass_slice */
3127 3142 (objobjproc)index_contains, /* sq_contains */
3128 3143 };
3129 3144
3130 3145 static PyMappingMethods index_mapping_methods = {
3131 3146 (lenfunc)index_length, /* mp_length */
3132 3147 (binaryfunc)index_getitem, /* mp_subscript */
3133 3148 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
3134 3149 };
3135 3150
3136 3151 static PyMethodDef index_methods[] = {
3137 3152 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3138 3153 "return the gca set of the given revs"},
3139 3154 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3140 3155 METH_VARARGS,
3141 3156 "return the heads of the common ancestors of the given revs"},
3142 3157 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3143 3158 "clear the index caches"},
3144 3159 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3145 3160 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3146 3161 "return `rev` associated with a node or None"},
3147 3162 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3148 3163      "return True if the node exists in the index"},
3149 3164 {"rev", (PyCFunction)index_m_rev, METH_O,
3150 3165 "return `rev` associated with a node or raise RevlogError"},
3151 3166 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3152 3167 "compute phases"},
3153 3168 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3154 3169 "reachableroots"},
3155 3170 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3156 3171 METH_VARARGS, "replace an existing index entry with a new value"},
3157 3172 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3158 3173 "get head revisions"}, /* Can do filtering since 3.2 */
3159 3174 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3160 3175 "get filtered head revisions"}, /* Can always do filtering */
3161 3176 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3162 3177 "True if the object is a snapshot"},
3163 3178 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3164 3179 "Gather snapshot data in a cache dict"},
3165 3180 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3166 3181 "determine revisions with deltas to reconstruct fulltext"},
3167 3182 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3168 3183      METH_VARARGS, "slice a delta chain into chunks of sufficient read density"},
3169 3184 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3170 3185 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3171 3186 "match a potentially ambiguous node ID"},
3172 3187 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3173 3188 "find length of shortest hex nodeid of a binary ID"},
3174 3189 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3175 3190 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3176 3191 "return an entry in binary form"},
3177 3192 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3178 3193 "pack the revlog header information into binary"},
3179 3194 {NULL} /* Sentinel */
3180 3195 };
3181 3196
3182 3197 static PyGetSetDef index_getset[] = {
3183 3198 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
3184 3199 {NULL} /* Sentinel */
3185 3200 };
3186 3201
3187 3202 static PyMemberDef index_members[] = {
3188 3203 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3189 3204 "size of an index entry"},
3190 3205 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3191 3206      "compatibility with rust extensions"},
3192 3207 {NULL} /* Sentinel */
3193 3208 };
3194 3209
3195 3210 PyTypeObject HgRevlogIndex_Type = {
3196 3211 PyVarObject_HEAD_INIT(NULL, 0) /* header */
3197 3212 "parsers.index", /* tp_name */
3198 3213 sizeof(indexObject), /* tp_basicsize */
3199 3214 0, /* tp_itemsize */
3200 3215 (destructor)index_dealloc, /* tp_dealloc */
3201 3216 0, /* tp_print */
3202 3217 0, /* tp_getattr */
3203 3218 0, /* tp_setattr */
3204 3219 0, /* tp_compare */
3205 3220 0, /* tp_repr */
3206 3221 0, /* tp_as_number */
3207 3222 &index_sequence_methods, /* tp_as_sequence */
3208 3223 &index_mapping_methods, /* tp_as_mapping */
3209 3224 0, /* tp_hash */
3210 3225 0, /* tp_call */
3211 3226 0, /* tp_str */
3212 3227 0, /* tp_getattro */
3213 3228 0, /* tp_setattro */
3214 3229 0, /* tp_as_buffer */
3215 3230 Py_TPFLAGS_DEFAULT, /* tp_flags */
3216 3231 "revlog index", /* tp_doc */
3217 3232 0, /* tp_traverse */
3218 3233 0, /* tp_clear */
3219 3234 0, /* tp_richcompare */
3220 3235 0, /* tp_weaklistoffset */
3221 3236 0, /* tp_iter */
3222 3237 0, /* tp_iternext */
3223 3238 index_methods, /* tp_methods */
3224 3239 index_members, /* tp_members */
3225 3240 index_getset, /* tp_getset */
3226 3241 0, /* tp_base */
3227 3242 0, /* tp_dict */
3228 3243 0, /* tp_descr_get */
3229 3244 0, /* tp_descr_set */
3230 3245 0, /* tp_dictoffset */
3231 3246 (initproc)index_init, /* tp_init */
3232 3247 0, /* tp_alloc */
3233 3248 };
3234 3249
3235 3250 /*
3236 3251 * returns a tuple of the form (index, cache) with elements as
3237 3252 * follows:
3238 3253 *
3239 3254 * index: an index object that lazily parses Revlog (v1 or v2) records
3240 3255 * cache: if data is inlined, a tuple (0, index_file_content), else None
3241 3256 * index_file_content could be a string, or a buffer
3242 3257 *
3243 3258 * added complications are for backwards compatibility
3244 3259 */
3245 3260 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3246 3261 {
3247 3262 PyObject *cache = NULL;
3248 3263 indexObject *idx;
3249 3264 int ret;
3250 3265
3251 3266 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3252 3267 if (idx == NULL)
3253 3268 goto bail;
3254 3269
3255 3270 ret = index_init(idx, args, kwargs);
3256 3271 if (ret == -1)
3257 3272 goto bail;
3258 3273
3259 3274 if (idx->inlined) {
3260 3275 cache = Py_BuildValue("iO", 0, idx->data);
3261 3276 if (cache == NULL)
3262 3277 goto bail;
3263 3278 } else {
3264 3279 cache = Py_None;
3265 3280 Py_INCREF(cache);
3266 3281 }
3267 3282
3268 3283 return Py_BuildValue("NN", idx, cache);
3269 3284
3270 3285 bail:
3271 3286 Py_XDECREF(idx);
3272 3287 Py_XDECREF(cache);
3273 3288 return NULL;
3274 3289 }
3275 3290
3276 3291 static Revlog_CAPI CAPI = {
3277 3292 /* increment the abi_version field upon each change in the Revlog_CAPI
3278 3293 struct or in the ABI of the listed functions */
3279 3294 3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
3280 3295 };
3281 3296
3282 3297 void revlog_module_init(PyObject *mod)
3283 3298 {
3284 3299 PyObject *caps = NULL;
3285 3300 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3286 3301 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3287 3302 return;
3288 3303 Py_INCREF(&HgRevlogIndex_Type);
3289 3304 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3290 3305
3291 3306 nodetreeType.tp_new = PyType_GenericNew;
3292 3307 if (PyType_Ready(&nodetreeType) < 0)
3293 3308 return;
3294 3309 Py_INCREF(&nodetreeType);
3295 3310 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3296 3311
3297 3312 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3298 3313 if (caps != NULL)
3299 3314 PyModule_AddObject(mod, "revlog_CAPI", caps);
3300 3315 }
@@ -1,3337 +1,3347 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullrev,
29 29 sha1nodeconstants,
30 30 short,
31 31 wdirrev,
32 32 )
33 33 from .i18n import _
34 34 from .pycompat import getattr
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 ENTRY_RANK,
42 42 FEATURES_BY_VERSION,
43 43 FLAG_GENERALDELTA,
44 44 FLAG_INLINE_DATA,
45 45 INDEX_HEADER,
46 46 KIND_CHANGELOG,
47 47 RANK_UNKNOWN,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 SUPPORTED_FLAGS,
57 57 )
58 58 from .revlogutils.flagutil import (
59 59 REVIDX_DEFAULT_FLAGS,
60 60 REVIDX_ELLIPSIS,
61 61 REVIDX_EXTSTORED,
62 62 REVIDX_FLAGS_ORDER,
63 63 REVIDX_HASCOPIESINFO,
64 64 REVIDX_ISCENSORED,
65 65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 75 revlogutils,
76 76 templatefilters,
77 77 util,
78 78 )
79 79 from .interfaces import (
80 80 repository,
81 81 util as interfaceutil,
82 82 )
83 83 from .revlogutils import (
84 84 deltas as deltautil,
85 85 docket as docketutil,
86 86 flagutil,
87 87 nodemap as nodemaputil,
88 88 randomaccessfile,
89 89 revlogv0,
90 90 rewrite,
91 91 sidedata as sidedatautil,
92 92 )
93 93 from .utils import (
94 94 storageutil,
95 95 stringutil,
96 96 )
97 97
98 98 # blanked usage of all the names to prevent pyflakes constraints
99 99 # We need these names available in the module for extensions.
100 100
101 101 REVLOGV0
102 102 REVLOGV1
103 103 REVLOGV2
104 104 CHANGELOGV2
105 105 FLAG_INLINE_DATA
106 106 FLAG_GENERALDELTA
107 107 REVLOG_DEFAULT_FLAGS
108 108 REVLOG_DEFAULT_FORMAT
109 109 REVLOG_DEFAULT_VERSION
110 110 REVLOGV1_FLAGS
111 111 REVLOGV2_FLAGS
112 112 REVIDX_ISCENSORED
113 113 REVIDX_ELLIPSIS
114 114 REVIDX_HASCOPIESINFO
115 115 REVIDX_EXTSTORED
116 116 REVIDX_DEFAULT_FLAGS
117 117 REVIDX_FLAGS_ORDER
118 118 REVIDX_RAWTEXT_CHANGING_FLAGS
119 119
120 120 parsers = policy.importmod('parsers')
121 121 rustancestor = policy.importrust('ancestor')
122 122 rustdagop = policy.importrust('dagop')
123 123 rustrevlog = policy.importrust('revlog')
124 124
125 125 # Aliased for performance.
126 126 _zlibdecompress = zlib.decompress
127 127
128 128 # max size of revlog with inline data
129 129 _maxinline = 131072
130 130
131 131 # Flag processors for REVIDX_ELLIPSIS.
132 132 def ellipsisreadprocessor(rl, text):
133 133 return text, False
134 134
135 135
136 136 def ellipsiswriteprocessor(rl, text):
137 137 return text, False
138 138
139 139
140 140 def ellipsisrawprocessor(rl, text):
141 141 return False
142 142
143 143
144 144 ellipsisprocessor = (
145 145 ellipsisreadprocessor,
146 146 ellipsiswriteprocessor,
147 147 ellipsisrawprocessor,
148 148 )
149 149
150 150
151 151 def _verify_revision(rl, skipflags, state, node):
152 152 """Verify the integrity of the given revlog ``node`` while providing a hook
153 153 point for extensions to influence the operation."""
154 154 if skipflags:
155 155 state[b'skipread'].add(node)
156 156 else:
157 157 # Side-effect: read content and verify hash.
158 158 rl.revision(node)
159 159
160 160
161 161 # True if a fast implementation for persistent-nodemap is available
162 162 #
163 163 # We also consider we have a "fast" implementation in "pure" python because
164 164 # people using pure don't really have performance considerations (and a
165 165 # wheelbarrow of other slowness sources)
166 166 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
167 167 parsers, 'BaseIndexObject'
168 168 )
169 169
170 170
171 171 @interfaceutil.implementer(repository.irevisiondelta)
172 172 @attr.s(slots=True)
173 173 class revlogrevisiondelta:
174 174 node = attr.ib()
175 175 p1node = attr.ib()
176 176 p2node = attr.ib()
177 177 basenode = attr.ib()
178 178 flags = attr.ib()
179 179 baserevisionsize = attr.ib()
180 180 revision = attr.ib()
181 181 delta = attr.ib()
182 182 sidedata = attr.ib()
183 183 protocol_flags = attr.ib()
184 184 linknode = attr.ib(default=None)
185 185
186 186
187 187 @interfaceutil.implementer(repository.iverifyproblem)
188 188 @attr.s(frozen=True)
189 189 class revlogproblem:
190 190 warning = attr.ib(default=None)
191 191 error = attr.ib(default=None)
192 192 node = attr.ib(default=None)
193 193
194 194
195 195 def parse_index_v1(data, inline):
196 196 # call the C implementation to parse the index data
197 197 index, cache = parsers.parse_index2(data, inline)
198 198 return index, cache
199 199
200 200
201 201 def parse_index_v2(data, inline):
202 202 # call the C implementation to parse the index data
203 203 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
204 204 return index, cache
205 205
206 206
207 207 def parse_index_cl_v2(data, inline):
208 208 # call the C implementation to parse the index data
209 209 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
210 210 return index, cache
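# A hedged usage sketch of the parse_index_* helpers above (illustrative
# only): each returns an (index, cache) pair, where ``cache`` is only
# populated for inline revlogs so the already-read data can be reused.
#
# index, cache = parse_index_v1(data, inline=False)
# tiprev = len(index) - 1  # revisions are numbered 0..tiprev
# entry = index[tiprev]  # index tuple, see revlogutils/constants.py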
211 211
212 212
213 213 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
214 214
215 215 def parse_index_v1_nodemap(data, inline):
216 216 index, cache = parsers.parse_index_devel_nodemap(data, inline)
217 217 return index, cache
218 218
219 219
220 220 else:
221 221 parse_index_v1_nodemap = None
222 222
223 223
224 224 def parse_index_v1_mixed(data, inline):
225 225 index, cache = parse_index_v1(data, inline)
226 226 return rustrevlog.MixedIndex(index), cache
227 227
228 228
229 229 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
230 230 # signed integer)
231 231 _maxentrysize = 0x7FFFFFFF
232 232
233 233 FILE_TOO_SHORT_MSG = _(
234 234 b'cannot read from revlog %s;'
235 235 b' expected %d bytes from offset %d, data size is %d'
236 236 )
237 237
238 238 hexdigits = b'0123456789abcdefABCDEF'
239 239
240 240
241 241 class revlog:
242 242 """
243 243 the underlying revision storage object
244 244
245 245 A revlog consists of two parts, an index and the revision data.
246 246
247 247 The index is a file with a fixed record size containing
248 248 information on each revision, including its nodeid (hash), the
249 249 nodeids of its parents, the position and offset of its data within
250 250 the data file, and the revision it's based on. Finally, each entry
251 251 contains a linkrev entry that can serve as a pointer to external
252 252 data.
253 253
254 254 The revision data itself is a linear collection of data chunks.
255 255 Each chunk represents a revision and is usually represented as a
256 256 delta against the previous chunk. To bound lookup time, runs of
257 257 deltas are limited to about 2 times the length of the original
258 258 version data. This makes retrieval of a version proportional to
259 259 its size, or O(1) relative to the number of revisions.
260 260
261 261 Both pieces of the revlog are written to in an append-only
262 262 fashion, which means we never need to rewrite a file to insert or
263 263 remove data, and can use some simple techniques to avoid the need
264 264 for locking while reading.
265 265
266 266 If checkambig, indexfile is opened with checkambig=True at
267 267 writing, to avoid file stat ambiguity.
268 268
269 269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
270 270 index will be mmapped rather than read if it is larger than the
271 271 configured threshold.
272 272
273 273 If censorable is True, the revlog can have censored revisions.
274 274
275 275 If `upperboundcomp` is not None, this is the expected maximal gain from
276 276 compression for the data content.
277 277
278 278 `concurrencychecker` is an optional function that receives 3 arguments: a
279 279 file handle, a filename, and an expected position. It should check whether
280 280 the current position in the file handle is valid, and log/warn/fail (by
281 281 raising).
282 282
283 283 See mercurial/revlogutils/constants.py for details about the content of an
284 284 index entry.
285 285 """
286 286
287 287 _flagserrorclass = error.RevlogError
288 288
289 289 def __init__(
290 290 self,
291 291 opener,
292 292 target,
293 293 radix,
294 294 postfix=None, # only exists for `tmpcensored` now
295 295 checkambig=False,
296 296 mmaplargeindex=False,
297 297 censorable=False,
298 298 upperboundcomp=None,
299 299 persistentnodemap=False,
300 300 concurrencychecker=None,
301 301 trypending=False,
302 302 canonical_parent_order=True,
303 303 ):
304 304 """
305 305 create a revlog object
306 306
307 307 opener is a function that abstracts the file opening operation
308 308 and can be used to implement COW semantics or the like.
309 309
310 310 `target`: a (KIND, ID) tuple that identifies the content stored in
311 311 this revlog. It helps the rest of the code to understand what the revlog
312 312 is about without having to resort to heuristics and index filename
313 313 analysis. Note that this must reliably be set by normal code, but
314 314 test, debug, or performance measurement code might not set this to an
315 315 accurate value.
316 316 """
317 317 self.upperboundcomp = upperboundcomp
318 318
319 319 self.radix = radix
320 320
321 321 self._docket_file = None
322 322 self._indexfile = None
323 323 self._datafile = None
324 324 self._sidedatafile = None
325 325 self._nodemap_file = None
326 326 self.postfix = postfix
327 327 self._trypending = trypending
328 328 self.opener = opener
329 329 if persistentnodemap:
330 330 self._nodemap_file = nodemaputil.get_nodemap_file(self)
331 331
332 332 assert target[0] in ALL_KINDS
333 333 assert len(target) == 2
334 334 self.target = target
335 335 # When True, indexfile is opened with checkambig=True at writing, to
336 336 # avoid file stat ambiguity.
337 337 self._checkambig = checkambig
338 338 self._mmaplargeindex = mmaplargeindex
339 339 self._censorable = censorable
340 340 # 3-tuple of (node, rev, text) for a raw revision.
341 341 self._revisioncache = None
342 342 # Maps rev to chain base rev.
343 343 self._chainbasecache = util.lrucachedict(100)
344 344 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
345 345 self._chunkcache = (0, b'')
346 346 # How much data to read and cache into the raw revlog data cache.
347 347 self._chunkcachesize = 65536
348 348 self._maxchainlen = None
349 349 self._deltabothparents = True
350 350 self._debug_delta = False
351 351 self.index = None
352 352 self._docket = None
353 353 self._nodemap_docket = None
354 354 # Mapping of partial identifiers to full nodes.
355 355 self._pcache = {}
356 356 # Mapping of revision integer to full node.
357 357 self._compengine = b'zlib'
358 358 self._compengineopts = {}
359 359 self._maxdeltachainspan = -1
360 360 self._withsparseread = False
361 361 self._sparserevlog = False
362 362 self.hassidedata = False
363 363 self._srdensitythreshold = 0.50
364 364 self._srmingapsize = 262144
365 365
366 366 # Make copy of flag processors so each revlog instance can support
367 367 # custom flags.
368 368 self._flagprocessors = dict(flagutil.flagprocessors)
369 369
370 370 # 3-tuple of file handles being used for active writing.
371 371 self._writinghandles = None
372 372 # prevent nesting of addgroup
373 373 self._adding_group = None
374 374
375 375 self._loadindex()
376 376
377 377 self._concurrencychecker = concurrencychecker
378 378
379 379 # parent order is supposed to be semantically irrelevant, so we
380 380 # normally resort parents to ensure that the first parent is non-null,
381 381 # if there is a non-null parent at all.
382 382 # filelog abuses the parent order as a flag to mark some instances of
383 383 # meta-encoded files, so allow it to disable this behavior.
384 384 self.canonical_parent_order = canonical_parent_order
385 385
386 386 def _init_opts(self):
387 387 """process options (from above/config) to setup associated default revlog mode
388 388
389 389 These values might be affected when actually reading on-disk information.
390 390
391 391 The relevant values are returned for use in _loadindex().
392 392
393 393 * newversionflags:
394 394 version header to use if we need to create a new revlog
395 395
396 396 * mmapindexthreshold:
397 397 minimal index size at which to start using mmap
398 398
399 399 * force_nodemap:
400 400 force the usage of a "development" version of the nodemap code
401 401 """
402 402 mmapindexthreshold = None
403 403 opts = self.opener.options
404 404
405 405 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
406 406 new_header = CHANGELOGV2
407 407 elif b'revlogv2' in opts:
408 408 new_header = REVLOGV2
409 409 elif b'revlogv1' in opts:
410 410 new_header = REVLOGV1 | FLAG_INLINE_DATA
411 411 if b'generaldelta' in opts:
412 412 new_header |= FLAG_GENERALDELTA
413 413 elif b'revlogv0' in self.opener.options:
414 414 new_header = REVLOGV0
415 415 else:
416 416 new_header = REVLOG_DEFAULT_VERSION
417 417
418 418 if b'chunkcachesize' in opts:
419 419 self._chunkcachesize = opts[b'chunkcachesize']
420 420 if b'maxchainlen' in opts:
421 421 self._maxchainlen = opts[b'maxchainlen']
422 422 if b'deltabothparents' in opts:
423 423 self._deltabothparents = opts[b'deltabothparents']
424 424 self._lazydelta = bool(opts.get(b'lazydelta', True))
425 425 self._lazydeltabase = False
426 426 if self._lazydelta:
427 427 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
428 428 if b'debug-delta' in opts:
429 429 self._debug_delta = opts[b'debug-delta']
430 430 if b'compengine' in opts:
431 431 self._compengine = opts[b'compengine']
432 432 if b'zlib.level' in opts:
433 433 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
434 434 if b'zstd.level' in opts:
435 435 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
436 436 if b'maxdeltachainspan' in opts:
437 437 self._maxdeltachainspan = opts[b'maxdeltachainspan']
438 438 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
439 439 mmapindexthreshold = opts[b'mmapindexthreshold']
440 440 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
441 441 withsparseread = bool(opts.get(b'with-sparse-read', False))
442 442 # sparse-revlog forces sparse-read
443 443 self._withsparseread = self._sparserevlog or withsparseread
444 444 if b'sparse-read-density-threshold' in opts:
445 445 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
446 446 if b'sparse-read-min-gap-size' in opts:
447 447 self._srmingapsize = opts[b'sparse-read-min-gap-size']
448 448 if opts.get(b'enableellipsis'):
449 449 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
450 450
451 451 # revlog v0 doesn't have flag processors
452 452 for flag, processor in opts.get(b'flagprocessors', {}).items():
453 453 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
454 454
455 455 if self._chunkcachesize <= 0:
456 456 raise error.RevlogError(
457 457 _(b'revlog chunk cache size %r is not greater than 0')
458 458 % self._chunkcachesize
459 459 )
460 460 elif self._chunkcachesize & (self._chunkcachesize - 1):
461 461 raise error.RevlogError(
462 462 _(b'revlog chunk cache size %r is not a power of 2')
463 463 % self._chunkcachesize
464 464 )
465 465 force_nodemap = opts.get(b'devel-force-nodemap', False)
466 466 return new_header, mmapindexthreshold, force_nodemap
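# Illustrative note on the power-of-two check above: for a positive n,
# ``n & (n - 1)`` clears the lowest set bit, so the result is zero exactly
# when n has a single bit set. For example:
#
# 65536 & 65535 == 0  # 65536 is accepted as a chunk cache size
# 65537 & 65536 == 65536  # not a power of two, rejected above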
467 467
468 468 def _get_data(self, filepath, mmap_threshold, size=None):
469 469 """return a file content with or without mmap
470 470
471 471 If the file is missing return the empty string"""
472 472 try:
473 473 with self.opener(filepath) as fp:
474 474 if mmap_threshold is not None:
475 475 file_size = self.opener.fstat(fp).st_size
476 476 if file_size >= mmap_threshold:
477 477 if size is not None:
478 478 # avoid potential mmap crash
479 479 size = min(file_size, size)
480 480 # TODO: should .close() to release resources without
481 481 # relying on Python GC
482 482 if size is None:
483 483 return util.buffer(util.mmapread(fp))
484 484 else:
485 485 return util.buffer(util.mmapread(fp, size))
486 486 if size is None:
487 487 return fp.read()
488 488 else:
489 489 return fp.read(size)
490 490 except FileNotFoundError:
491 491 return b''
492 492
493 493 def _loadindex(self, docket=None):
494 494
495 495 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
496 496
497 497 if self.postfix is not None:
498 498 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
499 499 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
500 500 entry_point = b'%s.i.a' % self.radix
501 501 else:
502 502 entry_point = b'%s.i' % self.radix
503 503
504 504 if docket is not None:
505 505 self._docket = docket
506 506 self._docket_file = entry_point
507 507 else:
508 508 entry_data = b''
509 509 self._initempty = True
510 510 entry_data = self._get_data(entry_point, mmapindexthreshold)
511 511 if len(entry_data) > 0:
512 512 header = INDEX_HEADER.unpack(entry_data[:4])[0]
513 513 self._initempty = False
514 514 else:
515 515 header = new_header
516 516
517 517 self._format_flags = header & ~0xFFFF
518 518 self._format_version = header & 0xFFFF
519 519
520 520 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
521 521 if supported_flags is None:
522 522 msg = _(b'unknown version (%d) in revlog %s')
523 523 msg %= (self._format_version, self.display_id)
524 524 raise error.RevlogError(msg)
525 525 elif self._format_flags & ~supported_flags:
526 526 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
527 527 display_flag = self._format_flags >> 16
528 528 msg %= (display_flag, self._format_version, self.display_id)
529 529 raise error.RevlogError(msg)
530 530
531 531 features = FEATURES_BY_VERSION[self._format_version]
532 532 self._inline = features[b'inline'](self._format_flags)
533 533 self._generaldelta = features[b'generaldelta'](self._format_flags)
534 534 self.hassidedata = features[b'sidedata']
535 535
536 536 if not features[b'docket']:
537 537 self._indexfile = entry_point
538 538 index_data = entry_data
539 539 else:
540 540 self._docket_file = entry_point
541 541 if self._initempty:
542 542 self._docket = docketutil.default_docket(self, header)
543 543 else:
544 544 self._docket = docketutil.parse_docket(
545 545 self, entry_data, use_pending=self._trypending
546 546 )
547 547
548 548 if self._docket is not None:
549 549 self._indexfile = self._docket.index_filepath()
550 550 index_data = b''
551 551 index_size = self._docket.index_end
552 552 if index_size > 0:
553 553 index_data = self._get_data(
554 554 self._indexfile, mmapindexthreshold, size=index_size
555 555 )
556 556 if len(index_data) < index_size:
557 557 msg = _(b'too few index data for %s: got %d, expected %d')
558 558 msg %= (self.display_id, len(index_data), index_size)
559 559 raise error.RevlogError(msg)
560 560
561 561 self._inline = False
562 562 # generaldelta implied by version 2 revlogs.
563 563 self._generaldelta = True
564 564 # the logic for persistent nodemap will be dealt with within the
565 565 # main docket, so disable it for now.
566 566 self._nodemap_file = None
567 567
568 568 if self._docket is not None:
569 569 self._datafile = self._docket.data_filepath()
570 570 self._sidedatafile = self._docket.sidedata_filepath()
571 571 elif self.postfix is None:
572 572 self._datafile = b'%s.d' % self.radix
573 573 else:
574 574 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
575 575
576 576 self.nodeconstants = sha1nodeconstants
577 577 self.nullid = self.nodeconstants.nullid
578 578
579 579 # sparse-revlog can't be on without general-delta (issue6056)
580 580 if not self._generaldelta:
581 581 self._sparserevlog = False
582 582
583 583 self._storedeltachains = True
584 584
585 585 devel_nodemap = (
586 586 self._nodemap_file
587 587 and force_nodemap
588 588 and parse_index_v1_nodemap is not None
589 589 )
590 590
591 591 use_rust_index = False
592 592 if rustrevlog is not None:
593 593 if self._nodemap_file is not None:
594 594 use_rust_index = True
595 595 else:
596 596 use_rust_index = self.opener.options.get(b'rust.index')
597 597
598 598 self._parse_index = parse_index_v1
599 599 if self._format_version == REVLOGV0:
600 600 self._parse_index = revlogv0.parse_index_v0
601 601 elif self._format_version == REVLOGV2:
602 602 self._parse_index = parse_index_v2
603 603 elif self._format_version == CHANGELOGV2:
604 604 self._parse_index = parse_index_cl_v2
605 605 elif devel_nodemap:
606 606 self._parse_index = parse_index_v1_nodemap
607 607 elif use_rust_index:
608 608 self._parse_index = parse_index_v1_mixed
609 609 try:
610 610 d = self._parse_index(index_data, self._inline)
611 611 index, chunkcache = d
612 612 use_nodemap = (
613 613 not self._inline
614 614 and self._nodemap_file is not None
615 615 and util.safehasattr(index, 'update_nodemap_data')
616 616 )
617 617 if use_nodemap:
618 618 nodemap_data = nodemaputil.persisted_data(self)
619 619 if nodemap_data is not None:
620 620 docket = nodemap_data[0]
621 621 if (
622 622 len(d[0]) > docket.tip_rev
623 623 and d[0][docket.tip_rev][7] == docket.tip_node
624 624 ):
625 625 # no changelog tampering
626 626 self._nodemap_docket = docket
627 627 index.update_nodemap_data(*nodemap_data)
628 628 except (ValueError, IndexError):
629 629 raise error.RevlogError(
630 630 _(b"index %s is corrupted") % self.display_id
631 631 )
632 632 self.index = index
633 633 self._segmentfile = randomaccessfile.randomaccessfile(
634 634 self.opener,
635 635 (self._indexfile if self._inline else self._datafile),
636 636 self._chunkcachesize,
637 637 chunkcache,
638 638 )
639 639 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
640 640 self.opener,
641 641 self._sidedatafile,
642 642 self._chunkcachesize,
643 643 )
644 644 # revnum -> (chain-length, sum-delta-length)
645 645 self._chaininfocache = util.lrucachedict(500)
646 646 # revlog header -> revlog compressor
647 647 self._decompressors = {}
648 648
649 649 @util.propertycache
650 650 def revlog_kind(self):
651 651 return self.target[0]
652 652
653 653 @util.propertycache
654 654 def display_id(self):
655 655 """The public facing "ID" of the revlog that we use in message"""
656 656 # Maybe we should build a user facing representation of
657 657 # revlog.target instead of using `self.radix`
658 658 return self.radix
659 659
660 660 def _get_decompressor(self, t):
661 661 try:
662 662 compressor = self._decompressors[t]
663 663 except KeyError:
664 664 try:
665 665 engine = util.compengines.forrevlogheader(t)
666 666 compressor = engine.revlogcompressor(self._compengineopts)
667 667 self._decompressors[t] = compressor
668 668 except KeyError:
669 669 raise error.RevlogError(
670 670 _(b'unknown compression type %s') % binascii.hexlify(t)
671 671 )
672 672 return compressor
673 673
674 674 @util.propertycache
675 675 def _compressor(self):
676 676 engine = util.compengines[self._compengine]
677 677 return engine.revlogcompressor(self._compengineopts)
678 678
679 679 @util.propertycache
680 680 def _decompressor(self):
681 681 """the default decompressor"""
682 682 if self._docket is None:
683 683 return None
684 684 t = self._docket.default_compression_header
685 685 c = self._get_decompressor(t)
686 686 return c.decompress
687 687
688 688 def _indexfp(self):
689 689 """file object for the revlog's index file"""
690 690 return self.opener(self._indexfile, mode=b"r")
691 691
692 692 def __index_write_fp(self):
693 693 # You should not use this directly; use `_writing` instead
694 694 try:
695 695 f = self.opener(
696 696 self._indexfile, mode=b"r+", checkambig=self._checkambig
697 697 )
698 698 if self._docket is None:
699 699 f.seek(0, os.SEEK_END)
700 700 else:
701 701 f.seek(self._docket.index_end, os.SEEK_SET)
702 702 return f
703 703 except FileNotFoundError:
704 704 return self.opener(
705 705 self._indexfile, mode=b"w+", checkambig=self._checkambig
706 706 )
707 707
708 708 def __index_new_fp(self):
709 709 # You should not use this unless you are upgrading from inline revlog
710 710 return self.opener(
711 711 self._indexfile,
712 712 mode=b"w",
713 713 checkambig=self._checkambig,
714 714 atomictemp=True,
715 715 )
716 716
717 717 def _datafp(self, mode=b'r'):
718 718 """file object for the revlog's data file"""
719 719 return self.opener(self._datafile, mode=mode)
720 720
721 721 @contextlib.contextmanager
722 722 def _sidedatareadfp(self):
723 723 """file object suitable to read sidedata"""
724 724 if self._writinghandles:
725 725 yield self._writinghandles[2]
726 726 else:
727 727 with self.opener(self._sidedatafile) as fp:
728 728 yield fp
729 729
730 730 def tiprev(self):
731 731 return len(self.index) - 1
732 732
733 733 def tip(self):
734 734 return self.node(self.tiprev())
735 735
736 736 def __contains__(self, rev):
737 737 return 0 <= rev < len(self)
738 738
739 739 def __len__(self):
740 740 return len(self.index)
741 741
742 742 def __iter__(self):
743 743 return iter(range(len(self)))
744 744
745 745 def revs(self, start=0, stop=None):
746 746 """iterate over all rev in this revlog (from start to stop)"""
747 747 return storageutil.iterrevs(len(self), start=start, stop=stop)
748 748
749 749 def hasnode(self, node):
750 750 try:
751 751 self.rev(node)
752 752 return True
753 753 except KeyError:
754 754 return False
755 755
756 756 def candelta(self, baserev, rev):
757 757 """whether two revisions (baserev, rev) can be delta-ed or not"""
758 758 # Disable delta if either rev requires a content-changing flag
759 759 # processor (ex. LFS). This is because such flag processor can alter
760 760 # the rawtext content that the delta will be based on, and two clients
761 761 # could have a same revlog node with different flags (i.e. different
762 762 # rawtext contents) and the delta could be incompatible.
763 763 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
764 764 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
765 765 ):
766 766 return False
767 767 return True
768 768
769 769 def update_caches(self, transaction):
770 770 if self._nodemap_file is not None:
771 771 if transaction is None:
772 772 nodemaputil.update_persistent_nodemap(self)
773 773 else:
774 774 nodemaputil.setup_persistent_nodemap(transaction, self)
775 775
776 776 def clearcaches(self):
777 777 self._revisioncache = None
778 778 self._chainbasecache.clear()
779 779 self._segmentfile.clear_cache()
780 780 self._segmentfile_sidedata.clear_cache()
781 781 self._pcache = {}
782 782 self._nodemap_docket = None
783 783 self.index.clearcaches()
784 784 # The python code is the one responsible for validating the docket, so we
785 785 # end up having to refresh it here.
786 786 use_nodemap = (
787 787 not self._inline
788 788 and self._nodemap_file is not None
789 789 and util.safehasattr(self.index, 'update_nodemap_data')
790 790 )
791 791 if use_nodemap:
792 792 nodemap_data = nodemaputil.persisted_data(self)
793 793 if nodemap_data is not None:
794 794 self._nodemap_docket = nodemap_data[0]
795 795 self.index.update_nodemap_data(*nodemap_data)
796 796
797 797 def rev(self, node):
798 798 try:
799 799 return self.index.rev(node)
800 800 except TypeError:
801 801 raise
802 802 except error.RevlogError:
803 803 # parsers.c radix tree lookup failed
804 804 if (
805 805 node == self.nodeconstants.wdirid
806 806 or node in self.nodeconstants.wdirfilenodeids
807 807 ):
808 808 raise error.WdirUnsupported
809 809 raise error.LookupError(node, self.display_id, _(b'no node'))
810 810
811 811 # Accessors for index entries.
812 812
813 813 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
814 814 # are flags.
815 815 def start(self, rev):
816 816 return int(self.index[rev][0] >> 16)
817 817
818 818 def sidedata_cut_off(self, rev):
819 819 sd_cut_off = self.index[rev][8]
820 820 if sd_cut_off != 0:
821 821 return sd_cut_off
822 822 # This is some annoying dance, because entries without sidedata
823 823 # currently use 0 as their offset. (instead of previous-offset +
824 824 # previous-size)
825 825 #
826 826 # We should reconsider this sidedata → 0 sidedata_offset policy.
827 827 # In the meantime, we need this.
828 828 while 0 <= rev:
829 829 e = self.index[rev]
830 830 if e[9] != 0:
831 831 return e[8] + e[9]
832 832 rev -= 1
833 833 return 0
834 834
835 835 def flags(self, rev):
836 836 return self.index[rev][0] & 0xFFFF
837 837
838 838 def length(self, rev):
839 839 return self.index[rev][1]
840 840
841 841 def sidedata_length(self, rev):
842 842 if not self.hassidedata:
843 843 return 0
844 844 return self.index[rev][9]
845 845
846 846 def rawsize(self, rev):
847 847 """return the length of the uncompressed text for a given revision"""
848 848 l = self.index[rev][2]
849 849 if l >= 0:
850 850 return l
851 851
852 852 t = self.rawdata(rev)
853 853 return len(t)
854 854
855 855 def size(self, rev):
856 856 """length of non-raw text (processed by a "read" flag processor)"""
857 857 # fast path: if no "read" flag processor could change the content,
858 858 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 859 flags = self.flags(rev)
860 860 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 861 return self.rawsize(rev)
862 862
863 863 return len(self.revision(rev))
864 864
865 865 def fast_rank(self, rev):
866 866 """Return the rank of a revision if already known, or None otherwise.
867 867
868 868 The rank of a revision is the size of the sub-graph it defines as a
869 869 head. Equivalently, the rank of a revision `r` is the size of the set
870 870 `ancestors(r)`, `r` included.
871 871
872 872 This method returns the rank retrieved from the revlog in constant
873 873 time. It makes no attempt at computing unknown values for versions of
874 874 the revlog which do not persist the rank.
875 875 """
876 876 rank = self.index[rev][ENTRY_RANK]
877 877 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
878 878 return None
879 879 if rev == nullrev:
880 880 return 0 # convention
881 881 return rank
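# Illustrative sketch (hypothetical, not from the original module): when the
# rank is not persisted, the same quantity can be computed slowly from the
# ancestor set described in the docstring, assuming ``rl`` is a revlog:
#
# rank = sum(1 for _ in rl.ancestors([rev], inclusive=True))
#
# i.e. the size of ``ancestors(rev)`` with ``rev`` itself included.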
882 882
883 883 def chainbase(self, rev):
884 884 base = self._chainbasecache.get(rev)
885 885 if base is not None:
886 886 return base
887 887
888 888 index = self.index
889 889 iterrev = rev
890 890 base = index[iterrev][3]
891 891 while base != iterrev:
892 892 iterrev = base
893 893 base = index[iterrev][3]
894 894
895 895 self._chainbasecache[rev] = base
896 896 return base
897 897
898 898 def linkrev(self, rev):
899 899 return self.index[rev][4]
900 900
901 901 def parentrevs(self, rev):
902 902 try:
903 903 entry = self.index[rev]
904 904 except IndexError:
905 905 if rev == wdirrev:
906 906 raise error.WdirUnsupported
907 907 raise
908 908
909 909 if self.canonical_parent_order and entry[5] == nullrev:
910 910 return entry[6], entry[5]
911 911 else:
912 912 return entry[5], entry[6]
913 913
914 914 # fast parentrevs(rev) where rev isn't filtered
915 915 _uncheckedparentrevs = parentrevs
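# Worked example of the canonical ordering above (illustrative only): a
# revision stored with parents (nullrev, 7) -- entry[5] == -1, entry[6] == 7
# -- is reported as (7, -1), so callers may assume the first parent is
# non-null whenever any parent is.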
916 916
917 917 def node(self, rev):
918 918 try:
919 919 return self.index[rev][7]
920 920 except IndexError:
921 921 if rev == wdirrev:
922 922 raise error.WdirUnsupported
923 923 raise
924 924
925 925 # Derived from index values.
926 926
927 927 def end(self, rev):
928 928 return self.start(rev) + self.length(rev)
929 929
930 930 def parents(self, node):
931 931 i = self.index
932 932 d = i[self.rev(node)]
933 933 # inline node() to avoid function call overhead
934 934 if self.canonical_parent_order and d[5] == self.nullid:
935 935 return i[d[6]][7], i[d[5]][7]
936 936 else:
937 937 return i[d[5]][7], i[d[6]][7]
938 938
939 939 def chainlen(self, rev):
940 940 return self._chaininfo(rev)[0]
941 941
942 942 def _chaininfo(self, rev):
943 943 chaininfocache = self._chaininfocache
944 944 if rev in chaininfocache:
945 945 return chaininfocache[rev]
946 946 index = self.index
947 947 generaldelta = self._generaldelta
948 948 iterrev = rev
949 949 e = index[iterrev]
950 950 clen = 0
951 951 compresseddeltalen = 0
952 952 while iterrev != e[3]:
953 953 clen += 1
954 954 compresseddeltalen += e[1]
955 955 if generaldelta:
956 956 iterrev = e[3]
957 957 else:
958 958 iterrev -= 1
959 959 if iterrev in chaininfocache:
960 960 t = chaininfocache[iterrev]
961 961 clen += t[0]
962 962 compresseddeltalen += t[1]
963 963 break
964 964 e = index[iterrev]
965 965 else:
966 966 # Add text length of base since decompressing that also takes
967 967 # work. For cache hits the length is already included.
968 968 compresseddeltalen += e[1]
969 969 r = (clen, compresseddeltalen)
970 970 chaininfocache[rev] = r
971 971 return r
972 972
973 973 def _deltachain(self, rev, stoprev=None):
974 974 """Obtain the delta chain for a revision.
975 975
976 976 ``stoprev`` specifies a revision to stop at. If not specified, we
977 977 stop at the base of the chain.
978 978
979 979 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
980 980 revs in ascending order and ``stopped`` is a bool indicating whether
981 981 ``stoprev`` was hit.
982 982 """
983 983 # Try C implementation.
984 984 try:
985 985 return self.index.deltachain(rev, stoprev, self._generaldelta)
986 986 except AttributeError:
987 987 pass
988 988
989 989 chain = []
990 990
991 991 # Alias to prevent attribute lookup in tight loop.
992 992 index = self.index
993 993 generaldelta = self._generaldelta
994 994
995 995 iterrev = rev
996 996 e = index[iterrev]
997 997 while iterrev != e[3] and iterrev != stoprev:
998 998 chain.append(iterrev)
999 999 if generaldelta:
1000 1000 iterrev = e[3]
1001 1001 else:
1002 1002 iterrev -= 1
1003 1003 e = index[iterrev]
1004 1004
1005 1005 if iterrev == stoprev:
1006 1006 stopped = True
1007 1007 else:
1008 1008 chain.append(iterrev)
1009 1009 stopped = False
1010 1010
1011 1011 chain.reverse()
1012 1012 return chain, stopped
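# A hedged sketch of what _deltachain returns (illustrative only), assuming a
# general-delta revlog where rev 5 deltas against rev 4, rev 4 against rev 2,
# and rev 2 is stored as a full snapshot (its base is itself):
#
# chain, stopped = rl._deltachain(5)  # ([2, 4, 5], False)
# chain, stopped = rl._deltachain(5, stoprev=4)  # ([5], True)
#
# Restoring rev 5 then means taking the stored text of chain[0] and applying
# the deltas of the remaining revisions in order.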
1013 1013
1014 1014 def ancestors(self, revs, stoprev=0, inclusive=False):
1015 1015 """Generate the ancestors of 'revs' in reverse revision order.
1016 1016 Does not generate revs lower than stoprev.
1017 1017
1018 1018 See the documentation for ancestor.lazyancestors for more details."""
1019 1019
1020 1020 # first, make sure start revisions aren't filtered
1021 1021 revs = list(revs)
1022 1022 checkrev = self.node
1023 1023 for r in revs:
1024 1024 checkrev(r)
1025 1025 # and we're sure ancestors aren't filtered as well
1026 1026
1027 1027 if rustancestor is not None and self.index.rust_ext_compat:
1028 1028 lazyancestors = rustancestor.LazyAncestors
1029 1029 arg = self.index
1030 1030 else:
1031 1031 lazyancestors = ancestor.lazyancestors
1032 1032 arg = self._uncheckedparentrevs
1033 1033 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1034 1034
1035 1035 def descendants(self, revs):
1036 1036 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1037 1037
1038 1038 def findcommonmissing(self, common=None, heads=None):
1039 1039 """Return a tuple of the ancestors of common and the ancestors of heads
1040 1040 that are not ancestors of common. In revset terminology, we return the
1041 1041 tuple:
1042 1042
1043 1043 ::common, (::heads) - (::common)
1044 1044
1045 1045 The list is sorted by revision number, meaning it is
1046 1046 topologically sorted.
1047 1047
1048 1048 'heads' and 'common' are both lists of node IDs. If heads is
1049 1049 not supplied, uses all of the revlog's heads. If common is not
1050 1050 supplied, uses nullid."""
1051 1051 if common is None:
1052 1052 common = [self.nullid]
1053 1053 if heads is None:
1054 1054 heads = self.heads()
1055 1055
1056 1056 common = [self.rev(n) for n in common]
1057 1057 heads = [self.rev(n) for n in heads]
1058 1058
1059 1059 # we want the ancestors, but inclusive
1060 1060 class lazyset:
1061 1061 def __init__(self, lazyvalues):
1062 1062 self.addedvalues = set()
1063 1063 self.lazyvalues = lazyvalues
1064 1064
1065 1065 def __contains__(self, value):
1066 1066 return value in self.addedvalues or value in self.lazyvalues
1067 1067
1068 1068 def __iter__(self):
1069 1069 added = self.addedvalues
1070 1070 for r in added:
1071 1071 yield r
1072 1072 for r in self.lazyvalues:
1073 1073 if not r in added:
1074 1074 yield r
1075 1075
1076 1076 def add(self, value):
1077 1077 self.addedvalues.add(value)
1078 1078
1079 1079 def update(self, values):
1080 1080 self.addedvalues.update(values)
1081 1081
1082 1082 has = lazyset(self.ancestors(common))
1083 1083 has.add(nullrev)
1084 1084 has.update(common)
1085 1085
1086 1086 # take all ancestors from heads that aren't in has
1087 1087 missing = set()
1088 1088 visit = collections.deque(r for r in heads if r not in has)
1089 1089 while visit:
1090 1090 r = visit.popleft()
1091 1091 if r in missing:
1092 1092 continue
1093 1093 else:
1094 1094 missing.add(r)
1095 1095 for p in self.parentrevs(r):
1096 1096 if p not in has:
1097 1097 visit.append(p)
1098 1098 missing = list(missing)
1099 1099 missing.sort()
1100 1100 return has, [self.node(miss) for miss in missing]
1101 1101
1102 1102 def incrementalmissingrevs(self, common=None):
1103 1103 """Return an object that can be used to incrementally compute the
1104 1104 revision numbers of the ancestors of arbitrary sets that are not
1105 1105 ancestors of common. This is an ancestor.incrementalmissingancestors
1106 1106 object.
1107 1107
1108 1108 'common' is a list of revision numbers. If common is not supplied, uses
1109 1109 nullrev.
1110 1110 """
1111 1111 if common is None:
1112 1112 common = [nullrev]
1113 1113
1114 1114 if rustancestor is not None and self.index.rust_ext_compat:
1115 1115 return rustancestor.MissingAncestors(self.index, common)
1116 1116 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1117 1117
1118 1118 def findmissingrevs(self, common=None, heads=None):
1119 1119 """Return the revision numbers of the ancestors of heads that
1120 1120 are not ancestors of common.
1121 1121
1122 1122 More specifically, return a list of revision numbers corresponding to
1123 1123 nodes N such that every N satisfies the following constraints:
1124 1124
1125 1125 1. N is an ancestor of some node in 'heads'
1126 1126 2. N is not an ancestor of any node in 'common'
1127 1127
1128 1128 The list is sorted by revision number, meaning it is
1129 1129 topologically sorted.
1130 1130
1131 1131 'heads' and 'common' are both lists of revision numbers. If heads is
1132 1132 not supplied, uses all of the revlog's heads. If common is not
1133 1133 supplied, uses nullid."""
1134 1134 if common is None:
1135 1135 common = [nullrev]
1136 1136 if heads is None:
1137 1137 heads = self.headrevs()
1138 1138
1139 1139 inc = self.incrementalmissingrevs(common=common)
1140 1140 return inc.missingancestors(heads)
1141 1141
1142 1142 def findmissing(self, common=None, heads=None):
1143 1143 """Return the ancestors of heads that are not ancestors of common.
1144 1144
1145 1145 More specifically, return a list of nodes N such that every N
1146 1146 satisfies the following constraints:
1147 1147
1148 1148 1. N is an ancestor of some node in 'heads'
1149 1149 2. N is not an ancestor of any node in 'common'
1150 1150
1151 1151 The list is sorted by revision number, meaning it is
1152 1152 topologically sorted.
1153 1153
1154 1154 'heads' and 'common' are both lists of node IDs. If heads is
1155 1155 not supplied, uses all of the revlog's heads. If common is not
1156 1156 supplied, uses nullid."""
1157 1157 if common is None:
1158 1158 common = [self.nullid]
1159 1159 if heads is None:
1160 1160 heads = self.heads()
1161 1161
1162 1162 common = [self.rev(n) for n in common]
1163 1163 heads = [self.rev(n) for n in heads]
1164 1164
1165 1165 inc = self.incrementalmissingrevs(common=common)
1166 1166 return [self.node(r) for r in inc.missingancestors(heads)]
1167 1167
1168 1168 def nodesbetween(self, roots=None, heads=None):
1169 1169 """Return a topological path from 'roots' to 'heads'.
1170 1170
1171 1171 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1172 1172 topologically sorted list of all nodes N that satisfy both of
1173 1173 these constraints:
1174 1174
1175 1175 1. N is a descendant of some node in 'roots'
1176 1176 2. N is an ancestor of some node in 'heads'
1177 1177
1178 1178 Every node is considered to be both a descendant and an ancestor
1179 1179 of itself, so every reachable node in 'roots' and 'heads' will be
1180 1180 included in 'nodes'.
1181 1181
1182 1182 'outroots' is the list of reachable nodes in 'roots', i.e., the
1183 1183 subset of 'roots' that is returned in 'nodes'. Likewise,
1184 1184 'outheads' is the subset of 'heads' that is also in 'nodes'.
1185 1185
1186 1186 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1187 1187 unspecified, uses nullid as the only root. If 'heads' is
1188 1188 unspecified, uses list of all of the revlog's heads."""
1189 1189 nonodes = ([], [], [])
1190 1190 if roots is not None:
1191 1191 roots = list(roots)
1192 1192 if not roots:
1193 1193 return nonodes
1194 1194 lowestrev = min([self.rev(n) for n in roots])
1195 1195 else:
1196 1196 roots = [self.nullid] # Everybody's a descendant of nullid
1197 1197 lowestrev = nullrev
1198 1198 if (lowestrev == nullrev) and (heads is None):
1199 1199 # We want _all_ the nodes!
1200 1200 return (
1201 1201 [self.node(r) for r in self],
1202 1202 [self.nullid],
1203 1203 list(self.heads()),
1204 1204 )
1205 1205 if heads is None:
1206 1206 # All nodes are ancestors, so the latest ancestor is the last
1207 1207 # node.
1208 1208 highestrev = len(self) - 1
1209 1209 # Set ancestors to None to signal that every node is an ancestor.
1210 1210 ancestors = None
1211 1211 # Set heads to an empty dictionary for later discovery of heads
1212 1212 heads = {}
1213 1213 else:
1214 1214 heads = list(heads)
1215 1215 if not heads:
1216 1216 return nonodes
1217 1217 ancestors = set()
1218 1218 # Turn heads into a dictionary so we can remove 'fake' heads.
1219 1219 # Also, later we will be using it to filter out the heads we can't
1220 1220 # find from roots.
1221 1221 heads = dict.fromkeys(heads, False)
1222 1222 # Start at the top and keep marking parents until we're done.
1223 1223 nodestotag = set(heads)
1224 1224 # Remember where the top was so we can use it as a limit later.
1225 1225 highestrev = max([self.rev(n) for n in nodestotag])
1226 1226 while nodestotag:
1227 1227 # grab a node to tag
1228 1228 n = nodestotag.pop()
1229 1229 # Never tag nullid
1230 1230 if n == self.nullid:
1231 1231 continue
1232 1232 # A node's revision number represents its place in a
1233 1233 # topologically sorted list of nodes.
1234 1234 r = self.rev(n)
1235 1235 if r >= lowestrev:
1236 1236 if n not in ancestors:
1237 1237 # If we are possibly a descendant of one of the roots
1238 1238 # and we haven't already been marked as an ancestor
1239 1239 ancestors.add(n) # Mark as ancestor
1240 1240 # Add non-nullid parents to list of nodes to tag.
1241 1241 nodestotag.update(
1242 1242 [p for p in self.parents(n) if p != self.nullid]
1243 1243 )
1244 1244 elif n in heads: # We've seen it before, is it a fake head?
1245 1245 # So it is, real heads should not be the ancestors of
1246 1246 # any other heads.
1247 1247 heads.pop(n)
1248 1248 if not ancestors:
1249 1249 return nonodes
1250 1250 # Now that we have our set of ancestors, we want to remove any
1251 1251 # roots that are not ancestors.
1252 1252
1253 1253 # If one of the roots was nullid, everything is included anyway.
1254 1254 if lowestrev > nullrev:
1255 1255 # But, since we weren't, let's recompute the lowest rev to not
1256 1256 # include roots that aren't ancestors.
1257 1257
1258 1258 # Filter out roots that aren't ancestors of heads
1259 1259 roots = [root for root in roots if root in ancestors]
1260 1260 # Recompute the lowest revision
1261 1261 if roots:
1262 1262 lowestrev = min([self.rev(root) for root in roots])
1263 1263 else:
1264 1264 # No more roots? Return empty list
1265 1265 return nonodes
1266 1266 else:
1267 1267 # We are descending from nullid, and don't need to care about
1268 1268 # any other roots.
1269 1269 lowestrev = nullrev
1270 1270 roots = [self.nullid]
1271 1271 # Transform our roots list into a set.
1272 1272 descendants = set(roots)
1273 1273 # Also, keep the original roots so we can filter out roots that aren't
1274 1274 # 'real' roots (i.e. are descended from other roots).
1275 1275 roots = descendants.copy()
1276 1276 # Our topologically sorted list of output nodes.
1277 1277 orderedout = []
1278 1278 # Don't start at nullid since we don't want nullid in our output list,
1279 1279 # and if nullid shows up in descendants, empty parents will look like
1280 1280 # they're descendants.
1281 1281 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1282 1282 n = self.node(r)
1283 1283 isdescendant = False
1284 1284 if lowestrev == nullrev: # Everybody is a descendant of nullid
1285 1285 isdescendant = True
1286 1286 elif n in descendants:
1287 1287 # n is already a descendant
1288 1288 isdescendant = True
1289 1289 # This check only needs to be done here because all the roots
1290 1290 # will start being marked as descendants before the loop.
1291 1291 if n in roots:
1292 1292 # If n was a root, check if it's a 'real' root.
1293 1293 p = tuple(self.parents(n))
1294 1294 # If any of its parents are descendants, it's not a root.
1295 1295 if (p[0] in descendants) or (p[1] in descendants):
1296 1296 roots.remove(n)
1297 1297 else:
1298 1298 p = tuple(self.parents(n))
1299 1299 # A node is a descendant if either of its parents are
1300 1300 # descendants. (We seeded the descendants set with the roots
1301 1301 # up there, remember?)
1302 1302 if (p[0] in descendants) or (p[1] in descendants):
1303 1303 descendants.add(n)
1304 1304 isdescendant = True
1305 1305 if isdescendant and ((ancestors is None) or (n in ancestors)):
1306 1306 # Only include nodes that are both descendants and ancestors.
1307 1307 orderedout.append(n)
1308 1308 if (ancestors is not None) and (n in heads):
1309 1309 # We're trying to figure out which heads are reachable
1310 1310 # from roots.
1311 1311 # Mark this head as having been reached
1312 1312 heads[n] = True
1313 1313 elif ancestors is None:
1314 1314 # Otherwise, we're trying to discover the heads.
1315 1315 # Assume this is a head because if it isn't, the next step
1316 1316 # will eventually remove it.
1317 1317 heads[n] = True
1318 1318 # But, obviously its parents aren't.
1319 1319 for p in self.parents(n):
1320 1320 heads.pop(p, None)
1321 1321 heads = [head for head, flag in heads.items() if flag]
1322 1322 roots = list(roots)
1323 1323 assert orderedout
1324 1324 assert roots
1325 1325 assert heads
1326 1326 return (orderedout, roots, heads)
1327 1327
1328 1328 def headrevs(self, revs=None):
1329 1329 if revs is None:
1330 1330 try:
1331 1331 return self.index.headrevs()
1332 1332 except AttributeError:
1333 1333 return self._headrevs()
1334 1334 if rustdagop is not None and self.index.rust_ext_compat:
1335 1335 return rustdagop.headrevs(self.index, revs)
1336 1336 return dagop.headrevs(revs, self._uncheckedparentrevs)
1337 1337
1338 1338 def computephases(self, roots):
1339 1339 return self.index.computephasesmapsets(roots)
1340 1340
1341 1341 def _headrevs(self):
1342 1342 count = len(self)
1343 1343 if not count:
1344 1344 return [nullrev]
1345 1345 # we won't iterate over filtered revs so nobody is a head at start
1346 1346 ishead = [0] * (count + 1)
1347 1347 index = self.index
1348 1348 for r in self:
1349 1349 ishead[r] = 1 # I may be a head
1350 1350 e = index[r]
1351 1351 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1352 1352 return [r for r, val in enumerate(ishead) if val]
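# Illustrative note: the pure-python fallback above first marks every
# unfiltered rev as a potential head, then clears the mark of anything that
# appears as a parent. For a linear history 0 <- 1 <- 2 only ishead[2]
# survives, so _headrevs() returns [2].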
1353 1353
1354 1354 def heads(self, start=None, stop=None):
1355 1355 """return the list of all nodes that have no children
1356 1356
1357 1357 if start is specified, only heads that are descendants of
1358 1358 start will be returned
1359 1359 if stop is specified, it will consider all the revs from stop
1360 1360 as if they had no children
1361 1361 """
1362 1362 if start is None and stop is None:
1363 1363 if not len(self):
1364 1364 return [self.nullid]
1365 1365 return [self.node(r) for r in self.headrevs()]
1366 1366
1367 1367 if start is None:
1368 1368 start = nullrev
1369 1369 else:
1370 1370 start = self.rev(start)
1371 1371
1372 1372 stoprevs = {self.rev(n) for n in stop or []}
1373 1373
1374 1374 revs = dagop.headrevssubset(
1375 1375 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1376 1376 )
1377 1377
1378 1378 return [self.node(rev) for rev in revs]
1379 1379
1380 1380 def children(self, node):
1381 1381 """find the children of a given node"""
1382 1382 c = []
1383 1383 p = self.rev(node)
1384 1384 for r in self.revs(start=p + 1):
1385 1385 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1386 1386 if prevs:
1387 1387 for pr in prevs:
1388 1388 if pr == p:
1389 1389 c.append(self.node(r))
1390 1390 elif p == nullrev:
1391 1391 c.append(self.node(r))
1392 1392 return c
1393 1393
1394 1394 def commonancestorsheads(self, a, b):
1395 1395 """calculate all the heads of the common ancestors of nodes a and b"""
1396 1396 a, b = self.rev(a), self.rev(b)
1397 1397 ancs = self._commonancestorsheads(a, b)
1398 1398 return pycompat.maplist(self.node, ancs)
1399 1399
1400 1400 def _commonancestorsheads(self, *revs):
1401 1401 """calculate all the heads of the common ancestors of revs"""
1402 1402 try:
1403 1403 ancs = self.index.commonancestorsheads(*revs)
1404 1404 except (AttributeError, OverflowError): # C implementation failed
1405 1405 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1406 1406 return ancs
1407 1407
1408 1408 def isancestor(self, a, b):
1409 1409 """return True if node a is an ancestor of node b
1410 1410
1411 1411 A revision is considered an ancestor of itself."""
1412 1412 a, b = self.rev(a), self.rev(b)
1413 1413 return self.isancestorrev(a, b)
1414 1414
1415 1415 def isancestorrev(self, a, b):
1416 1416 """return True if revision a is an ancestor of revision b
1417 1417
1418 1418 A revision is considered an ancestor of itself.
1419 1419
1420 1420 The implementation of this is trivial but the use of
1421 1421 reachableroots is not."""
1422 1422 if a == nullrev:
1423 1423 return True
1424 1424 elif a == b:
1425 1425 return True
1426 1426 elif a > b:
1427 1427 return False
1428 1428 return bool(self.reachableroots(a, [b], [a], includepath=False))
1429 1429
1430 1430 def reachableroots(self, minroot, heads, roots, includepath=False):
1431 1431 """return (heads(::(<roots> and <roots>::<heads>)))
1432 1432
1433 1433 If includepath is True, return (<roots>::<heads>)."""
1434 1434 try:
1435 1435 return self.index.reachableroots2(
1436 1436 minroot, heads, roots, includepath
1437 1437 )
1438 1438 except AttributeError:
1439 1439 return dagop._reachablerootspure(
1440 1440 self.parentrevs, minroot, roots, heads, includepath
1441 1441 )
1442 1442
1443 1443 def ancestor(self, a, b):
1444 1444 """calculate the "best" common ancestor of nodes a and b"""
1445 1445
1446 1446 a, b = self.rev(a), self.rev(b)
1447 1447 try:
1448 1448 ancs = self.index.ancestors(a, b)
1449 1449 except (AttributeError, OverflowError):
1450 1450 ancs = ancestor.ancestors(self.parentrevs, a, b)
1451 1451 if ancs:
1452 1452 # choose a consistent winner when there's a tie
1453 1453 return min(map(self.node, ancs))
1454 1454 return self.nullid
1455 1455
1456 1456 def _match(self, id):
1457 1457 if isinstance(id, int):
1458 1458 # rev
1459 1459 return self.node(id)
1460 1460 if len(id) == self.nodeconstants.nodelen:
1461 1461 # possibly a binary node
1462 1462 # odds of a binary node being all hex in ASCII are 1 in 10**25
1463 1463 try:
1464 1464 node = id
1465 1465 self.rev(node) # quick search the index
1466 1466 return node
1467 1467 except error.LookupError:
1468 1468 pass # may be partial hex id
1469 1469 try:
1470 1470 # str(rev)
1471 1471 rev = int(id)
1472 1472 if b"%d" % rev != id:
1473 1473 raise ValueError
1474 1474 if rev < 0:
1475 1475 rev = len(self) + rev
1476 1476 if rev < 0 or rev >= len(self):
1477 1477 raise ValueError
1478 1478 return self.node(rev)
1479 1479 except (ValueError, OverflowError):
1480 1480 pass
1481 1481 if len(id) == 2 * self.nodeconstants.nodelen:
1482 1482 try:
1483 1483 # a full hex nodeid?
1484 1484 node = bin(id)
1485 1485 self.rev(node)
1486 1486 return node
1487 1487 except (binascii.Error, error.LookupError):
1488 1488 pass
1489 1489
1490 1490 def _partialmatch(self, id):
1491 1491 # we don't care about wdirfilenodeids as they should always be full hashes
1492 1492 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1493 1493 ambiguous = False
1494 1494 try:
1495 1495 partial = self.index.partialmatch(id)
1496 1496 if partial and self.hasnode(partial):
1497 1497 if maybewdir:
1498 1498 # single 'ff...' match in radix tree, ambiguous with wdir
1499 1499 ambiguous = True
1500 1500 else:
1501 1501 return partial
1502 1502 elif maybewdir:
1503 1503 # no 'ff...' match in radix tree, wdir identified
1504 1504 raise error.WdirUnsupported
1505 1505 else:
1506 1506 return None
1507 1507 except error.RevlogError:
1508 1508 # parsers.c radix tree lookup gave multiple matches
1509 1509 # fast path: for unfiltered changelog, radix tree is accurate
1510 1510 if not getattr(self, 'filteredrevs', None):
1511 1511 ambiguous = True
1512 1512 # fall through to slow path that filters hidden revisions
1513 1513 except (AttributeError, ValueError):
1514 1514 # we are pure python, or key is not hex
1515 1515 pass
1516 1516 if ambiguous:
1517 1517 raise error.AmbiguousPrefixLookupError(
1518 1518 id, self.display_id, _(b'ambiguous identifier')
1519 1519 )
1520 1520
1521 1521 if id in self._pcache:
1522 1522 return self._pcache[id]
1523 1523
1524 1524 if len(id) <= 40:
1525 1525 # hex(node)[:...]
1526 1526 l = len(id) // 2 * 2 # grab an even number of digits
1527 1527 try:
1528 1528 # we're dropping the last digit, so let's check that it's hex,
1529 1529 # to avoid the expensive computation below if it's not
1530 1530 if len(id) % 2 > 0:
1531 1531 if not (id[-1] in hexdigits):
1532 1532 return None
1533 1533 prefix = bin(id[:l])
1534 1534 except binascii.Error:
1535 1535 pass
1536 1536 else:
1537 1537 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1538 1538 nl = [
1539 1539 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1540 1540 ]
1541 1541 if self.nodeconstants.nullhex.startswith(id):
1542 1542 nl.append(self.nullid)
1543 1543 if len(nl) > 0:
1544 1544 if len(nl) == 1 and not maybewdir:
1545 1545 self._pcache[id] = nl[0]
1546 1546 return nl[0]
1547 1547 raise error.AmbiguousPrefixLookupError(
1548 1548 id, self.display_id, _(b'ambiguous identifier')
1549 1549 )
1550 1550 if maybewdir:
1551 1551 raise error.WdirUnsupported
1552 1552 return None
1553 1553
1554 1554 def lookup(self, id):
1555 1555 """locate a node based on:
1556 1556 - revision number or str(revision number)
1557 1557 - nodeid or subset of hex nodeid
1558 1558 """
1559 1559 n = self._match(id)
1560 1560 if n is not None:
1561 1561 return n
1562 1562 n = self._partialmatch(id)
1563 1563 if n:
1564 1564 return n
1565 1565
1566 1566 raise error.LookupError(id, self.display_id, _(b'no match found'))
1567 1567
1568 1568 def shortest(self, node, minlength=1):
1569 1569 """Find the shortest unambiguous prefix that matches node."""
1570 1570
1571 1571 def isvalid(prefix):
1572 1572 try:
1573 1573 matchednode = self._partialmatch(prefix)
1574 1574 except error.AmbiguousPrefixLookupError:
1575 1575 return False
1576 1576 except error.WdirUnsupported:
1577 1577 # single 'ff...' match
1578 1578 return True
1579 1579 if matchednode is None:
1580 1580 raise error.LookupError(node, self.display_id, _(b'no node'))
1581 1581 return True
1582 1582
1583 1583 def maybewdir(prefix):
1584 1584 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1585 1585
1586 1586 hexnode = hex(node)
1587 1587
1588 1588 def disambiguate(hexnode, minlength):
1589 1589 """Disambiguate against wdirid."""
1590 1590 for length in range(minlength, len(hexnode) + 1):
1591 1591 prefix = hexnode[:length]
1592 1592 if not maybewdir(prefix):
1593 1593 return prefix
1594 1594
1595 1595 if not getattr(self, 'filteredrevs', None):
1596 1596 try:
1597 1597 length = max(self.index.shortest(node), minlength)
1598 1598 return disambiguate(hexnode, length)
1599 1599 except error.RevlogError:
1600 1600 if node != self.nodeconstants.wdirid:
1601 1601 raise error.LookupError(
1602 1602 node, self.display_id, _(b'no node')
1603 1603 )
1604 1604 except AttributeError:
1605 1605 # Fall through to pure code
1606 1606 pass
1607 1607
1608 1608 if node == self.nodeconstants.wdirid:
1609 1609 for length in range(minlength, len(hexnode) + 1):
1610 1610 prefix = hexnode[:length]
1611 1611 if isvalid(prefix):
1612 1612 return prefix
1613 1613
1614 1614 for length in range(minlength, len(hexnode) + 1):
1615 1615 prefix = hexnode[:length]
1616 1616 if isvalid(prefix):
1617 1617 return disambiguate(hexnode, length)
1618 1618
1619 1619 def cmp(self, node, text):
1620 1620 """compare text with a given file revision
1621 1621
1622 1622 returns True if text is different than what is stored.
1623 1623 """
1624 1624 p1, p2 = self.parents(node)
1625 1625 return storageutil.hashrevisionsha1(text, p1, p2) != node
1626 1626
1627 1627 def _getsegmentforrevs(self, startrev, endrev, df=None):
1628 1628 """Obtain a segment of raw data corresponding to a range of revisions.
1629 1629
1630 1630 Accepts the start and end revisions and an optional already-open
1631 1631 file handle to be used for reading. If the file handle is read, its
1632 1632 seek position will not be preserved.
1633 1633
1634 1634 Requests for data may be satisfied by a cache.
1635 1635
1636 1636 Returns a 2-tuple of (offset, data) for the requested range of
1637 1637 revisions. Offset is the integer offset from the beginning of the
1638 1638 revlog and data is a str or buffer of the raw byte data.
1639 1639
1640 1640 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1641 1641 to determine where each revision's data begins and ends.
1642 1642 """
1643 1643 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1644 1644 # (functions are expensive).
1645 1645 index = self.index
1646 1646 istart = index[startrev]
1647 1647 start = int(istart[0] >> 16)
1648 1648 if startrev == endrev:
1649 1649 end = start + istart[1]
1650 1650 else:
1651 1651 iend = index[endrev]
1652 1652 end = int(iend[0] >> 16) + iend[1]
1653 1653
1654 1654 if self._inline:
1655 1655 start += (startrev + 1) * self.index.entry_size
1656 1656 end += (endrev + 1) * self.index.entry_size
1657 1657 length = end - start
1658 1658
1659 1659 return start, self._segmentfile.read_chunk(start, length, df)
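# Worked example of the inline adjustment above (illustrative only): with the
# 64-byte v1 index entry size, reading revs 3..4 of an inline revlog whose
# data offsets are start=1000 and end=1500 actually reads
#
# start = 1000 + (3 + 1) * 64  # 1256
# end = 1500 + (4 + 1) * 64  # 1820
#
# because each revision's data is interleaved after its index entry in the
# single ".i" file.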
1660 1660
1661 1661 def _chunk(self, rev, df=None):
1662 1662 """Obtain a single decompressed chunk for a revision.
1663 1663
1664 1664 Accepts an integer revision and an optional already-open file handle
1665 1665 to be used for reading. If used, the seek position of the file will not
1666 1666 be preserved.
1667 1667
1668 1668 Returns a str holding uncompressed data for the requested revision.
1669 1669 """
1670 1670 compression_mode = self.index[rev][10]
1671 1671 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1672 1672 if compression_mode == COMP_MODE_PLAIN:
1673 1673 return data
1674 1674 elif compression_mode == COMP_MODE_DEFAULT:
1675 1675 return self._decompressor(data)
1676 1676 elif compression_mode == COMP_MODE_INLINE:
1677 1677 return self.decompress(data)
1678 1678 else:
1679 1679 msg = b'unknown compression mode %d'
1680 1680 msg %= compression_mode
1681 1681 raise error.RevlogError(msg)
1682 1682
1683 1683 def _chunks(self, revs, df=None, targetsize=None):
1684 1684 """Obtain decompressed chunks for the specified revisions.
1685 1685
1686 1686 Accepts an iterable of numeric revisions that are assumed to be in
1687 1687 ascending order. Also accepts an optional already-open file handle
1688 1688 to be used for reading. If used, the seek position of the file will
1689 1689 not be preserved.
1690 1690
1691 1691 This function is similar to calling ``self._chunk()`` multiple times,
1692 1692 but is faster.
1693 1693
1694 1694 Returns a list with decompressed data for each requested revision.
1695 1695 """
1696 1696 if not revs:
1697 1697 return []
1698 1698 start = self.start
1699 1699 length = self.length
1700 1700 inline = self._inline
1701 1701 iosize = self.index.entry_size
1702 1702 buffer = util.buffer
1703 1703
1704 1704 l = []
1705 1705 ladd = l.append
1706 1706
1707 1707 if not self._withsparseread:
1708 1708 slicedchunks = (revs,)
1709 1709 else:
1710 1710 slicedchunks = deltautil.slicechunk(
1711 1711 self, revs, targetsize=targetsize
1712 1712 )
1713 1713
1714 1714 for revschunk in slicedchunks:
1715 1715 firstrev = revschunk[0]
1716 1716 # Skip trailing revisions with empty diff
1717 1717 for lastrev in revschunk[::-1]:
1718 1718 if length(lastrev) != 0:
1719 1719 break
1720 1720
1721 1721 try:
1722 1722 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1723 1723 except OverflowError:
1724 1724 # issue4215 - we can't cache a run of chunks greater than
1725 1725 # 2G on Windows
1726 1726 return [self._chunk(rev, df=df) for rev in revschunk]
1727 1727
1728 1728 decomp = self.decompress
1729 1729 # self._decompressor might be None, but will not be used in that case
1730 1730 def_decomp = self._decompressor
1731 1731 for rev in revschunk:
1732 1732 chunkstart = start(rev)
1733 1733 if inline:
1734 1734 chunkstart += (rev + 1) * iosize
1735 1735 chunklength = length(rev)
1736 1736 comp_mode = self.index[rev][10]
1737 1737 c = buffer(data, chunkstart - offset, chunklength)
1738 1738 if comp_mode == COMP_MODE_PLAIN:
1739 1739 ladd(c)
1740 1740 elif comp_mode == COMP_MODE_INLINE:
1741 1741 ladd(decomp(c))
1742 1742 elif comp_mode == COMP_MODE_DEFAULT:
1743 1743 ladd(def_decomp(c))
1744 1744 else:
1745 1745 msg = b'unknown compression mode %d'
1746 1746 msg %= comp_mode
1747 1747 raise error.RevlogError(msg)
1748 1748
1749 1749 return l
1750 1750
1751 1751 def deltaparent(self, rev):
1752 1752 """return deltaparent of the given revision"""
1753 1753 base = self.index[rev][3]
1754 1754 if base == rev:
1755 1755 return nullrev
1756 1756 elif self._generaldelta:
1757 1757 return base
1758 1758 else:
1759 1759 return rev - 1
1760 1760
1761 1761 def issnapshot(self, rev):
1762 1762 """tells whether rev is a snapshot"""
1763 1763 if not self._sparserevlog:
1764 1764 return self.deltaparent(rev) == nullrev
1765 1765 elif util.safehasattr(self.index, b'issnapshot'):
1766 1766 # directly assign the method to cache the testing and access
1767 1767 self.issnapshot = self.index.issnapshot
1768 1768 return self.issnapshot(rev)
1769 1769 if rev == nullrev:
1770 1770 return True
1771 1771 entry = self.index[rev]
1772 1772 base = entry[3]
1773 1773 if base == rev:
1774 1774 return True
1775 1775 if base == nullrev:
1776 1776 return True
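# a delta whose base is one of the revision's parents is not a
# snapshot. A parent stored as a zero-length delta carries the same
# content as its own delta parent, so walk each parent down its delta
# chain past such empty revisions before the comparison below.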
1777 1777 p1 = entry[5]
1778 while self.length(p1) == 0:
1779 b = self.deltaparent(p1)
1780 if b == p1:
1781 break
1782 p1 = b
1778 1783 p2 = entry[6]
1784 while self.length(p2) == 0:
1785 b = self.deltaparent(p2)
1786 if b == p2:
1787 break
1788 p2 = b
1779 1789 if base == p1 or base == p2:
1780 1790 return False
1781 1791 return self.issnapshot(base)
1782 1792
1783 1793 def snapshotdepth(self, rev):
1784 1794 """number of snapshot in the chain before this one"""
1785 1795 if not self.issnapshot(rev):
1786 1796 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1787 1797 return len(self._deltachain(rev)[0]) - 1
1788 1798
1789 1799 def revdiff(self, rev1, rev2):
1790 1800 """return or calculate a delta between two revisions
1791 1801
1792 1802 The delta calculated is in binary form and is intended to be written to
1793 1803 revlog data directly. So this function needs raw revision data.
1794 1804 """
1795 1805 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1796 1806 return bytes(self._chunk(rev2))
1797 1807
1798 1808 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1799 1809
1800 1810 def revision(self, nodeorrev, _df=None):
1801 1811 """return an uncompressed revision of a given node or revision
1802 1812 number.
1803 1813
1804 1814 _df - an existing file handle to read from. (internal-only)
1805 1815 """
1806 1816 return self._revisiondata(nodeorrev, _df)
1807 1817
1808 1818 def sidedata(self, nodeorrev, _df=None):
1809 1819 """a map of extra data related to the changeset but not part of the hash
1810 1820
1811 1821 This function currently returns a dictionary. However, a more advanced
1812 1822 mapping object will likely be used in the future for more
1813 1823 efficient/lazy code.
1814 1824 """
1815 1825 # deal with <nodeorrev> argument type
1816 1826 if isinstance(nodeorrev, int):
1817 1827 rev = nodeorrev
1818 1828 else:
1819 1829 rev = self.rev(nodeorrev)
1820 1830 return self._sidedata(rev)
1821 1831
1822 1832 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1823 1833 # deal with <nodeorrev> argument type
1824 1834 if isinstance(nodeorrev, int):
1825 1835 rev = nodeorrev
1826 1836 node = self.node(rev)
1827 1837 else:
1828 1838 node = nodeorrev
1829 1839 rev = None
1830 1840
1831 1841 # fast path the special `nullid` rev
1832 1842 if node == self.nullid:
1833 1843 return b""
1834 1844
1835 1845 # ``rawtext`` is the text as stored inside the revlog. Might be the
1836 1846 # revision or might need to be processed to retrieve the revision.
1837 1847 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1838 1848
1839 1849 if raw and validated:
1840 1850 # if we don't want to process the raw text and the raw
1841 1851 # text is already cached, we can exit early.
1842 1852 return rawtext
1843 1853 if rev is None:
1844 1854 rev = self.rev(node)
1845 1855 # the revlog's flags for this revision
1846 1856 # (they usually alter its state or content)
1847 1857 flags = self.flags(rev)
1848 1858
1849 1859 if validated and flags == REVIDX_DEFAULT_FLAGS:
1850 1860 # no extra flags set, no flag processor runs, text = rawtext
1851 1861 return rawtext
1852 1862
1853 1863 if raw:
1854 1864 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1855 1865 text = rawtext
1856 1866 else:
1857 1867 r = flagutil.processflagsread(self, rawtext, flags)
1858 1868 text, validatehash = r
1859 1869 if validatehash:
1860 1870 self.checkhash(text, node, rev=rev)
1861 1871 if not validated:
1862 1872 self._revisioncache = (node, rev, rawtext)
1863 1873
1864 1874 return text
1865 1875
1866 1876 def _rawtext(self, node, rev, _df=None):
1867 1877 """return the possibly unvalidated rawtext for a revision
1868 1878
1869 1879 returns (rev, rawtext, validated)
1870 1880 """
1871 1881
1872 1882 # revision in the cache (could be useful to apply delta)
1873 1883 cachedrev = None
1874 1884 # An intermediate text to apply deltas to
1875 1885 basetext = None
1876 1886
1877 1887 # Check if we have the entry in cache
1878 1888 # The cache entry looks like (node, rev, rawtext)
1879 1889 if self._revisioncache:
1880 1890 if self._revisioncache[0] == node:
1881 1891 return (rev, self._revisioncache[2], True)
1882 1892 cachedrev = self._revisioncache[1]
1883 1893
1884 1894 if rev is None:
1885 1895 rev = self.rev(node)
1886 1896
1887 1897 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1888 1898 if stopped:
1889 1899 basetext = self._revisioncache[2]
1890 1900
1891 1901 # drop the cache to save memory; the caller is expected to
1892 1902 # update self._revisioncache after validating the text
1893 1903 self._revisioncache = None
1894 1904
1895 1905 targetsize = None
1896 1906 rawsize = self.index[rev][2]
1897 1907 if 0 <= rawsize:
1898 1908 targetsize = 4 * rawsize
1899 1909
1900 1910 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1901 1911 if basetext is None:
1902 1912 basetext = bytes(bins[0])
1903 1913 bins = bins[1:]
1904 1914
1905 1915 rawtext = mdiff.patches(basetext, bins)
1906 1916 del basetext # let us have a chance to free memory early
1907 1917 return (rev, rawtext, False)
1908 1918
1909 1919 def _sidedata(self, rev):
1910 1920 """Return the sidedata for a given revision number."""
1911 1921 index_entry = self.index[rev]
1912 1922 sidedata_offset = index_entry[8]
1913 1923 sidedata_size = index_entry[9]
1914 1924
1915 1925 if self._inline:
1916 1926 sidedata_offset += self.index.entry_size * (1 + rev)
1917 1927 if sidedata_size == 0:
1918 1928 return {}
1919 1929
1920 1930 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1921 1931 filename = self._sidedatafile
1922 1932 end = self._docket.sidedata_end
1923 1933 offset = sidedata_offset
1924 1934 length = sidedata_size
1925 1935 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1926 1936 raise error.RevlogError(m)
1927 1937
1928 1938 comp_segment = self._segmentfile_sidedata.read_chunk(
1929 1939 sidedata_offset, sidedata_size
1930 1940 )
1931 1941
1932 1942 comp = self.index[rev][11]
1933 1943 if comp == COMP_MODE_PLAIN:
1934 1944 segment = comp_segment
1935 1945 elif comp == COMP_MODE_DEFAULT:
1936 1946 segment = self._decompressor(comp_segment)
1937 1947 elif comp == COMP_MODE_INLINE:
1938 1948 segment = self.decompress(comp_segment)
1939 1949 else:
1940 1950 msg = b'unknown compression mode %d'
1941 1951 msg %= comp
1942 1952 raise error.RevlogError(msg)
1943 1953
1944 1954 sidedata = sidedatautil.deserialize_sidedata(segment)
1945 1955 return sidedata
1946 1956
1947 1957 def rawdata(self, nodeorrev, _df=None):
1948 1958 """return an uncompressed raw data of a given node or revision number.
1949 1959
1950 1960 _df - an existing file handle to read from. (internal-only)
1951 1961 """
1952 1962 return self._revisiondata(nodeorrev, _df, raw=True)
1953 1963
1954 1964 def hash(self, text, p1, p2):
1955 1965 """Compute a node hash.
1956 1966
1957 1967 Available as a function so that subclasses can replace the hash
1958 1968 as needed.
1959 1969 """
1960 1970 return storageutil.hashrevisionsha1(text, p1, p2)
1961 1971
1962 1972 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1963 1973 """Check node hash integrity.
1964 1974
1965 1975 Available as a function so that subclasses can extend hash mismatch
1966 1976 behaviors as needed.
1967 1977 """
1968 1978 try:
1969 1979 if p1 is None and p2 is None:
1970 1980 p1, p2 = self.parents(node)
1971 1981 if node != self.hash(text, p1, p2):
1972 1982 # Clear the revision cache on hash failure. The revision cache
1973 1983 # only stores the raw revision and clearing the cache does have
1974 1984 # the side-effect that we won't have a cache hit when the raw
1975 1985 # revision data is accessed. But this case should be rare and
1976 1986 # it is extra work to teach the cache about the hash
1977 1987 # verification state.
1978 1988 if self._revisioncache and self._revisioncache[0] == node:
1979 1989 self._revisioncache = None
1980 1990
1981 1991 revornode = rev
1982 1992 if revornode is None:
1983 1993 revornode = templatefilters.short(hex(node))
1984 1994 raise error.RevlogError(
1985 1995 _(b"integrity check failed on %s:%s")
1986 1996 % (self.display_id, pycompat.bytestr(revornode))
1987 1997 )
1988 1998 except error.RevlogError:
1989 1999 if self._censorable and storageutil.iscensoredtext(text):
1990 2000 raise error.CensoredNodeError(self.display_id, node, text)
1991 2001 raise
1992 2002
1993 2003 def _enforceinlinesize(self, tr):
1994 2004 """Check if the revlog is too big for inline and convert if so.
1995 2005
1996 2006 This should be called after revisions are added to the revlog. If the
1997 2007 revlog has grown too large to be an inline revlog, it will convert it
1998 2008 to use multiple index and data files.
1999 2009 """
2000 2010 tiprev = len(self) - 1
2001 2011 total_size = self.start(tiprev) + self.length(tiprev)
2002 2012 if not self._inline or total_size < _maxinline:
2003 2013 return
2004 2014
2005 2015 troffset = tr.findoffset(self._indexfile)
2006 2016 if troffset is None:
2007 2017 raise error.RevlogError(
2008 2018 _(b"%s not found in the transaction") % self._indexfile
2009 2019 )
2010 2020 trindex = None
2011 2021 tr.add(self._datafile, 0)
2012 2022
2013 2023 existing_handles = False
2014 2024 if self._writinghandles is not None:
2015 2025 existing_handles = True
2016 2026 fp = self._writinghandles[0]
2017 2027 fp.flush()
2018 2028 fp.close()
2019 2029 # We can't use the cached file handle after close(). So prevent
2020 2030 # its usage.
2021 2031 self._writinghandles = None
2022 2032 self._segmentfile.writing_handle = None
2023 2033 # No need to deal with the sidedata writing handle as it is only
2024 2034 # relevant for revlog-v2, which is never inline and therefore never
2025 2035 # reaches this code
2026 2036
2027 2037 new_dfh = self._datafp(b'w+')
2028 2038 new_dfh.truncate(0) # drop any potentially existing data
2029 2039 try:
2030 2040 with self._indexfp() as read_ifh:
2031 2041 for r in self:
2032 2042 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2033 2043 if (
2034 2044 trindex is None
2035 2045 and troffset
2036 2046 <= self.start(r) + r * self.index.entry_size
2037 2047 ):
2038 2048 trindex = r
2039 2049 new_dfh.flush()
2040 2050
2041 2051 if trindex is None:
2042 2052 trindex = 0
2043 2053
2044 2054 with self.__index_new_fp() as fp:
2045 2055 self._format_flags &= ~FLAG_INLINE_DATA
2046 2056 self._inline = False
2047 2057 for i in self:
2048 2058 e = self.index.entry_binary(i)
2049 2059 if i == 0 and self._docket is None:
2050 2060 header = self._format_flags | self._format_version
2051 2061 header = self.index.pack_header(header)
2052 2062 e = header + e
2053 2063 fp.write(e)
2054 2064 if self._docket is not None:
2055 2065 self._docket.index_end = fp.tell()
2056 2066
2057 2067 # There is a small transactional race here. If the rename of
2058 2068 # the index fails, we should remove the datafile. It is more
2059 2069 # important to ensure that the data file is not truncated
2060 2070 # when the index is replaced as otherwise data is lost.
2061 2071 tr.replace(self._datafile, self.start(trindex))
2062 2072
2063 2073 # the temp file replaces the real index when we exit the context
2064 2074 # manager
2065 2075
2066 2076 tr.replace(self._indexfile, trindex * self.index.entry_size)
2067 2077 nodemaputil.setup_persistent_nodemap(tr, self)
2068 2078 self._segmentfile = randomaccessfile.randomaccessfile(
2069 2079 self.opener,
2070 2080 self._datafile,
2071 2081 self._chunkcachesize,
2072 2082 )
2073 2083
2074 2084 if existing_handles:
2075 2085 # switched from inline to conventional; reopen the index
2076 2086 ifh = self.__index_write_fp()
2077 2087 self._writinghandles = (ifh, new_dfh, None)
2078 2088 self._segmentfile.writing_handle = new_dfh
2079 2089 new_dfh = None
2080 2090 # No need to deal with the sidedata writing handle as it is only
2081 2091 # relevant for revlog-v2, which is never inline and therefore never
2082 2092 # reaches this code
2083 2093 finally:
2084 2094 if new_dfh is not None:
2085 2095 new_dfh.close()
2086 2096
2087 2097 def _nodeduplicatecallback(self, transaction, node):
2088 2098 """called when trying to add a node already stored."""
2089 2099
2090 2100 @contextlib.contextmanager
2091 2101 def reading(self):
2092 2102 """Context manager that keeps data and sidedata files open for reading"""
2093 2103 with self._segmentfile.reading():
2094 2104 with self._segmentfile_sidedata.reading():
2095 2105 yield
2096 2106
2097 2107 @contextlib.contextmanager
2098 2108 def _writing(self, transaction):
2099 2109 if self._trypending:
2100 2110 msg = b'try to write in a `trypending` revlog: %s'
2101 2111 msg %= self.display_id
2102 2112 raise error.ProgrammingError(msg)
2103 2113 if self._writinghandles is not None:
2104 2114 yield
2105 2115 else:
2106 2116 ifh = dfh = sdfh = None
2107 2117 try:
2108 2118 r = len(self)
2109 2119 # opening the data file.
2110 2120 dsize = 0
2111 2121 if r:
2112 2122 dsize = self.end(r - 1)
2113 2123 dfh = None
2114 2124 if not self._inline:
2115 2125 try:
2116 2126 dfh = self._datafp(b"r+")
2117 2127 if self._docket is None:
2118 2128 dfh.seek(0, os.SEEK_END)
2119 2129 else:
2120 2130 dfh.seek(self._docket.data_end, os.SEEK_SET)
2121 2131 except FileNotFoundError:
2122 2132 dfh = self._datafp(b"w+")
2123 2133 transaction.add(self._datafile, dsize)
2124 2134 if self._sidedatafile is not None:
2125 2135 # revlog-v2 does not inline, help Pytype
2126 2136 assert dfh is not None
2127 2137 try:
2128 2138 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2129 2139 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2130 2140 except FileNotFoundError:
2131 2141 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2132 2142 transaction.add(
2133 2143 self._sidedatafile, self._docket.sidedata_end
2134 2144 )
2135 2145
2136 2146 # opening the index file.
2137 2147 isize = r * self.index.entry_size
2138 2148 ifh = self.__index_write_fp()
2139 2149 if self._inline:
2140 2150 transaction.add(self._indexfile, dsize + isize)
2141 2151 else:
2142 2152 transaction.add(self._indexfile, isize)
2143 2153 # exposing all file handle for writing.
2144 2154 self._writinghandles = (ifh, dfh, sdfh)
2145 2155 self._segmentfile.writing_handle = ifh if self._inline else dfh
2146 2156 self._segmentfile_sidedata.writing_handle = sdfh
2147 2157 yield
2148 2158 if self._docket is not None:
2149 2159 self._write_docket(transaction)
2150 2160 finally:
2151 2161 self._writinghandles = None
2152 2162 self._segmentfile.writing_handle = None
2153 2163 self._segmentfile_sidedata.writing_handle = None
2154 2164 if dfh is not None:
2155 2165 dfh.close()
2156 2166 if sdfh is not None:
2157 2167 sdfh.close()
2158 2168 # closing the index file last to avoid exposing references to
2159 2169 # potentially unflushed data content.
2160 2170 if ifh is not None:
2161 2171 ifh.close()
2162 2172
2163 2173 def _write_docket(self, transaction):
2164 2174 """write the current docket on disk
2165 2175
2166 2176 Exists as a method to help the changelog implement transaction logic
2167 2177
2168 2178 We could also imagine using the same transaction logic for all revlogs
2169 2179 since dockets are cheap."""
2170 2180 self._docket.write(transaction)
2171 2181
2172 2182 def addrevision(
2173 2183 self,
2174 2184 text,
2175 2185 transaction,
2176 2186 link,
2177 2187 p1,
2178 2188 p2,
2179 2189 cachedelta=None,
2180 2190 node=None,
2181 2191 flags=REVIDX_DEFAULT_FLAGS,
2182 2192 deltacomputer=None,
2183 2193 sidedata=None,
2184 2194 ):
2185 2195 """add a revision to the log
2186 2196
2187 2197 text - the revision data to add
2188 2198 transaction - the transaction object used for rollback
2189 2199 link - the linkrev data to add
2190 2200 p1, p2 - the parent nodeids of the revision
2191 2201 cachedelta - an optional precomputed delta
2192 2202 node - nodeid of revision; typically node is not specified, and it is
2193 2203 computed by default as hash(text, p1, p2); however, subclasses might
2194 2204 use a different hashing method (and override checkhash() in that case)
2195 2205 flags - the known flags to set on the revision
2196 2206 deltacomputer - an optional deltacomputer instance shared between
2197 2207 multiple calls
2198 2208 """
2199 2209 if link == nullrev:
2200 2210 raise error.RevlogError(
2201 2211 _(b"attempted to add linkrev -1 to %s") % self.display_id
2202 2212 )
2203 2213
2204 2214 if sidedata is None:
2205 2215 sidedata = {}
2206 2216 elif sidedata and not self.hassidedata:
2207 2217 raise error.ProgrammingError(
2208 2218 _(b"trying to add sidedata to a revlog who don't support them")
2209 2219 )
2210 2220
2211 2221 if flags:
2212 2222 node = node or self.hash(text, p1, p2)
2213 2223
2214 2224 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2215 2225
2216 2226 # If the flag processor modifies the revision data, ignore any provided
2217 2227 # cachedelta.
2218 2228 if rawtext != text:
2219 2229 cachedelta = None
2220 2230
2221 2231 if len(rawtext) > _maxentrysize:
2222 2232 raise error.RevlogError(
2223 2233 _(
2224 2234 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2225 2235 )
2226 2236 % (self.display_id, len(rawtext))
2227 2237 )
2228 2238
2229 2239 node = node or self.hash(rawtext, p1, p2)
2230 2240 rev = self.index.get_rev(node)
2231 2241 if rev is not None:
2232 2242 return rev
2233 2243
2234 2244 if validatehash:
2235 2245 self.checkhash(rawtext, node, p1=p1, p2=p2)
2236 2246
2237 2247 return self.addrawrevision(
2238 2248 rawtext,
2239 2249 transaction,
2240 2250 link,
2241 2251 p1,
2242 2252 p2,
2243 2253 node,
2244 2254 flags,
2245 2255 cachedelta=cachedelta,
2246 2256 deltacomputer=deltacomputer,
2247 2257 sidedata=sidedata,
2248 2258 )
2249 2259
2250 2260 def addrawrevision(
2251 2261 self,
2252 2262 rawtext,
2253 2263 transaction,
2254 2264 link,
2255 2265 p1,
2256 2266 p2,
2257 2267 node,
2258 2268 flags,
2259 2269 cachedelta=None,
2260 2270 deltacomputer=None,
2261 2271 sidedata=None,
2262 2272 ):
2263 2273 """add a raw revision with known flags, node and parents
2264 2274 useful when reusing a revision not stored in this revlog (ex: received
2265 2275 over the wire, or read from an external bundle).
2266 2276 """
2267 2277 with self._writing(transaction):
2268 2278 return self._addrevision(
2269 2279 node,
2270 2280 rawtext,
2271 2281 transaction,
2272 2282 link,
2273 2283 p1,
2274 2284 p2,
2275 2285 flags,
2276 2286 cachedelta,
2277 2287 deltacomputer=deltacomputer,
2278 2288 sidedata=sidedata,
2279 2289 )
2280 2290
2281 2291 def compress(self, data):
2282 2292 """Generate a possibly-compressed representation of data."""
2283 2293 if not data:
2284 2294 return b'', data
2285 2295
2286 2296 compressed = self._compressor.compress(data)
2287 2297
2288 2298 if compressed:
2289 2299 # The revlog compressor added the header in the returned data.
2290 2300 return b'', compressed
2291 2301
2292 2302 if data[0:1] == b'\0':
2293 2303 return b'', data
2294 2304 return b'u', data
2295 2305
2296 2306 def decompress(self, data):
2297 2307 """Decompress a revlog chunk.
2298 2308
2299 2309 The chunk is expected to begin with a header identifying the
2300 2310 format type so it can be routed to an appropriate decompressor.
2301 2311 """
2302 2312 if not data:
2303 2313 return data
2304 2314
2305 2315 # Revlogs are read much more frequently than they are written and many
2306 2316 # chunks only take microseconds to decompress, so performance is
2307 2317 # important here.
2308 2318 #
2309 2319 # We can make a few assumptions about revlogs:
2310 2320 #
2311 2321 # 1) the majority of chunks will be compressed (as opposed to inline
2312 2322 # raw data).
2313 2323 # 2) decompressing *any* data will likely by at least 10x slower than
2314 2324 # returning raw inline data.
2315 2325 # 3) we want to prioritize common and officially supported compression
2316 2326 # engines
2317 2327 #
2318 2328 # It follows that we want to optimize for "decompress compressed data
2319 2329 # when encoded with common and officially supported compression engines"
2320 2330 # case over "raw data" and "data encoded by less common or non-official
2321 2331 # compression engines." That is why we have the inline lookup first
2322 2332 # followed by the compengines lookup.
2323 2333 #
2324 2334 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2325 2335 # compressed chunks. And this matters for changelog and manifest reads.
2326 2336 t = data[0:1]
2327 2337
2328 2338 if t == b'x':
2329 2339 try:
2330 2340 return _zlibdecompress(data)
2331 2341 except zlib.error as e:
2332 2342 raise error.RevlogError(
2333 2343 _(b'revlog decompress error: %s')
2334 2344 % stringutil.forcebytestr(e)
2335 2345 )
2336 2346 # '\0' is more common than 'u' so it goes first.
2337 2347 elif t == b'\0':
2338 2348 return data
2339 2349 elif t == b'u':
2340 2350 return util.buffer(data, 1)
2341 2351
2342 2352 compressor = self._get_decompressor(t)
2343 2353
2344 2354 return compressor.decompress(data)
2345 2355
2346 2356 def _addrevision(
2347 2357 self,
2348 2358 node,
2349 2359 rawtext,
2350 2360 transaction,
2351 2361 link,
2352 2362 p1,
2353 2363 p2,
2354 2364 flags,
2355 2365 cachedelta,
2356 2366 alwayscache=False,
2357 2367 deltacomputer=None,
2358 2368 sidedata=None,
2359 2369 ):
2360 2370 """internal function to add revisions to the log
2361 2371
2362 2372 see addrevision for argument descriptions.
2363 2373
2364 2374 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2365 2375
2366 2376 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2367 2377 be used.
2368 2378
2369 2379 invariants:
2370 2380 - rawtext is optional (can be None); if not set, cachedelta must be set.
2371 2381 if both are set, they must correspond to each other.
2372 2382 """
2373 2383 if node == self.nullid:
2374 2384 raise error.RevlogError(
2375 2385 _(b"%s: attempt to add null revision") % self.display_id
2376 2386 )
2377 2387 if (
2378 2388 node == self.nodeconstants.wdirid
2379 2389 or node in self.nodeconstants.wdirfilenodeids
2380 2390 ):
2381 2391 raise error.RevlogError(
2382 2392 _(b"%s: attempt to add wdir revision") % self.display_id
2383 2393 )
2384 2394 if self._writinghandles is None:
2385 2395 msg = b'adding revision outside `revlog._writing` context'
2386 2396 raise error.ProgrammingError(msg)
2387 2397
2388 2398 if self._inline:
2389 2399 fh = self._writinghandles[0]
2390 2400 else:
2391 2401 fh = self._writinghandles[1]
2392 2402
2393 2403 btext = [rawtext]
2394 2404
2395 2405 curr = len(self)
2396 2406 prev = curr - 1
2397 2407
2398 2408 offset = self._get_data_offset(prev)
2399 2409
2400 2410 if self._concurrencychecker:
2401 2411 ifh, dfh, sdfh = self._writinghandles
2402 2412 # XXX no checking for the sidedata file
2403 2413 if self._inline:
2404 2414 # offset is "as if" it were in the .d file, so we need to add on
2405 2415 # the size of the entry metadata.
2406 2416 self._concurrencychecker(
2407 2417 ifh, self._indexfile, offset + curr * self.index.entry_size
2408 2418 )
2409 2419 else:
2410 2420 # Entries in the .i are a consistent size.
2411 2421 self._concurrencychecker(
2412 2422 ifh, self._indexfile, curr * self.index.entry_size
2413 2423 )
2414 2424 self._concurrencychecker(dfh, self._datafile, offset)
2415 2425
2416 2426 p1r, p2r = self.rev(p1), self.rev(p2)
2417 2427
2418 2428 # full versions are inserted when the needed deltas
2419 2429 # become comparable to the uncompressed text
2420 2430 if rawtext is None:
2421 2431 # need rawtext size, before changed by flag processors, which is
2422 2432 # the non-raw size. use revlog explicitly to avoid filelog's extra
2423 2433 # logic that might remove metadata size.
2424 2434 textlen = mdiff.patchedsize(
2425 2435 revlog.size(self, cachedelta[0]), cachedelta[1]
2426 2436 )
2427 2437 else:
2428 2438 textlen = len(rawtext)
2429 2439
2430 2440 if deltacomputer is None:
2431 2441 write_debug = None
2432 2442 if self._debug_delta:
2433 2443 write_debug = transaction._report
2434 2444 deltacomputer = deltautil.deltacomputer(
2435 2445 self, write_debug=write_debug
2436 2446 )
2437 2447
2438 2448 revinfo = revlogutils.revisioninfo(
2439 2449 node,
2440 2450 p1,
2441 2451 p2,
2442 2452 btext,
2443 2453 textlen,
2444 2454 cachedelta,
2445 2455 flags,
2446 2456 )
2447 2457
2448 2458 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2449 2459
2450 2460 compression_mode = COMP_MODE_INLINE
2451 2461 if self._docket is not None:
2452 2462 default_comp = self._docket.default_compression_header
2453 2463 r = deltautil.delta_compression(default_comp, deltainfo)
2454 2464 compression_mode, deltainfo = r
2455 2465
2456 2466 sidedata_compression_mode = COMP_MODE_INLINE
2457 2467 if sidedata and self.hassidedata:
2458 2468 sidedata_compression_mode = COMP_MODE_PLAIN
2459 2469 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2460 2470 sidedata_offset = self._docket.sidedata_end
2461 2471 h, comp_sidedata = self.compress(serialized_sidedata)
2462 2472 if (
2463 2473 h != b'u'
2464 2474 and comp_sidedata[0:1] != b'\0'
2465 2475 and len(comp_sidedata) < len(serialized_sidedata)
2466 2476 ):
2467 2477 assert not h
2468 2478 if (
2469 2479 comp_sidedata[0:1]
2470 2480 == self._docket.default_compression_header
2471 2481 ):
2472 2482 sidedata_compression_mode = COMP_MODE_DEFAULT
2473 2483 serialized_sidedata = comp_sidedata
2474 2484 else:
2475 2485 sidedata_compression_mode = COMP_MODE_INLINE
2476 2486 serialized_sidedata = comp_sidedata
2477 2487 else:
2478 2488 serialized_sidedata = b""
2479 2489 # Don't store the offset if the sidedata is empty; that way
2480 2490 # we can easily detect empty sidedata, and it will be no different
2481 2491 # from sidedata we add manually.
2482 2492 sidedata_offset = 0
2483 2493
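# for changelog-v2, the rank of a revision is the size of the set of
# its ancestors, the revision itself included (see fast_rank)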
2484 2494 rank = RANK_UNKNOWN
2485 2495 if self._format_version == CHANGELOGV2:
2486 2496 if (p1r, p2r) == (nullrev, nullrev):
2487 2497 rank = 1
2488 2498 elif p1r != nullrev and p2r == nullrev:
2489 2499 rank = 1 + self.fast_rank(p1r)
2490 2500 elif p1r == nullrev and p2r != nullrev:
2491 2501 rank = 1 + self.fast_rank(p2r)
2492 2502 else: # merge node
2493 2503 if rustdagop is not None and self.index.rust_ext_compat:
2494 2504 rank = rustdagop.rank(self.index, p1r, p2r)
2495 2505 else:
2496 2506 pmin, pmax = sorted((p1r, p2r))
2497 2507 rank = 1 + self.fast_rank(pmax)
2498 2508 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2499 2509
2500 2510 e = revlogutils.entry(
2501 2511 flags=flags,
2502 2512 data_offset=offset,
2503 2513 data_compressed_length=deltainfo.deltalen,
2504 2514 data_uncompressed_length=textlen,
2505 2515 data_compression_mode=compression_mode,
2506 2516 data_delta_base=deltainfo.base,
2507 2517 link_rev=link,
2508 2518 parent_rev_1=p1r,
2509 2519 parent_rev_2=p2r,
2510 2520 node_id=node,
2511 2521 sidedata_offset=sidedata_offset,
2512 2522 sidedata_compressed_length=len(serialized_sidedata),
2513 2523 sidedata_compression_mode=sidedata_compression_mode,
2514 2524 rank=rank,
2515 2525 )
2516 2526
2517 2527 self.index.append(e)
2518 2528 entry = self.index.entry_binary(curr)
2519 2529 if curr == 0 and self._docket is None:
2520 2530 header = self._format_flags | self._format_version
2521 2531 header = self.index.pack_header(header)
2522 2532 entry = header + entry
2523 2533 self._writeentry(
2524 2534 transaction,
2525 2535 entry,
2526 2536 deltainfo.data,
2527 2537 link,
2528 2538 offset,
2529 2539 serialized_sidedata,
2530 2540 sidedata_offset,
2531 2541 )
2532 2542
2533 2543 rawtext = btext[0]
2534 2544
2535 2545 if alwayscache and rawtext is None:
2536 2546 rawtext = deltacomputer.buildtext(revinfo, fh)
2537 2547
2538 2548 if type(rawtext) == bytes: # only accept immutable objects
2539 2549 self._revisioncache = (node, curr, rawtext)
2540 2550 self._chainbasecache[curr] = deltainfo.chainbase
2541 2551 return curr
2542 2552
2543 2553 def _get_data_offset(self, prev):
2544 2554 """Returns the current offset in the (in-transaction) data file.
2545 2555 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2546 2556 file to store that information: since sidedata can be rewritten to the
2547 2557 end of the data file within a transaction, you can have cases where, for
2548 2558 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2549 2559 to `n - 1`'s sidedata being written after `n`'s data.
2550 2560
2551 2561 TODO cache this in a docket file before getting out of experimental."""
2552 2562 if self._docket is None:
2553 2563 return self.end(prev)
2554 2564 else:
2555 2565 return self._docket.data_end
2556 2566
2557 2567 def _writeentry(
2558 2568 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2559 2569 ):
2560 2570 # Files opened in a+ mode have inconsistent behavior on various
2561 2571 # platforms. Windows requires that a file positioning call be made
2562 2572 # when the file handle transitions between reads and writes. See
2563 2573 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2564 2574 # platforms, Python or the platform itself can be buggy. Some versions
2565 2575 # of Solaris have been observed to not append at the end of the file
2566 2576 # if the file was seeked to before the end. See issue4943 for more.
2567 2577 #
2568 2578 # We work around this issue by inserting a seek() before writing.
2569 2579 # Note: This is likely not necessary on Python 3. However, because
2570 2580 # the file handle is reused for reads and may be seeked there, we need
2571 2581 # to be careful before changing this.
2572 2582 if self._writinghandles is None:
2573 2583 msg = b'adding revision outside `revlog._writing` context'
2574 2584 raise error.ProgrammingError(msg)
2575 2585 ifh, dfh, sdfh = self._writinghandles
2576 2586 if self._docket is None:
2577 2587 ifh.seek(0, os.SEEK_END)
2578 2588 else:
2579 2589 ifh.seek(self._docket.index_end, os.SEEK_SET)
2580 2590 if dfh:
2581 2591 if self._docket is None:
2582 2592 dfh.seek(0, os.SEEK_END)
2583 2593 else:
2584 2594 dfh.seek(self._docket.data_end, os.SEEK_SET)
2585 2595 if sdfh:
2586 2596 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2587 2597
2588 2598 curr = len(self) - 1
2589 2599 if not self._inline:
2590 2600 transaction.add(self._datafile, offset)
2591 2601 if self._sidedatafile:
2592 2602 transaction.add(self._sidedatafile, sidedata_offset)
2593 2603 transaction.add(self._indexfile, curr * len(entry))
2594 2604 if data[0]:
2595 2605 dfh.write(data[0])
2596 2606 dfh.write(data[1])
2597 2607 if sidedata:
2598 2608 sdfh.write(sidedata)
2599 2609 ifh.write(entry)
2600 2610 else:
2601 2611 offset += curr * self.index.entry_size
2602 2612 transaction.add(self._indexfile, offset)
2603 2613 ifh.write(entry)
2604 2614 ifh.write(data[0])
2605 2615 ifh.write(data[1])
2606 2616 assert not sidedata
2607 2617 self._enforceinlinesize(transaction)
2608 2618 if self._docket is not None:
2609 2619 # revlog-v2 always has 3 writing handles, help Pytype
2610 2620 wh1 = self._writinghandles[0]
2611 2621 wh2 = self._writinghandles[1]
2612 2622 wh3 = self._writinghandles[2]
2613 2623 assert wh1 is not None
2614 2624 assert wh2 is not None
2615 2625 assert wh3 is not None
2616 2626 self._docket.index_end = wh1.tell()
2617 2627 self._docket.data_end = wh2.tell()
2618 2628 self._docket.sidedata_end = wh3.tell()
2619 2629
2620 2630 nodemaputil.setup_persistent_nodemap(transaction, self)
2621 2631
2622 2632 def addgroup(
2623 2633 self,
2624 2634 deltas,
2625 2635 linkmapper,
2626 2636 transaction,
2627 2637 alwayscache=False,
2628 2638 addrevisioncb=None,
2629 2639 duplicaterevisioncb=None,
2630 2640 ):
2631 2641 """
2632 2642 add a delta group
2633 2643
2634 2644 given a set of deltas, add them to the revision log. The
2635 2645 first delta is against its parent, which should be in our
2636 2646 log; the rest are against the previous delta.
2637 2647
2638 2648 If ``addrevisioncb`` is defined, it will be called with arguments of
2639 2649 this revlog and the node that was added.
2640 2650 """
2641 2651
2642 2652 if self._adding_group:
2643 2653 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2644 2654
2645 2655 self._adding_group = True
2646 2656 empty = True
2647 2657 try:
2648 2658 with self._writing(transaction):
2649 2659 write_debug = None
2650 2660 if self._debug_delta:
2651 2661 write_debug = transaction._report
2652 2662 deltacomputer = deltautil.deltacomputer(
2653 2663 self,
2654 2664 write_debug=write_debug,
2655 2665 )
2656 2666 # loop through our set of deltas
2657 2667 for data in deltas:
2658 2668 (
2659 2669 node,
2660 2670 p1,
2661 2671 p2,
2662 2672 linknode,
2663 2673 deltabase,
2664 2674 delta,
2665 2675 flags,
2666 2676 sidedata,
2667 2677 ) = data
2668 2678 link = linkmapper(linknode)
2669 2679 flags = flags or REVIDX_DEFAULT_FLAGS
2670 2680
2671 2681 rev = self.index.get_rev(node)
2672 2682 if rev is not None:
2673 2683 # this can happen if two branches make the same change
2674 2684 self._nodeduplicatecallback(transaction, rev)
2675 2685 if duplicaterevisioncb:
2676 2686 duplicaterevisioncb(self, rev)
2677 2687 empty = False
2678 2688 continue
2679 2689
2680 2690 for p in (p1, p2):
2681 2691 if not self.index.has_node(p):
2682 2692 raise error.LookupError(
2683 2693 p, self.radix, _(b'unknown parent')
2684 2694 )
2685 2695
2686 2696 if not self.index.has_node(deltabase):
2687 2697 raise error.LookupError(
2688 2698 deltabase, self.display_id, _(b'unknown delta base')
2689 2699 )
2690 2700
2691 2701 baserev = self.rev(deltabase)
2692 2702
2693 2703 if baserev != nullrev and self.iscensored(baserev):
2694 2704 # if base is censored, the delta must be a full replacement in a
2695 2705 # single patch operation
2696 2706 hlen = struct.calcsize(b">lll")
2697 2707 oldlen = self.rawsize(baserev)
2698 2708 newlen = len(delta) - hlen
2699 2709 if delta[:hlen] != mdiff.replacediffheader(
2700 2710 oldlen, newlen
2701 2711 ):
2702 2712 raise error.CensoredBaseError(
2703 2713 self.display_id, self.node(baserev)
2704 2714 )
2705 2715
2706 2716 if not flags and self._peek_iscensored(baserev, delta):
2707 2717 flags |= REVIDX_ISCENSORED
2708 2718
2709 2719 # We assume consumers of addrevisioncb will want to retrieve
2710 2720 # the added revision, which will require a call to
2711 2721 # revision(). revision() will fast path if there is a cache
2712 2722 # hit. So, we tell _addrevision() to always cache in this case.
2713 2723 # We're only using addgroup() in the context of changegroup
2714 2724 # generation so the revision data can always be handled as raw
2715 2725 # by the flagprocessor.
2716 2726 rev = self._addrevision(
2717 2727 node,
2718 2728 None,
2719 2729 transaction,
2720 2730 link,
2721 2731 p1,
2722 2732 p2,
2723 2733 flags,
2724 2734 (baserev, delta),
2725 2735 alwayscache=alwayscache,
2726 2736 deltacomputer=deltacomputer,
2727 2737 sidedata=sidedata,
2728 2738 )
2729 2739
2730 2740 if addrevisioncb:
2731 2741 addrevisioncb(self, rev)
2732 2742 empty = False
2733 2743 finally:
2734 2744 self._adding_group = False
2735 2745 return not empty
2736 2746
2737 2747 def iscensored(self, rev):
2738 2748 """Check if a file revision is censored."""
2739 2749 if not self._censorable:
2740 2750 return False
2741 2751
2742 2752 return self.flags(rev) & REVIDX_ISCENSORED
2743 2753
2744 2754 def _peek_iscensored(self, baserev, delta):
2745 2755 """Quickly check if a delta produces a censored revision."""
2746 2756 if not self._censorable:
2747 2757 return False
2748 2758
2749 2759 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2750 2760
2751 2761 def getstrippoint(self, minlink):
2752 2762 """find the minimum rev that must be stripped to strip the linkrev
2753 2763
2754 2764 Returns a tuple containing the minimum rev and a set of all revs that
2755 2765 have linkrevs that will be broken by this strip.
2756 2766 """
2757 2767 return storageutil.resolvestripinfo(
2758 2768 minlink,
2759 2769 len(self) - 1,
2760 2770 self.headrevs(),
2761 2771 self.linkrev,
2762 2772 self.parentrevs,
2763 2773 )
2764 2774
2765 2775 def strip(self, minlink, transaction):
2766 2776 """truncate the revlog on the first revision with a linkrev >= minlink
2767 2777
2768 2778 This function is called when we're stripping revision minlink and
2769 2779 its descendants from the repository.
2770 2780
2771 2781 We have to remove all revisions with linkrev >= minlink, because
2772 2782 the equivalent changelog revisions will be renumbered after the
2773 2783 strip.
2774 2784
2775 2785 So we truncate the revlog on the first of these revisions, and
2776 2786 trust that the caller has saved the revisions that shouldn't be
2777 2787 removed and that it'll re-add them after this truncation.
2778 2788 """
2779 2789 if len(self) == 0:
2780 2790 return
2781 2791
2782 2792 rev, _ = self.getstrippoint(minlink)
2783 2793 if rev == len(self):
2784 2794 return
2785 2795
2786 2796 # first truncate the files on disk
2787 2797 data_end = self.start(rev)
2788 2798 if not self._inline:
2789 2799 transaction.add(self._datafile, data_end)
2790 2800 end = rev * self.index.entry_size
2791 2801 else:
2792 2802 end = data_end + (rev * self.index.entry_size)
2793 2803
2794 2804 if self._sidedatafile:
2795 2805 sidedata_end = self.sidedata_cut_off(rev)
2796 2806 transaction.add(self._sidedatafile, sidedata_end)
2797 2807
2798 2808 transaction.add(self._indexfile, end)
2799 2809 if self._docket is not None:
2800 2810 # XXX we could leverage the docket while stripping. However, it is
2801 2811 # not powerful enough at the time of this comment
2802 2812 self._docket.index_end = end
2803 2813 self._docket.data_end = data_end
2804 2814 self._docket.sidedata_end = sidedata_end
2805 2815 self._docket.write(transaction, stripping=True)
2806 2816
2807 2817 # then reset internal state in memory to forget those revisions
2808 2818 self._revisioncache = None
2809 2819 self._chaininfocache = util.lrucachedict(500)
2810 2820 self._segmentfile.clear_cache()
2811 2821 self._segmentfile_sidedata.clear_cache()
2812 2822
2813 2823 del self.index[rev:-1]
2814 2824
2815 2825 def checksize(self):
2816 2826 """Check size of index and data files
2817 2827
2818 2828 return a (dd, di) tuple.
2819 2829 - dd: extra bytes for the "data" file
2820 2830 - di: extra bytes for the "index" file
2821 2831
2822 2832 A healthy revlog will return (0, 0).
2823 2833 """
2824 2834 expected = 0
2825 2835 if len(self):
2826 2836 expected = max(0, self.end(len(self) - 1))
2827 2837
2828 2838 try:
2829 2839 with self._datafp() as f:
2830 2840 f.seek(0, io.SEEK_END)
2831 2841 actual = f.tell()
2832 2842 dd = actual - expected
2833 2843 except FileNotFoundError:
2834 2844 dd = 0
2835 2845
2836 2846 try:
2837 2847 f = self.opener(self._indexfile)
2838 2848 f.seek(0, io.SEEK_END)
2839 2849 actual = f.tell()
2840 2850 f.close()
2841 2851 s = self.index.entry_size
2842 2852 i = max(0, actual // s)
2843 2853 di = actual - (i * s)
2844 2854 if self._inline:
2845 2855 databytes = 0
2846 2856 for r in self:
2847 2857 databytes += max(0, self.length(r))
2848 2858 dd = 0
2849 2859 di = actual - len(self) * s - databytes
2850 2860 except FileNotFoundError:
2851 2861 di = 0
2852 2862
2853 2863 return (dd, di)
2854 2864
2855 2865 def files(self):
2856 2866 res = [self._indexfile]
2857 2867 if self._docket_file is None:
2858 2868 if not self._inline:
2859 2869 res.append(self._datafile)
2860 2870 else:
2861 2871 res.append(self._docket_file)
2862 2872 res.extend(self._docket.old_index_filepaths(include_empty=False))
2863 2873 if self._docket.data_end:
2864 2874 res.append(self._datafile)
2865 2875 res.extend(self._docket.old_data_filepaths(include_empty=False))
2866 2876 if self._docket.sidedata_end:
2867 2877 res.append(self._sidedatafile)
2868 2878 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2869 2879 return res
2870 2880
2871 2881 def emitrevisions(
2872 2882 self,
2873 2883 nodes,
2874 2884 nodesorder=None,
2875 2885 revisiondata=False,
2876 2886 assumehaveparentrevisions=False,
2877 2887 deltamode=repository.CG_DELTAMODE_STD,
2878 2888 sidedata_helpers=None,
2879 2889 ):
2880 2890 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2881 2891 raise error.ProgrammingError(
2882 2892 b'unhandled value for nodesorder: %s' % nodesorder
2883 2893 )
2884 2894
2885 2895 if nodesorder is None and not self._generaldelta:
2886 2896 nodesorder = b'storage'
2887 2897
2888 2898 if (
2889 2899 not self._storedeltachains
2890 2900 and deltamode != repository.CG_DELTAMODE_PREV
2891 2901 ):
2892 2902 deltamode = repository.CG_DELTAMODE_FULL
2893 2903
2894 2904 return storageutil.emitrevisions(
2895 2905 self,
2896 2906 nodes,
2897 2907 nodesorder,
2898 2908 revlogrevisiondelta,
2899 2909 deltaparentfn=self.deltaparent,
2900 2910 candeltafn=self.candelta,
2901 2911 rawsizefn=self.rawsize,
2902 2912 revdifffn=self.revdiff,
2903 2913 flagsfn=self.flags,
2904 2914 deltamode=deltamode,
2905 2915 revisiondata=revisiondata,
2906 2916 assumehaveparentrevisions=assumehaveparentrevisions,
2907 2917 sidedata_helpers=sidedata_helpers,
2908 2918 )
2909 2919
2910 2920 DELTAREUSEALWAYS = b'always'
2911 2921 DELTAREUSESAMEREVS = b'samerevs'
2912 2922 DELTAREUSENEVER = b'never'
2913 2923
2914 2924 DELTAREUSEFULLADD = b'fulladd'
2915 2925
2916 2926 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2917 2927
2918 2928 def clone(
2919 2929 self,
2920 2930 tr,
2921 2931 destrevlog,
2922 2932 addrevisioncb=None,
2923 2933 deltareuse=DELTAREUSESAMEREVS,
2924 2934 forcedeltabothparents=None,
2925 2935 sidedata_helpers=None,
2926 2936 ):
2927 2937 """Copy this revlog to another, possibly with format changes.
2928 2938
2929 2939 The destination revlog will contain the same revisions and nodes.
2930 2940 However, it may not be bit-for-bit identical due to e.g. delta encoding
2931 2941 differences.
2932 2942
2933 2943 The ``deltareuse`` argument controls how deltas from the existing revlog
2934 2944 are preserved in the destination revlog. The argument can have the
2935 2945 following values:
2936 2946
2937 2947 DELTAREUSEALWAYS
2938 2948 Deltas will always be reused (if possible), even if the destination
2939 2949 revlog would not select the same revisions for the delta. This is the
2940 2950 fastest mode of operation.
2941 2951 DELTAREUSESAMEREVS
2942 2952 Deltas will be reused if the destination revlog would pick the same
2943 2953 revisions for the delta. This mode strikes a balance between speed
2944 2954 and optimization.
2945 2955 DELTAREUSENEVER
2946 2956 Deltas will never be reused. This is the slowest mode of execution.
2947 2957 This mode can be used to recompute deltas (e.g. if the diff/delta
2948 2958 algorithm changes).
2949 2959 DELTAREUSEFULLADD
2950 2960 Revisions will be re-added as if they were new content. This is
2951 2961 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2952 2962 e.g. large file detection and handling.
2953 2963
2954 2964 Delta computation can be slow, so the choice of delta reuse policy can
2955 2965 significantly affect run time.
2956 2966
2957 2967 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2958 2968 two extremes. Deltas will be reused if they are appropriate. But if the
2959 2969 delta could choose a better revision, it will do so. This means if you
2960 2970 are converting a non-generaldelta revlog to a generaldelta revlog,
2961 2971 deltas will be recomputed if the delta's parent isn't a parent of the
2962 2972 revision.
2963 2973
2964 2974 In addition to the delta policy, the ``forcedeltabothparents``
2965 2975 argument controls whether to force computing deltas against both parents
2966 2976 for merges. When unset, the destination revlog's current setting is used.
2967 2977
2968 2978 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2969 2979 `sidedata_helpers`.
2970 2980 """
2971 2981 if deltareuse not in self.DELTAREUSEALL:
2972 2982 raise ValueError(
2973 2983 _(b'value for deltareuse invalid: %s') % deltareuse
2974 2984 )
2975 2985
2976 2986 if len(destrevlog):
2977 2987 raise ValueError(_(b'destination revlog is not empty'))
2978 2988
2979 2989 if getattr(self, 'filteredrevs', None):
2980 2990 raise ValueError(_(b'source revlog has filtered revisions'))
2981 2991 if getattr(destrevlog, 'filteredrevs', None):
2982 2992 raise ValueError(_(b'destination revlog has filtered revisions'))
2983 2993
2984 2994 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2985 2995 # if possible.
2986 2996 oldlazydelta = destrevlog._lazydelta
2987 2997 oldlazydeltabase = destrevlog._lazydeltabase
2988 2998 oldamd = destrevlog._deltabothparents
2989 2999
2990 3000 try:
2991 3001 if deltareuse == self.DELTAREUSEALWAYS:
2992 3002 destrevlog._lazydeltabase = True
2993 3003 destrevlog._lazydelta = True
2994 3004 elif deltareuse == self.DELTAREUSESAMEREVS:
2995 3005 destrevlog._lazydeltabase = False
2996 3006 destrevlog._lazydelta = True
2997 3007 elif deltareuse == self.DELTAREUSENEVER:
2998 3008 destrevlog._lazydeltabase = False
2999 3009 destrevlog._lazydelta = False
3000 3010
3001 3011 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3002 3012
3003 3013 self._clone(
3004 3014 tr,
3005 3015 destrevlog,
3006 3016 addrevisioncb,
3007 3017 deltareuse,
3008 3018 forcedeltabothparents,
3009 3019 sidedata_helpers,
3010 3020 )
3011 3021
3012 3022 finally:
3013 3023 destrevlog._lazydelta = oldlazydelta
3014 3024 destrevlog._lazydeltabase = oldlazydeltabase
3015 3025 destrevlog._deltabothparents = oldamd
3016 3026
3017 3027 def _clone(
3018 3028 self,
3019 3029 tr,
3020 3030 destrevlog,
3021 3031 addrevisioncb,
3022 3032 deltareuse,
3023 3033 forcedeltabothparents,
3024 3034 sidedata_helpers,
3025 3035 ):
3026 3036 """perform the core duty of `revlog.clone` after parameter processing"""
3027 3037 write_debug = None
3028 3038 if self._debug_delta:
3029 3039 write_debug = tr._report
3030 3040 deltacomputer = deltautil.deltacomputer(
3031 3041 destrevlog,
3032 3042 write_debug=write_debug,
3033 3043 )
3034 3044 index = self.index
3035 3045 for rev in self:
3036 3046 entry = index[rev]
3037 3047
3038 3048 # Some classes override linkrev to take filtered revs into
3039 3049 # account. Use raw entry from index.
3040 3050 flags = entry[0] & 0xFFFF
3041 3051 linkrev = entry[4]
3042 3052 p1 = index[entry[5]][7]
3043 3053 p2 = index[entry[6]][7]
3044 3054 node = entry[7]
3045 3055
3046 3056 # (Possibly) reuse the delta from the revlog if allowed and
3047 3057 # the revlog chunk is a delta.
3048 3058 cachedelta = None
3049 3059 rawtext = None
3050 3060 if deltareuse == self.DELTAREUSEFULLADD:
3051 3061 text = self._revisiondata(rev)
3052 3062 sidedata = self.sidedata(rev)
3053 3063
3054 3064 if sidedata_helpers is not None:
3055 3065 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3056 3066 self, sidedata_helpers, sidedata, rev
3057 3067 )
3058 3068 flags = flags | new_flags[0] & ~new_flags[1]
3059 3069
3060 3070 destrevlog.addrevision(
3061 3071 text,
3062 3072 tr,
3063 3073 linkrev,
3064 3074 p1,
3065 3075 p2,
3066 3076 cachedelta=cachedelta,
3067 3077 node=node,
3068 3078 flags=flags,
3069 3079 deltacomputer=deltacomputer,
3070 3080 sidedata=sidedata,
3071 3081 )
3072 3082 else:
3073 3083 if destrevlog._lazydelta:
3074 3084 dp = self.deltaparent(rev)
3075 3085 if dp != nullrev:
3076 3086 cachedelta = (dp, bytes(self._chunk(rev)))
3077 3087
3078 3088 sidedata = None
3079 3089 if not cachedelta:
3080 3090 rawtext = self._revisiondata(rev)
3081 3091 sidedata = self.sidedata(rev)
3082 3092 if sidedata is None:
3083 3093 sidedata = self.sidedata(rev)
3084 3094
3085 3095 if sidedata_helpers is not None:
3086 3096 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3087 3097 self, sidedata_helpers, sidedata, rev
3088 3098 )
3089 3099 flags = flags | new_flags[0] & ~new_flags[1]
3090 3100
3091 3101 with destrevlog._writing(tr):
3092 3102 destrevlog._addrevision(
3093 3103 node,
3094 3104 rawtext,
3095 3105 tr,
3096 3106 linkrev,
3097 3107 p1,
3098 3108 p2,
3099 3109 flags,
3100 3110 cachedelta,
3101 3111 deltacomputer=deltacomputer,
3102 3112 sidedata=sidedata,
3103 3113 )
3104 3114
3105 3115 if addrevisioncb:
3106 3116 addrevisioncb(self, rev, node)
3107 3117
3108 3118 def censorrevision(self, tr, censornode, tombstone=b''):
3109 3119 if self._format_version == REVLOGV0:
3110 3120 raise error.RevlogError(
3111 3121 _(b'cannot censor with version %d revlogs')
3112 3122 % self._format_version
3113 3123 )
3114 3124 elif self._format_version == REVLOGV1:
3115 3125 rewrite.v1_censor(self, tr, censornode, tombstone)
3116 3126 else:
3117 3127 rewrite.v2_censor(self, tr, censornode, tombstone)
3118 3128
3119 3129 def verifyintegrity(self, state):
3120 3130 """Verifies the integrity of the revlog.
3121 3131
3122 3132 Yields ``revlogproblem`` instances describing problems that are
3123 3133 found.
3124 3134 """
3125 3135 dd, di = self.checksize()
3126 3136 if dd:
3127 3137 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3128 3138 if di:
3129 3139 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3130 3140
3131 3141 version = self._format_version
3132 3142
3133 3143 # The verifier tells us what version revlog we should be.
3134 3144 if version != state[b'expectedversion']:
3135 3145 yield revlogproblem(
3136 3146 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3137 3147 % (self.display_id, version, state[b'expectedversion'])
3138 3148 )
3139 3149
3140 3150 state[b'skipread'] = set()
3141 3151 state[b'safe_renamed'] = set()
3142 3152
3143 3153 for rev in self:
3144 3154 node = self.node(rev)
3145 3155
3146 3156 # Verify contents. 4 cases to care about:
3147 3157 #
3148 3158 # common: the most common case
3149 3159 # rename: with a rename
3150 3160 # meta: file content starts with b'\1\n', the metadata
3151 3161 # header defined in filelog.py, but without a rename
3152 3162 # ext: content stored externally
3153 3163 #
3154 3164 # More formally, their differences are shown below:
3155 3165 #
3156 3166 # | common | rename | meta | ext
3157 3167 # -------------------------------------------------------
3158 3168 # flags() | 0 | 0 | 0 | not 0
3159 3169 # renamed() | False | True | False | ?
3160 3170 # rawtext[0:2]=='\1\n'| False | True | True | ?
3161 3171 #
3162 3172 # "rawtext" means the raw text stored in revlog data, which
3163 3173 # could be retrieved by "rawdata(rev)". "text"
3164 3174 # mentioned below is "revision(rev)".
3165 3175 #
3166 3176 # There are 3 different lengths stored physically:
3167 3177 # 1. L1: rawsize, stored in revlog index
3168 3178 # 2. L2: len(rawtext), stored in revlog data
3169 3179 # 3. L3: len(text), stored in revlog data if flags==0, or
3170 3180 # possibly somewhere else if flags!=0
3171 3181 #
3172 3182 # L1 should be equal to L2. L3 could be different from them.
3173 3183 # "text" may or may not affect commit hash depending on flag
3174 3184 # processors (see flagutil.addflagprocessor).
3175 3185 #
3176 3186 # | common | rename | meta | ext
3177 3187 # -------------------------------------------------
3178 3188 # rawsize() | L1 | L1 | L1 | L1
3179 3189 # size() | L1 | L2-LM | L1(*) | L1 (?)
3180 3190 # len(rawtext) | L2 | L2 | L2 | L2
3181 3191 # len(text) | L2 | L2 | L2 | L3
3182 3192 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3183 3193 #
3184 3194 # LM: length of metadata, depending on rawtext
3185 3195 # (*): not ideal, see comment in filelog.size
3186 3196 # (?): could be "- len(meta)" if the resolved content has
3187 3197 # rename metadata
3188 3198 #
3189 3199 # Checks needed to be done:
3190 3200 # 1. length check: L1 == L2, in all cases.
3191 3201 # 2. hash check: depending on flag processor, we may need to
3192 3202 # use either "text" (external), or "rawtext" (in revlog).
3193 3203
3194 3204 try:
3195 3205 skipflags = state.get(b'skipflags', 0)
3196 3206 if skipflags:
3197 3207 skipflags &= self.flags(rev)
3198 3208
3199 3209 _verify_revision(self, skipflags, state, node)
3200 3210
3201 3211 l1 = self.rawsize(rev)
3202 3212 l2 = len(self.rawdata(node))
3203 3213
3204 3214 if l1 != l2:
3205 3215 yield revlogproblem(
3206 3216 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3207 3217 node=node,
3208 3218 )
3209 3219
3210 3220 except error.CensoredNodeError:
3211 3221 if state[b'erroroncensored']:
3212 3222 yield revlogproblem(
3213 3223 error=_(b'censored file data'), node=node
3214 3224 )
3215 3225 state[b'skipread'].add(node)
3216 3226 except Exception as e:
3217 3227 yield revlogproblem(
3218 3228 error=_(b'unpacking %s: %s')
3219 3229 % (short(node), stringutil.forcebytestr(e)),
3220 3230 node=node,
3221 3231 )
3222 3232 state[b'skipread'].add(node)
3223 3233
3224 3234 def storageinfo(
3225 3235 self,
3226 3236 exclusivefiles=False,
3227 3237 sharedfiles=False,
3228 3238 revisionscount=False,
3229 3239 trackedsize=False,
3230 3240 storedsize=False,
3231 3241 ):
3232 3242 d = {}
3233 3243
3234 3244 if exclusivefiles:
3235 3245 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3236 3246 if not self._inline:
3237 3247 d[b'exclusivefiles'].append((self.opener, self._datafile))
3238 3248
3239 3249 if sharedfiles:
3240 3250 d[b'sharedfiles'] = []
3241 3251
3242 3252 if revisionscount:
3243 3253 d[b'revisionscount'] = len(self)
3244 3254
3245 3255 if trackedsize:
3246 3256 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3247 3257
3248 3258 if storedsize:
3249 3259 d[b'storedsize'] = sum(
3250 3260 self.opener.stat(path).st_size for path in self.files()
3251 3261 )
3252 3262
3253 3263 return d
3254 3264
3255 3265 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3256 3266 if not self.hassidedata:
3257 3267 return
3258 3268 # revlog formats with sidedata support do not support inline
3259 3269 assert not self._inline
3260 3270 if not helpers[1] and not helpers[2]:
3261 3271 # Nothing to generate or remove
3262 3272 return
3263 3273
3264 3274 new_entries = []
3265 3275 # append the new sidedata
3266 3276 with self._writing(transaction):
3267 3277 ifh, dfh, sdfh = self._writinghandles
3268 3278 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3269 3279
3270 3280 current_offset = sdfh.tell()
3271 3281 for rev in range(startrev, endrev + 1):
3272 3282 entry = self.index[rev]
3273 3283 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3274 3284 store=self,
3275 3285 sidedata_helpers=helpers,
3276 3286 sidedata={},
3277 3287 rev=rev,
3278 3288 )
3279 3289
3280 3290 serialized_sidedata = sidedatautil.serialize_sidedata(
3281 3291 new_sidedata
3282 3292 )
3283 3293
3284 3294 sidedata_compression_mode = COMP_MODE_INLINE
3285 3295 if serialized_sidedata and self.hassidedata:
3286 3296 sidedata_compression_mode = COMP_MODE_PLAIN
3287 3297 h, comp_sidedata = self.compress(serialized_sidedata)
3288 3298 if (
3289 3299 h != b'u'
3290 3300 and comp_sidedata[0] != b'\0'
3291 3301 and len(comp_sidedata) < len(serialized_sidedata)
3292 3302 ):
3293 3303 assert not h
3294 3304 if (
3295 3305 comp_sidedata[0]
3296 3306 == self._docket.default_compression_header
3297 3307 ):
3298 3308 sidedata_compression_mode = COMP_MODE_DEFAULT
3299 3309 serialized_sidedata = comp_sidedata
3300 3310 else:
3301 3311 sidedata_compression_mode = COMP_MODE_INLINE
3302 3312 serialized_sidedata = comp_sidedata
3303 3313 if entry[8] != 0 or entry[9] != 0:
3304 3314 # rewriting entries that already have sidedata is not
3305 3315 # supported yet, because it introduces garbage data in the
3306 3316 # revlog.
3307 3317 msg = b"rewriting existing sidedata is not supported yet"
3308 3318 raise error.Abort(msg)
3309 3319
3310 3320 # Apply (potential) flags to add and to remove after running
3311 3321 # the sidedata helpers
3312 3322 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3313 3323 entry_update = (
3314 3324 current_offset,
3315 3325 len(serialized_sidedata),
3316 3326 new_offset_flags,
3317 3327 sidedata_compression_mode,
3318 3328 )
3319 3329
3320 3330 # the sidedata computation might have moved the file cursors around
3321 3331 sdfh.seek(current_offset, os.SEEK_SET)
3322 3332 sdfh.write(serialized_sidedata)
3323 3333 new_entries.append(entry_update)
3324 3334 current_offset += len(serialized_sidedata)
3325 3335 self._docket.sidedata_end = sdfh.tell()
3326 3336
3327 3337 # rewrite the new index entries
3328 3338 ifh.seek(startrev * self.index.entry_size)
3329 3339 for i, e in enumerate(new_entries):
3330 3340 rev = startrev + i
3331 3341 self.index.replace_sidedata_info(rev, *e)
3332 3342 packed = self.index.entry_binary(rev)
3333 3343 if rev == 0 and self._docket is None:
3334 3344 header = self._format_flags | self._format_version
3335 3345 header = self.index.pack_header(header)
3336 3346 packed = header + packed
3337 3347 ifh.write(packed)