##// END OF EJS Templates
delta-find: use sets instead of list in the snapshot cache...
marmoute -
r50574:b670eb3d default
parent child Browse files
Show More
@@ -1,3324 +1,3324 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
typedef struct indexObjectStruct indexObject;

typedef struct {
	/* One trie node: child slot per hex nybble. See the nodetree
	 * comment below for how the int values are encoded. */
	int children[16];
} nodetreenode;

typedef struct {
	/* C-level API exposed to other extensions (e.g. rust) via a
	 * PyCapsule; abi_version guards against layout mismatches. */
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*fast_rank)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;
39 39
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index; /* back-pointer to the owning index */
	nodetreenode *nodes;
	Py_ssize_t nodelen; /* digest length of the nodes being stored */
	size_t length;   /* # nodes in use */
	size_t capacity; /* # nodes allocated */
	int depth;       /* maximum depth of tree */
	int splits;      /* # splits performed */
} nodetree;

typedef struct {
	/* Python-object wrapper around a standalone nodetree */
	PyObject_HEAD /* ; */
	    nodetree nt;
} nodetreeObject;
61 61
/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * end.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;       /* raw bytes of index */
	Py_ssize_t nodelen;   /* digest size of the hash, 20 for SHA-1 */
	PyObject *nullentry;  /* fast path for references to null */
	Py_buffer buf;        /* buffer of data */
	const char **offsets; /* populated on demand */
	Py_ssize_t length;    /* current on-disk number of elements */
	unsigned new_length;  /* number of added elements */
	unsigned added_length; /* space reserved for added elements */
	char *added;           /* populated on demand */
	PyObject *headrevs;    /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined; /* nonzero if entry offsets are irregular because the
	              * revision data is interleaved (see index_deref) */
	long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
	                  */
	long rust_ext_compat; /* compatibility with being used in rust
	                         extensions */
	long format_version;  /* format version selector (format_*) */
};
99 99
100 100 static Py_ssize_t index_length(const indexObject *self)
101 101 {
102 102 return self->length + self->new_length;
103 103 }
104 104
/* Node id and rev used to represent the absent "null" revision. */
static const char nullid[32] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node);

/* Format string shared by index_get (Py_BuildValue) and index_append
 * (PyArg_ParseTuple): 13 values per entry. 'K'/'k' is chosen so the
 * 64-bit fields map to a wide enough C type on 32-bit longs. */
#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = "Kiiiiiiy#KiBBi";
#else
static const char *const tuple_format = "kiiiiiiy#kiBBi";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_entry_size = 64;

/* A Revlogv2 index entry is 96 bytes long. */
static const long v2_entry_size = 96;

/* A Changelogv2 index entry is 96 bytes long. */
static const long cl2_entry_size = 96;

/* Internal format version.
 * Must match their counterparts in revlogutils/constants.py */
static const long format_v1 = 1;       /* constants.py: REVLOGV1 */
static const long format_v2 = 0xDEAD;  /* constants.py: REVLOGV2 */
static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */

/* Byte offsets of each field within a v1 index entry. */
static const long entry_v1_offset_high = 0;
static const long entry_v1_offset_offset_flags = 4;
static const long entry_v1_offset_comp_len = 8;
static const long entry_v1_offset_uncomp_len = 12;
static const long entry_v1_offset_base_rev = 16;
static const long entry_v1_offset_link_rev = 20;
static const long entry_v1_offset_parent_1 = 24;
static const long entry_v1_offset_parent_2 = 28;
static const long entry_v1_offset_node_id = 32;

/* Byte offsets of each field within a v2 index entry. */
static const long entry_v2_offset_high = 0;
static const long entry_v2_offset_offset_flags = 4;
static const long entry_v2_offset_comp_len = 8;
static const long entry_v2_offset_uncomp_len = 12;
static const long entry_v2_offset_base_rev = 16;
static const long entry_v2_offset_link_rev = 20;
static const long entry_v2_offset_parent_1 = 24;
static const long entry_v2_offset_parent_2 = 28;
static const long entry_v2_offset_node_id = 32;
static const long entry_v2_offset_sidedata_offset = 64;
static const long entry_v2_offset_sidedata_comp_len = 72;
static const long entry_v2_offset_all_comp_mode = 76;
/* next free offset: 77 */

/* Byte offsets of each field within a changelogv2 index entry. */
static const long entry_cl2_offset_high = 0;
static const long entry_cl2_offset_offset_flags = 4;
static const long entry_cl2_offset_comp_len = 8;
static const long entry_cl2_offset_uncomp_len = 12;
static const long entry_cl2_offset_parent_1 = 16;
static const long entry_cl2_offset_parent_2 = 20;
static const long entry_cl2_offset_node_id = 24;
static const long entry_cl2_offset_sidedata_offset = 56;
static const long entry_cl2_offset_sidedata_comp_len = 64;
static const long entry_cl2_offset_all_comp_mode = 68;
static const long entry_cl2_offset_rank = 69;
/* next free offset: 73 */

/* Only compression mode representable in v1 entries. */
static const char comp_mode_inline = 2;
/* Sentinel rank for formats that do not persist it. */
static const int rank_unknown = -1;
172 172
/*
 * Set mercurial.error.RevlogError as the current Python exception.
 * Falls back to an import error or SystemError if the class cannot be
 * resolved; some exception is always pending on return.
 */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	/* PyModule_GetDict returns a borrowed reference; take our own so
	 * the cleanup path can release it unconditionally. */
	Py_INCREF(dict);

	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
202 202
/*
 * Return a pointer to the beginning of a RevlogNG record.
 *
 * Returns NULL (with a Python exception set) on allocation or scan
 * failure.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	/* Entries appended since load live in the separate 'added' buffer. */
	if (pos >= self->length)
		return self->added + (pos - self->length) * self->entry_size;

	if (self->inlined && pos > 0) {
		/* Inline revlogs interleave revision data with index
		 * entries, so offsets are irregular; build the offset
		 * table lazily on first access. */
		if (self->offsets == NULL) {
			Py_ssize_t ret;
			self->offsets =
			    PyMem_Malloc(self->length * sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			ret = inline_scan(self, self->offsets);
			if (ret == -1) {
				return NULL;
			};
		}
		return self->offsets[pos];
	}

	/* Non-inline on-disk entries have a fixed stride. */
	return (const char *)(self->buf.buf) + pos * self->entry_size;
}
228 228
/*
 * Get parents of the given rev.
 *
 * The specified rev must be valid and must not be nullrev. A returned
 * parent revision may be nullrev, but is guaranteed to be in valid range.
 *
 * NOTE(review): index_deref() can return NULL on allocation/scan
 * failure and that case is not handled here — confirm callers'
 * assumptions before relying on this in new code paths.
 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
	const char *data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		ps[0] = getbe32(data + entry_v1_offset_parent_1);
		ps[1] = getbe32(data + entry_v1_offset_parent_2);
	} else if (self->format_version == format_v2) {
		ps[0] = getbe32(data + entry_v2_offset_parent_1);
		ps[1] = getbe32(data + entry_v2_offset_parent_2);
	} else if (self->format_version == format_cl2) {
		ps[0] = getbe32(data + entry_cl2_offset_parent_1);
		ps[1] = getbe32(data + entry_cl2_offset_parent_2);
	} else {
		raise_revlog_error();
		return -1;
	}

	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}
262 262
263 263 /*
264 264 * Get parents of the given rev.
265 265 *
266 266 * If the specified rev is out of range, IndexError will be raised. If the
267 267 * revlog entry is corrupted, ValueError may be raised.
268 268 *
269 269 * Returns 0 on success or -1 on failure.
270 270 */
271 271 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
272 272 {
273 273 int tiprev;
274 274 if (!op || !HgRevlogIndex_Check(op) || !ps) {
275 275 PyErr_BadInternalCall();
276 276 return -1;
277 277 }
278 278 tiprev = (int)index_length((indexObject *)op) - 1;
279 279 if (rev < -1 || rev > tiprev) {
280 280 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
281 281 return -1;
282 282 } else if (rev == -1) {
283 283 ps[0] = ps[1] = -1;
284 284 return 0;
285 285 } else {
286 286 return index_get_parents((indexObject *)op, rev, ps, tiprev);
287 287 }
288 288 }
289 289
/*
 * Return the byte offset in the data file at which the given revision
 * starts, or -1 (with an exception set) on an unknown format version.
 *
 * The on-disk field packs flags in the low 16 bits; they are shifted
 * out before returning.
 */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	uint64_t offset;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		offset = getbe32(data + entry_v1_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_v2) {
		offset = getbe32(data + entry_v2_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_cl2) {
		/* changelogv2 has no version header in entry 0 */
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset = getbe32(data + entry_cl2_offset_offset_flags);
		offset |= ((uint64_t)offset_high) << 32;
	} else {
		raise_revlog_error();
		return -1;
	}

	/* drop the 16 flag bits to obtain the pure byte offset */
	return (int64_t)(offset >> 16);
}
331 331
/*
 * Return the compressed length of the given revision's data, or -1
 * (with an exception set) on an unknown format or a corrupted
 * (negative) on-disk value.
 */
static inline int index_get_length(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	int tmp;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		tmp = (int)getbe32(data + entry_v1_offset_comp_len);
	} else if (self->format_version == format_v2) {
		tmp = (int)getbe32(data + entry_v2_offset_comp_len);
	} else if (self->format_version == format_cl2) {
		tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
	} else {
		raise_revlog_error();
		return -1;
	}
	/* a negative value means the unsigned on-disk field overflowed int */
	if (tmp < 0) {
		PyErr_Format(PyExc_OverflowError,
		             "revlog entry size out of bound (%d)", tmp);
		return -1;
	}
	return tmp;
}
359 359
/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 */
/*
 * Decode the entry at 'pos' into a 13-item Python tuple (see
 * tuple_format). Returns a new reference, the shared nullentry for
 * nullrev, or NULL with an exception set.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank = rank_unknown;
	char data_comp_mode, sidedata_comp_mode;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	if (self->format_version == format_v1) {
		offset_flags = getbe32(data + entry_v1_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v1_offset_comp_len);
		uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
		base_rev = getbe32(data + entry_v1_offset_base_rev);
		link_rev = getbe32(data + entry_v1_offset_link_rev);
		parent_1 = getbe32(data + entry_v1_offset_parent_1);
		parent_2 = getbe32(data + entry_v1_offset_parent_2);
		c_node_id = data + entry_v1_offset_node_id;

		/* v1 predates sidedata and explicit compression modes */
		sidedata_offset = 0;
		sidedata_comp_len = 0;
		data_comp_mode = comp_mode_inline;
		sidedata_comp_mode = comp_mode_inline;
	} else if (self->format_version == format_v2) {
		offset_flags = getbe32(data + entry_v2_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v2_offset_comp_len);
		uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
		base_rev = getbe32(data + entry_v2_offset_base_rev);
		link_rev = getbe32(data + entry_v2_offset_link_rev);
		parent_1 = getbe32(data + entry_v2_offset_parent_1);
		parent_2 = getbe32(data + entry_v2_offset_parent_2);
		c_node_id = data + entry_v2_offset_node_id;

		sidedata_offset =
		    getbe64(data + entry_v2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_v2_offset_sidedata_comp_len);
		/* both compression modes share one byte: data in bits 0-1,
		 * sidedata in bits 2-3 */
		data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
	} else if (self->format_version == format_cl2) {
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
		offset_flags |= ((uint64_t)offset_high) << 32;
		comp_len = getbe32(data + entry_cl2_offset_comp_len);
		uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
		/* base_rev and link_rev are not stored in changelogv2, but are
		   still used by some functions shared with the other revlogs.
		   They are supposed to contain links to other revisions,
		   but they always point to themselves in the case of a changelog.
		*/
		base_rev = pos;
		link_rev = pos;
		parent_1 = getbe32(data + entry_cl2_offset_parent_1);
		parent_2 = getbe32(data + entry_cl2_offset_parent_2);
		c_node_id = data + entry_cl2_offset_node_id;
		sidedata_offset =
		    getbe64(data + entry_cl2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_cl2_offset_sidedata_comp_len);
		data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
		/* only changelogv2 persists the rank */
		rank = getbe32(data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
	                     self->nodelen, sidedata_offset, sidedata_comp_len,
	                     data_comp_mode, sidedata_comp_mode, rank);
}
487 487 /*
488 488 * Pack header information in binary
489 489 */
490 490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
491 491 {
492 492 int header;
493 493 char out[4];
494 494 if (!PyArg_ParseTuple(args, "i", &header)) {
495 495 return NULL;
496 496 }
497 497 if (self->format_version != format_v1) {
498 498 PyErr_Format(PyExc_RuntimeError,
499 499 "version header should go in the docket, not the "
500 500 "index: %d",
501 501 header);
502 502 return NULL;
503 503 }
504 504 putbe32(header, out);
505 505 return PyBytes_FromStringAndSize(out, 4);
506 506 }
507 507 /*
508 508 * Return the raw binary string representing a revision
509 509 */
510 510 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
511 511 {
512 512 long rev;
513 513 const char *data;
514 514 Py_ssize_t length = index_length(self);
515 515
516 516 if (!pylong_to_long(value, &rev)) {
517 517 return NULL;
518 518 }
519 519 if (rev < 0 || rev >= length) {
520 520 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
521 521 rev);
522 522 return NULL;
523 523 };
524 524
525 525 data = index_deref(self, rev);
526 526 if (data == NULL)
527 527 return NULL;
528 528 if (rev == 0 && self->format_version == format_v1) {
529 529 /* the header is eating the start of the first entry */
530 530 return PyBytes_FromStringAndSize(data + 4,
531 531 self->entry_size - 4);
532 532 }
533 533 return PyBytes_FromStringAndSize(data, self->entry_size);
534 534 }
535 535
536 536 /*
537 537 * Return the hash of node corresponding to the given rev.
538 538 */
539 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 540 {
541 541 Py_ssize_t length = index_length(self);
542 542 const char *data;
543 543 const char *node_id;
544 544
545 545 if (pos == nullrev)
546 546 return nullid;
547 547
548 548 if (pos >= length)
549 549 return NULL;
550 550
551 551 data = index_deref(self, pos);
552 552
553 553 if (self->format_version == format_v1) {
554 554 node_id = data + entry_v1_offset_node_id;
555 555 } else if (self->format_version == format_v2) {
556 556 node_id = data + entry_v2_offset_node_id;
557 557 } else if (self->format_version == format_cl2) {
558 558 node_id = data + entry_cl2_offset_node_id;
559 559 } else {
560 560 raise_revlog_error();
561 561 return NULL;
562 562 }
563 563
564 564 return data ? node_id : NULL;
565 565 }
566 566
567 567 /*
568 568 * Return the stored rank of a given revision if known, or rank_unknown
569 569 * otherwise.
570 570 *
571 571 * The rank of a revision is the size of the sub-graph it defines as a head.
572 572 * Equivalently, the rank of a revision `r` is the size of the set
573 573 * `ancestors(r)`, `r` included.
574 574 *
575 575 * This method returns the rank retrieved from the revlog in constant time. It
576 576 * makes no attempt at computing unknown values for versions of the revlog
577 577 * which do not persist the rank.
578 578 */
579 579 static int index_fast_rank(indexObject *self, Py_ssize_t pos)
580 580 {
581 581 Py_ssize_t length = index_length(self);
582 582
583 583 if (self->format_version != format_cl2 || pos >= length) {
584 584 return rank_unknown;
585 585 }
586 586
587 587 if (pos == nullrev) {
588 588 return 0; /* convention */
589 589 }
590 590
591 591 return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
592 592 }
593 593
594 594 /*
595 595 * Return the hash of the node corresponding to the given rev. The
596 596 * rev is assumed to be existing. If not, an exception is set.
597 597 */
598 598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
599 599 {
600 600 const char *node = index_node(self, pos);
601 601 if (node == NULL) {
602 602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
603 603 (int)pos);
604 604 }
605 605 return node;
606 606 }
607 607
608 608 static int nt_insert(nodetree *self, const char *node, int rev);
609 609
610 610 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
611 611 {
612 612 Py_ssize_t thisnodelen;
613 613 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
614 614 return -1;
615 615 if (nodelen == thisnodelen)
616 616 return 0;
617 617 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
618 618 thisnodelen, nodelen);
619 619 return -1;
620 620 }
621 621
622 622 static PyObject *index_append(indexObject *self, PyObject *obj)
623 623 {
624 624 uint64_t offset_flags, sidedata_offset;
625 625 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
626 626 sidedata_comp_len, rank;
627 627 char data_comp_mode, sidedata_comp_mode;
628 628 Py_ssize_t c_node_id_len;
629 629 const char *c_node_id;
630 630 char comp_field;
631 631 char *data;
632 632
633 633 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
634 634 &uncomp_len, &base_rev, &link_rev, &parent_1,
635 635 &parent_2, &c_node_id, &c_node_id_len,
636 636 &sidedata_offset, &sidedata_comp_len,
637 637 &data_comp_mode, &sidedata_comp_mode, &rank)) {
638 638 PyErr_SetString(PyExc_TypeError, "12-tuple required");
639 639 return NULL;
640 640 }
641 641
642 642 if (c_node_id_len != self->nodelen) {
643 643 PyErr_SetString(PyExc_TypeError, "invalid node");
644 644 return NULL;
645 645 }
646 646 if (self->format_version == format_v1) {
647 647
648 648 if (data_comp_mode != comp_mode_inline) {
649 649 PyErr_Format(PyExc_ValueError,
650 650 "invalid data compression mode: %i",
651 651 data_comp_mode);
652 652 return NULL;
653 653 }
654 654 if (sidedata_comp_mode != comp_mode_inline) {
655 655 PyErr_Format(PyExc_ValueError,
656 656 "invalid sidedata compression mode: %i",
657 657 sidedata_comp_mode);
658 658 return NULL;
659 659 }
660 660 }
661 661
662 662 if (self->new_length == self->added_length) {
663 663 size_t new_added_length =
664 664 self->added_length ? self->added_length * 2 : 4096;
665 665 void *new_added = PyMem_Realloc(
666 666 self->added, new_added_length * self->entry_size);
667 667 if (!new_added)
668 668 return PyErr_NoMemory();
669 669 self->added = new_added;
670 670 self->added_length = new_added_length;
671 671 }
672 672 rev = self->length + self->new_length;
673 673 data = self->added + self->entry_size * self->new_length++;
674 674
675 675 memset(data, 0, self->entry_size);
676 676
677 677 if (self->format_version == format_v1) {
678 678 putbe32(offset_flags >> 32, data + entry_v1_offset_high);
679 679 putbe32(offset_flags & 0xffffffffU,
680 680 data + entry_v1_offset_offset_flags);
681 681 putbe32(comp_len, data + entry_v1_offset_comp_len);
682 682 putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
683 683 putbe32(base_rev, data + entry_v1_offset_base_rev);
684 684 putbe32(link_rev, data + entry_v1_offset_link_rev);
685 685 putbe32(parent_1, data + entry_v1_offset_parent_1);
686 686 putbe32(parent_2, data + entry_v1_offset_parent_2);
687 687 memcpy(data + entry_v1_offset_node_id, c_node_id,
688 688 c_node_id_len);
689 689 } else if (self->format_version == format_v2) {
690 690 putbe32(offset_flags >> 32, data + entry_v2_offset_high);
691 691 putbe32(offset_flags & 0xffffffffU,
692 692 data + entry_v2_offset_offset_flags);
693 693 putbe32(comp_len, data + entry_v2_offset_comp_len);
694 694 putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
695 695 putbe32(base_rev, data + entry_v2_offset_base_rev);
696 696 putbe32(link_rev, data + entry_v2_offset_link_rev);
697 697 putbe32(parent_1, data + entry_v2_offset_parent_1);
698 698 putbe32(parent_2, data + entry_v2_offset_parent_2);
699 699 memcpy(data + entry_v2_offset_node_id, c_node_id,
700 700 c_node_id_len);
701 701 putbe64(sidedata_offset,
702 702 data + entry_v2_offset_sidedata_offset);
703 703 putbe32(sidedata_comp_len,
704 704 data + entry_v2_offset_sidedata_comp_len);
705 705 comp_field = data_comp_mode & 3;
706 706 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
707 707 data[entry_v2_offset_all_comp_mode] = comp_field;
708 708 } else if (self->format_version == format_cl2) {
709 709 putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
710 710 putbe32(offset_flags & 0xffffffffU,
711 711 data + entry_cl2_offset_offset_flags);
712 712 putbe32(comp_len, data + entry_cl2_offset_comp_len);
713 713 putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
714 714 putbe32(parent_1, data + entry_cl2_offset_parent_1);
715 715 putbe32(parent_2, data + entry_cl2_offset_parent_2);
716 716 memcpy(data + entry_cl2_offset_node_id, c_node_id,
717 717 c_node_id_len);
718 718 putbe64(sidedata_offset,
719 719 data + entry_cl2_offset_sidedata_offset);
720 720 putbe32(sidedata_comp_len,
721 721 data + entry_cl2_offset_sidedata_comp_len);
722 722 comp_field = data_comp_mode & 3;
723 723 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
724 724 data[entry_cl2_offset_all_comp_mode] = comp_field;
725 725 putbe32(rank, data + entry_cl2_offset_rank);
726 726 } else {
727 727 raise_revlog_error();
728 728 return NULL;
729 729 }
730 730
731 731 if (self->ntinitialized)
732 732 nt_insert(&self->nt, c_node_id, rev);
733 733
734 734 Py_CLEAR(self->headrevs);
735 735 Py_RETURN_NONE;
736 736 }
737 737
/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
	uint64_t offset_flags, sidedata_offset;
	Py_ssize_t rev;
	int sidedata_comp_len;
	char comp_mode;
	char *data;
	/* same 'K'/'k' width trick as tuple_format above */
#if LONG_MAX == 0x7fffffffL
	const char *const sidedata_format = "nKiKB";
#else
	const char *const sidedata_format = "nkikB";
#endif

	if (self->entry_size == v1_entry_size || self->inlined) {
		/*
		 There is a bug in the transaction handling when going from an
	   inline revlog to a separate index and data file. Turn it off until
	   it's fixed, since v2 revlogs sometimes get rewritten on exchange.
	   See issue6485.
		*/
		raise_revlog_error();
		return NULL;
	}

	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
	                      &sidedata_comp_len, &offset_flags, &comp_mode))
		return NULL;

	if (rev < 0 || rev >= index_length(self)) {
		PyErr_SetString(PyExc_IndexError, "revision outside index");
		return NULL;
	}
	/* only entries appended in this transaction may be rewritten */
	if (rev < self->length) {
		PyErr_SetString(
		    PyExc_IndexError,
		    "cannot rewrite entries outside of this transaction");
		return NULL;
	}

	/* Find the newly added node, offset from the "already on-disk" length
	 */
	data = self->added + self->entry_size * (rev - self->length);
	if (self->format_version == format_v2) {
		/* NOTE(review): this putbe64 starts at entry_v2_offset_high
		 * and so spans both the high and offset_flags 32-bit fields
		 * — presumably intentional; confirm against the v2 layout. */
		putbe64(offset_flags, data + entry_v2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		/* replace only the sidedata mode bits (2-3), keep data mode */
		data[entry_v2_offset_all_comp_mode] =
		    (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else if (self->format_version == format_cl2) {
		putbe64(offset_flags, data + entry_cl2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		data[entry_cl2_offset_all_comp_mode] =
		    (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else {
		raise_revlog_error();
		return NULL;
	}

	Py_RETURN_NONE;
}
808 808
/*
 * Build a dict of diagnostic counters (bytes key -> int value) about
 * the index and its node trie. Returns a new reference or NULL with an
 * exception set.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* istat(field, label): store self->field under the bytes key 'label';
 * jumps to bail on any allocation or insertion failure. */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyLong_FromSsize_t(self->__n);                             \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added_length)
		istat(new_length, "index entries added");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
853 853
854 854 /*
855 855 * When we cache a list, we want to be sure the caller can't mutate
856 856 * the cached copy.
857 857 */
858 858 static PyObject *list_copy(PyObject *list)
859 859 {
860 860 Py_ssize_t len = PyList_GET_SIZE(list);
861 861 PyObject *newlist = PyList_New(len);
862 862 Py_ssize_t i;
863 863
864 864 if (newlist == NULL)
865 865 return NULL;
866 866
867 867 for (i = 0; i < len; i++) {
868 868 PyObject *obj = PyList_GET_ITEM(list, i);
869 869 Py_INCREF(obj);
870 870 PyList_SET_ITEM(newlist, i, obj);
871 871 }
872 872
873 873 return newlist;
874 874 }
875 875
876 876 static int check_filter(PyObject *filter, Py_ssize_t arg)
877 877 {
878 878 if (filter) {
879 879 PyObject *arglist, *result;
880 880 int isfiltered;
881 881
882 882 arglist = Py_BuildValue("(n)", arg);
883 883 if (!arglist) {
884 884 return -1;
885 885 }
886 886
887 887 result = PyObject_Call(filter, arglist, NULL);
888 888 Py_DECREF(arglist);
889 889 if (!result) {
890 890 return -1;
891 891 }
892 892
893 893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
894 894 * same as this function, so we can just return it directly.*/
895 895 isfiltered = PyObject_IsTrue(result);
896 896 Py_DECREF(result);
897 897 return isfiltered;
898 898 } else {
899 899 return 0;
900 900 }
901 901 }
902 902
903 903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 904 int parent_2, Py_ssize_t i)
905 905 {
906 906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 907 phases[i] = phases[parent_1];
908 908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 909 phases[i] = phases[parent_2];
910 910 }
911 911
/*
 * reachableroots2(minroot, heads, roots, includepath) -> list of revs
 *
 * Walk from `heads` towards ancestors (never descending below
 * `minroot`) and return the members of `roots` that are reachable.
 * When `includepath` is true, additionally return every revision that
 * lies on a path between a reachable root and a head.
 *
 * Returns a new list, or NULL on error with the Python error
 * indicator set.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev)
	 *
	 * Both arrays are indexed by rev + 1 so that nullrev (-1) maps to
	 * slot 0. */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark every root in the state array. */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyLong_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* without includepath there is no need to walk past a
			 * root, the answer for it is already known */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* forward scan: a rev is on a path iff one of its parents is
		 * already known reachable */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyLong_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
1070 1070
1071 1071 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1072 1072 char phase)
1073 1073 {
1074 1074 Py_ssize_t len = index_length(self);
1075 1075 PyObject *item;
1076 1076 PyObject *iterator;
1077 1077 int rev, minrev = -1;
1078 1078 char *node;
1079 1079
1080 1080 if (!PySet_Check(roots)) {
1081 1081 PyErr_SetString(PyExc_TypeError,
1082 1082 "roots must be a set of nodes");
1083 1083 return -2;
1084 1084 }
1085 1085 iterator = PyObject_GetIter(roots);
1086 1086 if (iterator == NULL)
1087 1087 return -2;
1088 1088 while ((item = PyIter_Next(iterator))) {
1089 1089 if (node_check(self->nodelen, item, &node) == -1)
1090 1090 goto failed;
1091 1091 rev = index_find_node(self, node);
1092 1092 /* null is implicitly public, so negative is invalid */
1093 1093 if (rev < 0 || rev >= len)
1094 1094 goto failed;
1095 1095 phases[rev] = phase;
1096 1096 if (minrev == -1 || minrev > rev)
1097 1097 minrev = rev;
1098 1098 Py_DECREF(item);
1099 1099 }
1100 1100 Py_DECREF(iterator);
1101 1101 return minrev;
1102 1102 failed:
1103 1103 Py_DECREF(iterator);
1104 1104 Py_DECREF(item);
1105 1105 return -2;
1106 1106 }
1107 1107
/*
 * compute_phases_map_sets(roots) -> (len, {phase: set(revs)})
 *
 * `roots` is a dict mapping a phase number to the set of root nodes
 * of that phase.  Propagate phases to all descendants (a child is
 * never in a lower phase than its parents) and return the index
 * length together with a dict mapping each tracked phase to the set
 * of revisions currently in it.  Public (phase 0) revisions are not
 * listed.
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	/* 0: public (untracked), 1: draft, 2: secret, 32: archive,
	   96: internal */
	static const char trackedphases[] = {1, 2, 32, 96};
	PyObject *roots = Py_None;
	PyObject *phasesetsdict = NULL;
	PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
	Py_ssize_t len = index_length(self);
	char *phases = NULL; /* per-rev phase number, zeroed = public */
	int minphaserev = -1, rev, i;
	const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));

	if (!PyArg_ParseTuple(args, "O", &roots))
		return NULL;
	if (roots == NULL || !PyDict_Check(roots)) {
		PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
		return NULL;
	}

	phases = calloc(len, 1);
	if (phases == NULL) {
		PyErr_NoMemory();
		return NULL;
	}

	/* Mark the roots of each tracked phase and remember the smallest
	 * rev from which phases must be propagated. */
	for (i = 0; i < numphases; ++i) {
		PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
		PyObject *phaseroots = NULL;
		if (pyphase == NULL)
			goto release;
		phaseroots = PyDict_GetItem(roots, pyphase);
		Py_DECREF(pyphase);
		if (phaseroots == NULL)
			continue;
		rev = add_roots_get_min(self, phaseroots, phases,
		                        trackedphases[i]);
		if (rev == -2)
			goto release;
		if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
			minphaserev = rev;
	}

	for (i = 0; i < numphases; ++i) {
		phasesets[i] = PySet_New(NULL);
		if (phasesets[i] == NULL)
			goto release;
	}

	if (minphaserev == -1)
		minphaserev = len;
	for (rev = minphaserev; rev < len; ++rev) {
		PyObject *pyphase = NULL;
		PyObject *pyrev = NULL;
		int parents[2];
		/*
		 * The parent lookup could be skipped for phaseroots, but
		 * phase --force would historically not recompute them
		 * correctly, leaving descendents with a lower phase around.
		 * As such, unconditionally recompute the phase.
		 */
		if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
			goto release;
		set_phase_from_parents(phases, parents[0], parents[1], rev);
		/* dispatch the rev to the set matching its phase; pyphase is
		 * a borrowed reference into phasesets here */
		switch (phases[rev]) {
		case 0:
			continue;
		case 1:
			pyphase = phasesets[0];
			break;
		case 2:
			pyphase = phasesets[1];
			break;
		case 32:
			pyphase = phasesets[2];
			break;
		case 96:
			pyphase = phasesets[3];
			break;
		default:
			/* this should never happen since the phase number is
			 * specified by this function. */
			PyErr_SetString(PyExc_SystemError,
			                "bad phase number in internal list");
			goto release;
		}
		pyrev = PyLong_FromLong(rev);
		if (pyrev == NULL)
			goto release;
		if (PySet_Add(pyphase, pyrev) == -1) {
			Py_DECREF(pyrev);
			goto release;
		}
		Py_DECREF(pyrev);
	}

	/* Assemble the {phase: set} result dict; "N" transfers ownership
	 * of phasesetsdict to the result tuple. */
	phasesetsdict = _dict_new_presized(numphases);
	if (phasesetsdict == NULL)
		goto release;
	for (i = 0; i < numphases; ++i) {
		PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
		if (pyphase == NULL)
			goto release;
		/* NOTE(review): pyphase is not released on the success path
		 * of PyDict_SetItem; harmless in practice for CPython's
		 * cached small ints, but technically a reference leak —
		 * verify. */
		if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
		    -1) {
			Py_DECREF(pyphase);
			goto release;
		}
		Py_DECREF(phasesets[i]);
		phasesets[i] = NULL;
	}

	free(phases);
	return Py_BuildValue("nN", len, phasesetsdict);

release:
	for (i = 0; i < numphases; ++i)
		Py_XDECREF(phasesets[i]);
	Py_XDECREF(phasesetsdict);

	free(phases);
	return NULL;
}
1231 1231
/*
 * index_headrevs([filteredrevs]) -> list of head revisions
 *
 * A head is a revision with no unfiltered children.  The result is
 * cached on the index and reused while `filteredrevs` remains the
 * same object (identity comparison); callers always receive a fresh
 * copy of the cached list so they cannot mutate the cache.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit: same filter object as last computation */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		/* empty repository: the null revision is the only head */
		PyObject *nullid = PyLong_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* walk from tip to root, marking parents of unfiltered revs */
	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* collect every revision not marked as a non-head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyLong_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	/* NOTE(review): the previous self->headrevs does not appear to be
	 * released before being overwritten here — possible reference leak
	 * unless it is cleared elsewhere; verify against the rest of the
	 * file. */
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
1333 1333
/**
 * Obtain the base revision index entry.
 *
 * Callers must ensure that rev >= 0 or illegal memory access may occur.
 *
 * Returns the base revision stored for `rev` (-1 is a valid value,
 * meaning the delta is against nullid, i.e. a full snapshot), or -2 on
 * error with the Python error indicator set.
 */
static inline int index_baserev(indexObject *self, int rev)
{
	const char *data;
	int result;

	data = index_deref(self, rev);
	if (data == NULL)
		return -2;

	if (self->format_version == format_v1) {
		result = getbe32(data + entry_v1_offset_base_rev);
	} else if (self->format_version == format_v2) {
		result = getbe32(data + entry_v2_offset_base_rev);
	} else if (self->format_version == format_cl2) {
		/* cl2 entries carry no stored base: every revision is its
		 * own base */
		return rev;
	} else {
		/* NOTE(review): on an unknown format this raises but returns
		 * -1 rather than -2; callers interpreting -1 as "no base"
		 * could miss the error — verify. */
		raise_revlog_error();
		return -1;
	}

	/* sanity check: a revision's base can never come after it */
	if (result > rev) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base above revision: %d, %d",
		    rev, result);
		return -2;
	}
	if (result < -1) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base out of range: %d, %d", rev,
		    result);
		return -2;
	}
	return result;
}
1375 1375
1376 1376 /**
1377 1377 * Find if a revision is a snapshot or not
1378 1378 *
1379 1379 * Only relevant for sparse-revlog case.
1380 1380 * Callers must ensure that rev is in a valid range.
1381 1381 */
1382 1382 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1383 1383 {
1384 1384 int ps[2];
1385 1385 int b;
1386 1386 Py_ssize_t base;
1387 1387 while (rev >= 0) {
1388 1388 base = (Py_ssize_t)index_baserev(self, rev);
1389 1389 if (base == rev) {
1390 1390 base = -1;
1391 1391 }
1392 1392 if (base == -2) {
1393 1393 assert(PyErr_Occurred());
1394 1394 return -1;
1395 1395 }
1396 1396 if (base == -1) {
1397 1397 return 1;
1398 1398 }
1399 1399 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1400 1400 assert(PyErr_Occurred());
1401 1401 return -1;
1402 1402 };
1403 1403 while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
1404 1404 b = index_baserev(self, ps[0]);
1405 1405 if (b == ps[0]) {
1406 1406 break;
1407 1407 }
1408 1408 ps[0] = b;
1409 1409 }
1410 1410 while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
1411 1411 b = index_baserev(self, ps[1]);
1412 1412 if (b == ps[1]) {
1413 1413 break;
1414 1414 }
1415 1415 ps[1] = b;
1416 1416 }
1417 1417 if (base == ps[0] || base == ps[1]) {
1418 1418 return 0;
1419 1419 }
1420 1420 rev = base;
1421 1421 }
1422 1422 return rev == -1;
1423 1423 }
1424 1424
1425 1425 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1426 1426 {
1427 1427 long rev;
1428 1428 int issnap;
1429 1429 Py_ssize_t length = index_length(self);
1430 1430
1431 1431 if (!pylong_to_long(value, &rev)) {
1432 1432 return NULL;
1433 1433 }
1434 1434 if (rev < -1 || rev >= length) {
1435 1435 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1436 1436 rev);
1437 1437 return NULL;
1438 1438 };
1439 1439 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1440 1440 if (issnap < 0) {
1441 1441 return NULL;
1442 1442 };
1443 1443 return PyBool_FromLong((long)issnap);
1444 1444 }
1445 1445
1446 1446 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1447 1447 {
1448 1448 Py_ssize_t start_rev;
1449 1449 Py_ssize_t end_rev;
1450 1450 PyObject *cache;
1451 1451 Py_ssize_t base;
1452 1452 Py_ssize_t rev;
1453 1453 PyObject *key = NULL;
1454 1454 PyObject *value = NULL;
1455 1455 const Py_ssize_t length = index_length(self);
1456 1456 if (!PyArg_ParseTuple(args, "O!nn", &PyDict_Type, &cache, &start_rev,
1457 1457 &end_rev)) {
1458 1458 return NULL;
1459 1459 }
1460 1460 end_rev += 1;
1461 1461 if (end_rev > length) {
1462 1462 end_rev = length;
1463 1463 }
1464 1464 if (start_rev < 0) {
1465 1465 start_rev = 0;
1466 1466 }
1467 1467 for (rev = start_rev; rev < end_rev; rev++) {
1468 1468 int issnap;
1469 1469 PyObject *allvalues = NULL;
1470 1470 issnap = index_issnapshotrev(self, rev);
1471 1471 if (issnap < 0) {
1472 1472 goto bail;
1473 1473 }
1474 1474 if (issnap == 0) {
1475 1475 continue;
1476 1476 }
1477 1477 base = (Py_ssize_t)index_baserev(self, rev);
1478 1478 if (base == rev) {
1479 1479 base = -1;
1480 1480 }
1481 1481 if (base == -2) {
1482 1482 assert(PyErr_Occurred());
1483 1483 goto bail;
1484 1484 }
1485 1485 key = PyLong_FromSsize_t(base);
1486 1486 allvalues = PyDict_GetItem(cache, key);
1487 1487 if (allvalues == NULL && PyErr_Occurred()) {
1488 1488 goto bail;
1489 1489 }
1490 1490 if (allvalues == NULL) {
1491 1491 int r;
1492 allvalues = PyList_New(0);
1492 allvalues = PySet_New(0);
1493 1493 if (!allvalues) {
1494 1494 goto bail;
1495 1495 }
1496 1496 r = PyDict_SetItem(cache, key, allvalues);
1497 1497 Py_DECREF(allvalues);
1498 1498 if (r < 0) {
1499 1499 goto bail;
1500 1500 }
1501 1501 }
1502 1502 value = PyLong_FromSsize_t(rev);
1503 if (PyList_Append(allvalues, value)) {
1503 if (PySet_Add(allvalues, value)) {
1504 1504 goto bail;
1505 1505 }
1506 1506 Py_CLEAR(key);
1507 1507 Py_CLEAR(value);
1508 1508 }
1509 1509 Py_RETURN_NONE;
1510 1510 bail:
1511 1511 Py_XDECREF(key);
1512 1512 Py_XDECREF(value);
1513 1513 return NULL;
1514 1514 }
1515 1515
1516 1516 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1517 1517 {
1518 1518 int rev, generaldelta;
1519 1519 PyObject *stoparg;
1520 1520 int stoprev, iterrev, baserev = -1;
1521 1521 int stopped;
1522 1522 PyObject *chain = NULL, *result = NULL;
1523 1523 const Py_ssize_t length = index_length(self);
1524 1524
1525 1525 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1526 1526 return NULL;
1527 1527 }
1528 1528
1529 1529 if (PyLong_Check(stoparg)) {
1530 1530 stoprev = (int)PyLong_AsLong(stoparg);
1531 1531 if (stoprev == -1 && PyErr_Occurred()) {
1532 1532 return NULL;
1533 1533 }
1534 1534 } else if (stoparg == Py_None) {
1535 1535 stoprev = -2;
1536 1536 } else {
1537 1537 PyErr_SetString(PyExc_ValueError,
1538 1538 "stoprev must be integer or None");
1539 1539 return NULL;
1540 1540 }
1541 1541
1542 1542 if (rev < 0 || rev >= length) {
1543 1543 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1544 1544 return NULL;
1545 1545 }
1546 1546
1547 1547 chain = PyList_New(0);
1548 1548 if (chain == NULL) {
1549 1549 return NULL;
1550 1550 }
1551 1551
1552 1552 baserev = index_baserev(self, rev);
1553 1553
1554 1554 /* This should never happen. */
1555 1555 if (baserev <= -2) {
1556 1556 /* Error should be set by index_deref() */
1557 1557 assert(PyErr_Occurred());
1558 1558 goto bail;
1559 1559 }
1560 1560
1561 1561 iterrev = rev;
1562 1562
1563 1563 while (iterrev != baserev && iterrev != stoprev) {
1564 1564 PyObject *value = PyLong_FromLong(iterrev);
1565 1565 if (value == NULL) {
1566 1566 goto bail;
1567 1567 }
1568 1568 if (PyList_Append(chain, value)) {
1569 1569 Py_DECREF(value);
1570 1570 goto bail;
1571 1571 }
1572 1572 Py_DECREF(value);
1573 1573
1574 1574 if (generaldelta) {
1575 1575 iterrev = baserev;
1576 1576 } else {
1577 1577 iterrev--;
1578 1578 }
1579 1579
1580 1580 if (iterrev < 0) {
1581 1581 break;
1582 1582 }
1583 1583
1584 1584 if (iterrev >= length) {
1585 1585 PyErr_SetString(PyExc_IndexError,
1586 1586 "revision outside index");
1587 1587 return NULL;
1588 1588 }
1589 1589
1590 1590 baserev = index_baserev(self, iterrev);
1591 1591
1592 1592 /* This should never happen. */
1593 1593 if (baserev <= -2) {
1594 1594 /* Error should be set by index_deref() */
1595 1595 assert(PyErr_Occurred());
1596 1596 goto bail;
1597 1597 }
1598 1598 }
1599 1599
1600 1600 if (iterrev == stoprev) {
1601 1601 stopped = 1;
1602 1602 } else {
1603 1603 PyObject *value = PyLong_FromLong(iterrev);
1604 1604 if (value == NULL) {
1605 1605 goto bail;
1606 1606 }
1607 1607 if (PyList_Append(chain, value)) {
1608 1608 Py_DECREF(value);
1609 1609 goto bail;
1610 1610 }
1611 1611 Py_DECREF(value);
1612 1612
1613 1613 stopped = 0;
1614 1614 }
1615 1615
1616 1616 if (PyList_Reverse(chain)) {
1617 1617 goto bail;
1618 1618 }
1619 1619
1620 1620 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1621 1621 Py_DECREF(chain);
1622 1622 return result;
1623 1623
1624 1624 bail:
1625 1625 Py_DECREF(chain);
1626 1626 return NULL;
1627 1627 }
1628 1628
1629 1629 static inline int64_t
1630 1630 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1631 1631 {
1632 1632 int64_t start_offset;
1633 1633 int64_t end_offset;
1634 1634 int end_size;
1635 1635 start_offset = index_get_start(self, start_rev);
1636 1636 if (start_offset < 0) {
1637 1637 return -1;
1638 1638 }
1639 1639 end_offset = index_get_start(self, end_rev);
1640 1640 if (end_offset < 0) {
1641 1641 return -1;
1642 1642 }
1643 1643 end_size = index_get_length(self, end_rev);
1644 1644 if (end_size < 0) {
1645 1645 return -1;
1646 1646 }
1647 1647 if (end_offset < start_offset) {
1648 1648 PyErr_Format(PyExc_ValueError,
1649 1649 "corrupted revlog index: inconsistent offset "
1650 1650 "between revisions (%zd) and (%zd)",
1651 1651 start_rev, end_rev);
1652 1652 return -1;
1653 1653 }
1654 1654 return (end_offset - start_offset) + (int64_t)end_size;
1655 1655 }
1656 1656
1657 1657 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1658 1658 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1659 1659 Py_ssize_t startidx, Py_ssize_t endidx)
1660 1660 {
1661 1661 int length;
1662 1662 while (endidx > 1 && endidx > startidx) {
1663 1663 length = index_get_length(self, revs[endidx - 1]);
1664 1664 if (length < 0) {
1665 1665 return -1;
1666 1666 }
1667 1667 if (length != 0) {
1668 1668 break;
1669 1669 }
1670 1670 endidx -= 1;
1671 1671 }
1672 1672 return endidx;
1673 1673 }
1674 1674
/* A gap between two on-disk chunks of a delta chain: its byte size and
 * the index (within the revs array) of the revision following it. */
struct Gap {
	int64_t size;
	Py_ssize_t idx;
};
1679 1679
1680 1680 static int gap_compare(const void *left, const void *right)
1681 1681 {
1682 1682 const struct Gap *l_left = ((const struct Gap *)left);
1683 1683 const struct Gap *l_right = ((const struct Gap *)right);
1684 1684 if (l_left->size < l_right->size) {
1685 1685 return -1;
1686 1686 } else if (l_left->size > l_right->size) {
1687 1687 return 1;
1688 1688 }
1689 1689 return 0;
1690 1690 }
1691 1691 static int Py_ssize_t_compare(const void *left, const void *right)
1692 1692 {
1693 1693 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1694 1694 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1695 1695 if (l_left < l_right) {
1696 1696 return -1;
1697 1697 } else if (l_left > l_right) {
1698 1698 return 1;
1699 1699 }
1700 1700 return 0;
1701 1701 }
1702 1702
/*
 * slicechunktodensity(revs, targetdensity, mingapsize) -> slices
 *
 * Slice the delta chain `revs` (a list of revisions) into contiguous
 * chunks so that reading each chunk achieves at least `targetdensity`
 * (ratio of payload bytes over bytes read), skipping over on-disk
 * gaps larger than `mingapsize` bytes.  Returns a sequence of slices,
 * each a list of revisions; a 1-tuple holding `revs` unchanged when
 * no slicing is needed.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum =
		    PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* a chain smaller than the gap threshold is never worth slicing */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* the chain is already dense enough: return it whole */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* slice boundaries must be applied in revision order */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the final chunk reaches the end of the chain */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1899 1899
1900 1900 static inline int nt_level(const char *node, Py_ssize_t level)
1901 1901 {
1902 1902 int v = node[level >> 1];
1903 1903 if (!(level & 1))
1904 1904 v >>= 4;
1905 1905 return v & 0xf;
1906 1906 }
1907 1907
/*
 * Look up a node (or node prefix) in the trie.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	/* nybble accessor: hex input holds one nybble per char, binary
	 * input packs two per byte */
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf entry, encoded as -(rev + 2): decode the rev
			 * and verify the remaining nybbles of the query match
			 * the stored node */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			/* empty slot: no such node */
			return -2;
		/* internal node: descend */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1958 1958
1959 1959 static int nt_new(nodetree *self)
1960 1960 {
1961 1961 if (self->length == self->capacity) {
1962 1962 size_t newcapacity;
1963 1963 nodetreenode *newnodes;
1964 1964 newcapacity = self->capacity * 2;
1965 1965 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1966 1966 PyErr_SetString(PyExc_MemoryError,
1967 1967 "overflow in nt_new");
1968 1968 return -1;
1969 1969 }
1970 1970 newnodes =
1971 1971 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1972 1972 if (newnodes == NULL) {
1973 1973 PyErr_SetString(PyExc_MemoryError, "out of memory");
1974 1974 return -1;
1975 1975 }
1976 1976 self->capacity = newcapacity;
1977 1977 self->nodes = newnodes;
1978 1978 memset(&self->nodes[self->length], 0,
1979 1979 sizeof(nodetreenode) * (self->capacity - self->length));
1980 1980 }
1981 1981 return self->length++;
1982 1982 }
1983 1983
/*
 * Insert a node->rev mapping into the trie, splitting a conflicting
 * leaf into interior nodes as needed.
 *
 * Returns 0 on success, -1 on error (exception set by a callee).
 * Leaves encode a rev as -(rev + 2); inserting rev == -2 therefore
 * stores 0, i.e. "empty" (see nt_delete_node).
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		/* Empty slot: store the new leaf directly. */
		if (v == 0) {
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* Occupied by a leaf: overwrite if it is the same
			 * node, otherwise split by pushing the old leaf one
			 * level down and retrying. */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			/* Re-home the old leaf under the new interior node;
			 * the new node is placed on the next iteration. */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* Interior node: descend. */
			level += 1;
			off = v;
		}
	}

	/* Ran out of nybbles without placing the node. */
	return -1;
}
2031 2031
2032 2032 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2033 2033 {
2034 2034 Py_ssize_t rev;
2035 2035 const char *node;
2036 2036 Py_ssize_t length;
2037 2037 if (!PyArg_ParseTuple(args, "n", &rev))
2038 2038 return NULL;
2039 2039 length = index_length(self->nt.index);
2040 2040 if (rev < 0 || rev >= length) {
2041 2041 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2042 2042 return NULL;
2043 2043 }
2044 2044 node = index_node_existing(self->nt.index, rev);
2045 2045 if (nt_insert(&self->nt, node, (int)rev) == -1)
2046 2046 return NULL;
2047 2047 Py_RETURN_NONE;
2048 2048 }
2049 2049
2050 2050 static int nt_delete_node(nodetree *self, const char *node)
2051 2051 {
2052 2052 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2053 2053 */
2054 2054 return nt_insert(self, node, -2);
2055 2055 }
2056 2056
2057 2057 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2058 2058 {
2059 2059 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2060 2060 self->nodes = NULL;
2061 2061
2062 2062 self->index = index;
2063 2063 /* The input capacity is in terms of revisions, while the field is in
2064 2064 * terms of nodetree nodes. */
2065 2065 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2066 2066 self->nodelen = index->nodelen;
2067 2067 self->depth = 0;
2068 2068 self->splits = 0;
2069 2069 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2070 2070 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2071 2071 return -1;
2072 2072 }
2073 2073 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2074 2074 if (self->nodes == NULL) {
2075 2075 PyErr_NoMemory();
2076 2076 return -1;
2077 2077 }
2078 2078 self->length = 1;
2079 2079 return 0;
2080 2080 }
2081 2081
2082 2082 static int ntobj_init(nodetreeObject *self, PyObject *args)
2083 2083 {
2084 2084 PyObject *index;
2085 2085 unsigned capacity;
2086 2086 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2087 2087 &capacity))
2088 2088 return -1;
2089 2089 Py_INCREF(index);
2090 2090 return nt_init(&self->nt, (indexObject *)index, capacity);
2091 2091 }
2092 2092
2093 2093 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2094 2094 {
2095 2095 return nt_find(self, node, nodelen, 1);
2096 2096 }
2097 2097
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	/* Walk the trie one hex digit at a time until we hit a leaf;
	 * the depth reached is the shortest unambiguous prefix length. */
	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
2143 2143
2144 2144 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2145 2145 {
2146 2146 PyObject *val;
2147 2147 char *node;
2148 2148 int length;
2149 2149
2150 2150 if (!PyArg_ParseTuple(args, "O", &val))
2151 2151 return NULL;
2152 2152 if (node_check(self->nt.nodelen, val, &node) == -1)
2153 2153 return NULL;
2154 2154
2155 2155 length = nt_shortest(&self->nt, node);
2156 2156 if (length == -3)
2157 2157 return NULL;
2158 2158 if (length == -2) {
2159 2159 raise_revlog_error();
2160 2160 return NULL;
2161 2161 }
2162 2162 return PyLong_FromLong(length);
2163 2163 }
2164 2164
2165 2165 static void nt_dealloc(nodetree *self)
2166 2166 {
2167 2167 free(self->nodes);
2168 2168 self->nodes = NULL;
2169 2169 }
2170 2170
/* tp_dealloc: drop the reference on the underlying index, free the
 * trie storage, then release the object itself. */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
2177 2177
/* Methods exposed on the Python-level parsers.nodetree object. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
2185 2185
/* Type object for parsers.nodetree: a standalone, Python-visible
 * node->rev radix tree bound to a revlog index. */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
2225 2225
2226 2226 static int index_init_nt(indexObject *self)
2227 2227 {
2228 2228 if (!self->ntinitialized) {
2229 2229 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2230 2230 nt_dealloc(&self->nt);
2231 2231 return -1;
2232 2232 }
2233 2233 if (nt_insert(&self->nt, nullid, -1) == -1) {
2234 2234 nt_dealloc(&self->nt);
2235 2235 return -1;
2236 2236 }
2237 2237 self->ntinitialized = 1;
2238 2238 self->ntrev = (int)index_length(self);
2239 2239 self->ntlookups = 1;
2240 2240 self->ntmisses = 0;
2241 2241 }
2242 2242 return 0;
2243 2243 }
2244 2244
/*
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	/* Fast path: the node is already cached in the trie. */
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				/* Cache only the match. */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		/* Cache every node visited, moving ntrev down as we go
		 * so future scans resume where this one stopped. */
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(&self->nt, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2304 2304
2305 2305 static PyObject *index_getitem(indexObject *self, PyObject *value)
2306 2306 {
2307 2307 char *node;
2308 2308 int rev;
2309 2309
2310 2310 if (PyLong_Check(value)) {
2311 2311 long idx;
2312 2312 if (!pylong_to_long(value, &idx)) {
2313 2313 return NULL;
2314 2314 }
2315 2315 return index_get(self, idx);
2316 2316 }
2317 2317
2318 2318 if (node_check(self->nodelen, value, &node) == -1)
2319 2319 return NULL;
2320 2320 rev = index_find_node(self, node);
2321 2321 if (rev >= -1)
2322 2322 return PyLong_FromLong(rev);
2323 2323 if (rev == -2)
2324 2324 raise_revlog_error();
2325 2325 return NULL;
2326 2326 }
2327 2327
2328 2328 /*
2329 2329 * Fully populate the radix tree.
2330 2330 */
2331 2331 static int index_populate_nt(indexObject *self)
2332 2332 {
2333 2333 int rev;
2334 2334 if (self->ntrev > 0) {
2335 2335 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2336 2336 const char *n = index_node_existing(self, rev);
2337 2337 if (n == NULL)
2338 2338 return -1;
2339 2339 if (nt_insert(&self->nt, n, rev) == -1)
2340 2340 return -1;
2341 2341 }
2342 2342 self->ntrev = -1;
2343 2343 }
2344 2344 return 0;
2345 2345 }
2346 2346
/*
 * Python method: resolve a hex node prefix to the full binary node.
 * Returns None when the prefix is unknown or not hex, raises
 * RevlogError when ambiguous, ValueError on bad lengths.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() sets an exception on a bad digit; we probe every
	 * character and convert any failure into a None result. */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* Prefix resolution needs the complete trie. */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2397 2397
2398 2398 static PyObject *index_shortest(indexObject *self, PyObject *args)
2399 2399 {
2400 2400 PyObject *val;
2401 2401 char *node;
2402 2402 int length;
2403 2403
2404 2404 if (!PyArg_ParseTuple(args, "O", &val))
2405 2405 return NULL;
2406 2406 if (node_check(self->nodelen, val, &node) == -1)
2407 2407 return NULL;
2408 2408
2409 2409 self->ntlookups++;
2410 2410 if (index_init_nt(self) == -1)
2411 2411 return NULL;
2412 2412 if (index_populate_nt(self) == -1)
2413 2413 return NULL;
2414 2414 length = nt_shortest(&self->nt, node);
2415 2415 if (length == -3)
2416 2416 return NULL;
2417 2417 if (length == -2) {
2418 2418 raise_revlog_error();
2419 2419 return NULL;
2420 2420 }
2421 2421 return PyLong_FromLong(length);
2422 2422 }
2423 2423
2424 2424 static PyObject *index_m_get(indexObject *self, PyObject *args)
2425 2425 {
2426 2426 PyObject *val;
2427 2427 char *node;
2428 2428 int rev;
2429 2429
2430 2430 if (!PyArg_ParseTuple(args, "O", &val))
2431 2431 return NULL;
2432 2432 if (node_check(self->nodelen, val, &node) == -1)
2433 2433 return NULL;
2434 2434 rev = index_find_node(self, node);
2435 2435 if (rev == -3)
2436 2436 return NULL;
2437 2437 if (rev == -2)
2438 2438 Py_RETURN_NONE;
2439 2439 return PyLong_FromLong(rev);
2440 2440 }
2441 2441
2442 2442 static int index_contains(indexObject *self, PyObject *value)
2443 2443 {
2444 2444 char *node;
2445 2445
2446 2446 if (PyLong_Check(value)) {
2447 2447 long rev;
2448 2448 if (!pylong_to_long(value, &rev)) {
2449 2449 return -1;
2450 2450 }
2451 2451 return rev >= -1 && rev < index_length(self);
2452 2452 }
2453 2453
2454 2454 if (node_check(self->nodelen, value, &node) == -1)
2455 2455 return -1;
2456 2456
2457 2457 switch (index_find_node(self, node)) {
2458 2458 case -3:
2459 2459 return -1;
2460 2460 case -2:
2461 2461 return 0;
2462 2462 default:
2463 2463 return 1;
2464 2464 }
2465 2465 }
2466 2466
2467 2467 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2468 2468 {
2469 2469 int ret = index_contains(self, args);
2470 2470 if (ret < 0)
2471 2471 return NULL;
2472 2472 return PyBool_FromLong((long)ret);
2473 2473 }
2474 2474
2475 2475 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2476 2476 {
2477 2477 char *node;
2478 2478 int rev;
2479 2479
2480 2480 if (node_check(self->nodelen, val, &node) == -1)
2481 2481 return NULL;
2482 2482 rev = index_find_node(self, node);
2483 2483 if (rev >= -1)
2484 2484 return PyLong_FromLong(rev);
2485 2485 if (rev == -2)
2486 2486 raise_revlog_error();
2487 2487 return NULL;
2488 2488 }
2489 2489
2490 2490 typedef uint64_t bitmask;
2491 2491
2492 2492 /*
2493 2493 * Given a disjoint set of revs, return all candidates for the
2494 2494 * greatest common ancestor. In revset notation, this is the set
2495 2495 * "heads(::a and ::b and ...)"
2496 2496 */
2497 2497 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2498 2498 int revcount)
2499 2499 {
2500 2500 const bitmask allseen = (1ull << revcount) - 1;
2501 2501 const bitmask poison = 1ull << revcount;
2502 2502 PyObject *gca = PyList_New(0);
2503 2503 int i, v, interesting;
2504 2504 int maxrev = -1;
2505 2505 bitmask sp;
2506 2506 bitmask *seen;
2507 2507
2508 2508 if (gca == NULL)
2509 2509 return PyErr_NoMemory();
2510 2510
2511 2511 for (i = 0; i < revcount; i++) {
2512 2512 if (revs[i] > maxrev)
2513 2513 maxrev = revs[i];
2514 2514 }
2515 2515
2516 2516 seen = calloc(sizeof(*seen), maxrev + 1);
2517 2517 if (seen == NULL) {
2518 2518 Py_DECREF(gca);
2519 2519 return PyErr_NoMemory();
2520 2520 }
2521 2521
2522 2522 for (i = 0; i < revcount; i++)
2523 2523 seen[revs[i]] = 1ull << i;
2524 2524
2525 2525 interesting = revcount;
2526 2526
2527 2527 for (v = maxrev; v >= 0 && interesting; v--) {
2528 2528 bitmask sv = seen[v];
2529 2529 int parents[2];
2530 2530
2531 2531 if (!sv)
2532 2532 continue;
2533 2533
2534 2534 if (sv < poison) {
2535 2535 interesting -= 1;
2536 2536 if (sv == allseen) {
2537 2537 PyObject *obj = PyLong_FromLong(v);
2538 2538 if (obj == NULL)
2539 2539 goto bail;
2540 2540 if (PyList_Append(gca, obj) == -1) {
2541 2541 Py_DECREF(obj);
2542 2542 goto bail;
2543 2543 }
2544 2544 sv |= poison;
2545 2545 for (i = 0; i < revcount; i++) {
2546 2546 if (revs[i] == v)
2547 2547 goto done;
2548 2548 }
2549 2549 }
2550 2550 }
2551 2551 if (index_get_parents(self, v, parents, maxrev) < 0)
2552 2552 goto bail;
2553 2553
2554 2554 for (i = 0; i < 2; i++) {
2555 2555 int p = parents[i];
2556 2556 if (p == -1)
2557 2557 continue;
2558 2558 sp = seen[p];
2559 2559 if (sv < poison) {
2560 2560 if (sp == 0) {
2561 2561 seen[p] = sv;
2562 2562 interesting++;
2563 2563 } else if (sp != sv)
2564 2564 seen[p] |= sv;
2565 2565 } else {
2566 2566 if (sp && sp < poison)
2567 2567 interesting--;
2568 2568 seen[p] = sv;
2569 2569 }
2570 2570 }
2571 2571 }
2572 2572
2573 2573 done:
2574 2574 free(seen);
2575 2575 return gca;
2576 2576 bail:
2577 2577 free(seen);
2578 2578 Py_XDECREF(gca);
2579 2579 return NULL;
2580 2580 }
2581 2581
2582 2582 /*
2583 2583 * Given a disjoint set of revs, return the subset with the longest
2584 2584 * path to the root.
2585 2585 */
2586 2586 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2587 2587 {
2588 2588 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2589 2589 static const Py_ssize_t capacity = 24;
2590 2590 int *depth, *interesting = NULL;
2591 2591 int i, j, v, ninteresting;
2592 2592 PyObject *dict = NULL, *keys = NULL;
2593 2593 long *seen = NULL;
2594 2594 int maxrev = -1;
2595 2595 long final;
2596 2596
2597 2597 if (revcount > capacity) {
2598 2598 PyErr_Format(PyExc_OverflowError,
2599 2599 "bitset size (%ld) > capacity (%ld)",
2600 2600 (long)revcount, (long)capacity);
2601 2601 return NULL;
2602 2602 }
2603 2603
2604 2604 for (i = 0; i < revcount; i++) {
2605 2605 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2606 2606 if (n > maxrev)
2607 2607 maxrev = n;
2608 2608 }
2609 2609
2610 2610 depth = calloc(sizeof(*depth), maxrev + 1);
2611 2611 if (depth == NULL)
2612 2612 return PyErr_NoMemory();
2613 2613
2614 2614 seen = calloc(sizeof(*seen), maxrev + 1);
2615 2615 if (seen == NULL) {
2616 2616 PyErr_NoMemory();
2617 2617 goto bail;
2618 2618 }
2619 2619
2620 2620 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2621 2621 if (interesting == NULL) {
2622 2622 PyErr_NoMemory();
2623 2623 goto bail;
2624 2624 }
2625 2625
2626 2626 if (PyList_Sort(revs) == -1)
2627 2627 goto bail;
2628 2628
2629 2629 for (i = 0; i < revcount; i++) {
2630 2630 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2631 2631 long b = 1l << i;
2632 2632 depth[n] = 1;
2633 2633 seen[n] = b;
2634 2634 interesting[b] = 1;
2635 2635 }
2636 2636
2637 2637 /* invariant: ninteresting is the number of non-zero entries in
2638 2638 * interesting. */
2639 2639 ninteresting = (int)revcount;
2640 2640
2641 2641 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2642 2642 int dv = depth[v];
2643 2643 int parents[2];
2644 2644 long sv;
2645 2645
2646 2646 if (dv == 0)
2647 2647 continue;
2648 2648
2649 2649 sv = seen[v];
2650 2650 if (index_get_parents(self, v, parents, maxrev) < 0)
2651 2651 goto bail;
2652 2652
2653 2653 for (i = 0; i < 2; i++) {
2654 2654 int p = parents[i];
2655 2655 long sp;
2656 2656 int dp;
2657 2657
2658 2658 if (p == -1)
2659 2659 continue;
2660 2660
2661 2661 dp = depth[p];
2662 2662 sp = seen[p];
2663 2663 if (dp <= dv) {
2664 2664 depth[p] = dv + 1;
2665 2665 if (sp != sv) {
2666 2666 interesting[sv] += 1;
2667 2667 seen[p] = sv;
2668 2668 if (sp) {
2669 2669 interesting[sp] -= 1;
2670 2670 if (interesting[sp] == 0)
2671 2671 ninteresting -= 1;
2672 2672 }
2673 2673 }
2674 2674 } else if (dv == dp - 1) {
2675 2675 long nsp = sp | sv;
2676 2676 if (nsp == sp)
2677 2677 continue;
2678 2678 seen[p] = nsp;
2679 2679 interesting[sp] -= 1;
2680 2680 if (interesting[sp] == 0)
2681 2681 ninteresting -= 1;
2682 2682 if (interesting[nsp] == 0)
2683 2683 ninteresting += 1;
2684 2684 interesting[nsp] += 1;
2685 2685 }
2686 2686 }
2687 2687 interesting[sv] -= 1;
2688 2688 if (interesting[sv] == 0)
2689 2689 ninteresting -= 1;
2690 2690 }
2691 2691
2692 2692 final = 0;
2693 2693 j = ninteresting;
2694 2694 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2695 2695 if (interesting[i] == 0)
2696 2696 continue;
2697 2697 final |= i;
2698 2698 j -= 1;
2699 2699 }
2700 2700 if (final == 0) {
2701 2701 keys = PyList_New(0);
2702 2702 goto bail;
2703 2703 }
2704 2704
2705 2705 dict = PyDict_New();
2706 2706 if (dict == NULL)
2707 2707 goto bail;
2708 2708
2709 2709 for (i = 0; i < revcount; i++) {
2710 2710 PyObject *key;
2711 2711
2712 2712 if ((final & (1 << i)) == 0)
2713 2713 continue;
2714 2714
2715 2715 key = PyList_GET_ITEM(revs, i);
2716 2716 Py_INCREF(key);
2717 2717 Py_INCREF(Py_None);
2718 2718 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2719 2719 Py_DECREF(key);
2720 2720 Py_DECREF(Py_None);
2721 2721 goto bail;
2722 2722 }
2723 2723 }
2724 2724
2725 2725 keys = PyDict_Keys(dict);
2726 2726
2727 2727 bail:
2728 2728 free(depth);
2729 2729 free(seen);
2730 2730 free(interesting);
2731 2731 Py_XDECREF(dict);
2732 2732
2733 2733 return keys;
2734 2734 }
2735 2735
2736 2736 /*
2737 2737 * Given a (possibly overlapping) set of revs, return all the
2738 2738 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2739 2739 */
2740 2740 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2741 2741 {
2742 2742 PyObject *ret = NULL;
2743 2743 Py_ssize_t argcount, i, len;
2744 2744 bitmask repeat = 0;
2745 2745 int revcount = 0;
2746 2746 int *revs;
2747 2747
2748 2748 argcount = PySequence_Length(args);
2749 2749 revs = PyMem_Malloc(argcount * sizeof(*revs));
2750 2750 if (argcount > 0 && revs == NULL)
2751 2751 return PyErr_NoMemory();
2752 2752 len = index_length(self);
2753 2753
2754 2754 for (i = 0; i < argcount; i++) {
2755 2755 static const int capacity = 24;
2756 2756 PyObject *obj = PySequence_GetItem(args, i);
2757 2757 bitmask x;
2758 2758 long val;
2759 2759
2760 2760 if (!PyLong_Check(obj)) {
2761 2761 PyErr_SetString(PyExc_TypeError,
2762 2762 "arguments must all be ints");
2763 2763 Py_DECREF(obj);
2764 2764 goto bail;
2765 2765 }
2766 2766 val = PyLong_AsLong(obj);
2767 2767 Py_DECREF(obj);
2768 2768 if (val == -1) {
2769 2769 ret = PyList_New(0);
2770 2770 goto done;
2771 2771 }
2772 2772 if (val < 0 || val >= len) {
2773 2773 PyErr_SetString(PyExc_IndexError, "index out of range");
2774 2774 goto bail;
2775 2775 }
2776 2776 /* this cheesy bloom filter lets us avoid some more
2777 2777 * expensive duplicate checks in the common set-is-disjoint
2778 2778 * case */
2779 2779 x = 1ull << (val & 0x3f);
2780 2780 if (repeat & x) {
2781 2781 int k;
2782 2782 for (k = 0; k < revcount; k++) {
2783 2783 if (val == revs[k])
2784 2784 goto duplicate;
2785 2785 }
2786 2786 } else
2787 2787 repeat |= x;
2788 2788 if (revcount >= capacity) {
2789 2789 PyErr_Format(PyExc_OverflowError,
2790 2790 "bitset size (%d) > capacity (%d)",
2791 2791 revcount, capacity);
2792 2792 goto bail;
2793 2793 }
2794 2794 revs[revcount++] = (int)val;
2795 2795 duplicate:;
2796 2796 }
2797 2797
2798 2798 if (revcount == 0) {
2799 2799 ret = PyList_New(0);
2800 2800 goto done;
2801 2801 }
2802 2802 if (revcount == 1) {
2803 2803 PyObject *obj;
2804 2804 ret = PyList_New(1);
2805 2805 if (ret == NULL)
2806 2806 goto bail;
2807 2807 obj = PyLong_FromLong(revs[0]);
2808 2808 if (obj == NULL)
2809 2809 goto bail;
2810 2810 PyList_SET_ITEM(ret, 0, obj);
2811 2811 goto done;
2812 2812 }
2813 2813
2814 2814 ret = find_gca_candidates(self, revs, revcount);
2815 2815 if (ret == NULL)
2816 2816 goto bail;
2817 2817
2818 2818 done:
2819 2819 PyMem_Free(revs);
2820 2820 return ret;
2821 2821
2822 2822 bail:
2823 2823 PyMem_Free(revs);
2824 2824 Py_XDECREF(ret);
2825 2825 return NULL;
2826 2826 }
2827 2827
2828 2828 /*
2829 2829 * Given a (possibly overlapping) set of revs, return the greatest
2830 2830 * common ancestors: those with the longest path to the root.
2831 2831 */
2832 2832 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2833 2833 {
2834 2834 PyObject *ret;
2835 2835 PyObject *gca = index_commonancestorsheads(self, args);
2836 2836 if (gca == NULL)
2837 2837 return NULL;
2838 2838
2839 2839 if (PyList_GET_SIZE(gca) <= 1) {
2840 2840 return gca;
2841 2841 }
2842 2842
2843 2843 ret = find_deepest(self, gca);
2844 2844 Py_DECREF(gca);
2845 2845 return ret;
2846 2846 }
2847 2847
/*
 * Invalidate any trie entries introduced by added revs.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len;

	/* Total rev count: on-disk entries plus in-memory additions. */
	len = self->length + self->new_length;
	/* No-op when start is below the added region. */
	i = start - self->length;
	if (i < 0)
		return;

	/* Remove each added rev at or beyond "start" from the trie.
	 * NOTE(review): index_node() can return NULL; this assumes added
	 * revs always resolve -- confirm against index_node(). */
	for (i = start; i < len; i++) {
		const char *node = index_node(self, i);
		nt_delete_node(&self->nt, node);
	}

	self->new_length = start - self->length;
}
2867 2867
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	/* +1 because the index exposes a virtual trailing null entry. */
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* Normalize a reversed slice into an ascending one. */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	/* Only truncation from the tail is supported. */
	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* Truncation reaches into the on-disk portion: scrub the
		 * trie for both on-disk and added revs being dropped. */
		if (self->ntinitialized) {
			Py_ssize_t i;

			for (i = start; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->new_length)
				index_invalidate_added(self, self->length);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		} else if (self->new_length) {
			self->new_length = 0;
		}

		self->length = start;
		goto done;
	}

	/* Truncation only affects in-memory added revs. */
	if (self->ntinitialized) {
		index_invalidate_added(self, start);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	} else {
		self->new_length = start - self->length;
	}
done:
	/* Cached head revs are stale after any truncation. */
	Py_CLEAR(self->headrevs);
	return ret;
}
2940 2940
/*
 * Supported ops:
 *
 * slice deletion
 * string assignment (extend node->rev mapping)
 * string deletion (shrink node->rev mapping)
 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
	char *node;
	long rev;

	/* del index[start:stop] */
	if (PySlice_Check(item) && value == NULL)
		return index_slice_del(self, item);

	if (node_check(self->nodelen, item, &node) == -1)
		return -1;

	/* del index[node]: drop the trie entry (no-op when the trie has
	 * not been built yet). */
	if (value == NULL)
		return self->ntinitialized ? nt_delete_node(&self->nt, node)
		                           : 0;
	/* index[node] = rev */
	rev = PyLong_AsLong(value);
	if (rev > INT_MAX || rev < 0) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "rev out of range");
		return -1;
	}

	if (index_init_nt(self) == -1)
		return -1;
	return nt_insert(&self->nt, node, (int)rev);
}
2974 2974
/*
 * Find all RevlogNG entries in an index that has inline data. Update
 * the optional "offsets" table with those entries.
 *
 * Returns the number of entries found, or -1 on error (exception set).
 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
	const char *data = (const char *)self->buf.buf;
	Py_ssize_t pos = 0;
	Py_ssize_t end = self->buf.len;
	long incr = self->entry_size;
	Py_ssize_t len = 0;

	/* Walk entry headers; each record is a fixed-size header followed
	 * by its inline (side)data, whose lengths the header declares. */
	while (pos + self->entry_size <= end && pos >= 0) {
		uint32_t comp_len, sidedata_comp_len = 0;
		/* 3rd element of header is length of compressed inline data */
		if (self->format_version == format_v1) {
			comp_len =
			    getbe32(data + pos + entry_v1_offset_comp_len);
			sidedata_comp_len = 0;
		} else if (self->format_version == format_v2) {
			comp_len =
			    getbe32(data + pos + entry_v2_offset_comp_len);
			sidedata_comp_len = getbe32(
			    data + pos + entry_v2_offset_sidedata_comp_len);
		} else {
			raise_revlog_error();
			return -1;
		}
		incr = self->entry_size + comp_len + sidedata_comp_len;
		if (offsets)
			offsets[len] = data + pos;
		len++;
		pos += incr;
	}

	/* A scan that does not land exactly on the end means truncated or
	 * corrupt data. */
	if (pos != end) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
		return -1;
	}

	return len;
}
3018 3018
/*
 * Initialize an indexObject from revlog index data.
 *
 * Python signature: index(data, inlined[, format]) where "data" is any
 * object supporting the buffer interface, "inlined" a boolean-ish flag
 * and "format" one of the revlog format version constants.
 *
 * Returns 0 on success, -1 (with an exception set) on error.
 */
static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	static char *kwlist[] = {"data", "inlined", "format", NULL};

	/* Initialize before argument-checking to avoid index_dealloc() crash.
	 */
	self->added = NULL;
	self->new_length = 0;
	self->added_length = 0;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->ntinitialized = 0;
	self->offsets = NULL;
	self->nodelen = 20;
	self->nullentry = NULL;
	self->rust_ext_compat = 1;
	self->format_version = format_v1;

	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
	                                 &data_obj, &inlined_obj,
	                                 &(self->format_version)))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
		                "data does not support buffer interface");
		return -1;
	}
	if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
		PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
		return -1;
	}

	/* entry size depends on the on-disk format version.
	   NOTE(review): an unrecognized format_version leaves entry_size
	   unset here; presumably rejected earlier by the caller — confirm. */
	if (self->format_version == format_v1) {
		self->entry_size = v1_entry_size;
	} else if (self->format_version == format_v2) {
		self->entry_size = v2_entry_size;
	} else if (self->format_version == format_cl2) {
		self->entry_size = cl2_entry_size;
	}

	/* tuple returned for the null revision (rev -1) */
	self->nullentry = Py_BuildValue(
	    "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
	    0, comp_mode_inline, comp_mode_inline, rank_unknown);

	if (!self->nullentry)
		return -1;
	PyObject_GC_UnTrack(self->nullentry);

	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		/* inline index: entry boundaries must be discovered by
		   scanning, since data is interleaved with the headers */
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->length = len;
	} else {
		/* plain index: fixed-size records only */
		if (size % self->entry_size) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->length = size / self->entry_size;
	}

	return 0;
bail:
	return -1;
}
3101 3101
/*
 * The index object serves as its own nodemap: return a new reference
 * to self (exposed as the "nodemap" attribute via index_getset).
 */
static PyObject *index_nodemap(indexObject *self)
{
	Py_INCREF(self);
	return (PyObject *)self;
}
3107 3107
3108 3108 static void _index_clearcaches(indexObject *self)
3109 3109 {
3110 3110 if (self->offsets) {
3111 3111 PyMem_Free((void *)self->offsets);
3112 3112 self->offsets = NULL;
3113 3113 }
3114 3114 if (self->ntinitialized) {
3115 3115 nt_dealloc(&self->nt);
3116 3116 }
3117 3117 self->ntinitialized = 0;
3118 3118 Py_CLEAR(self->headrevs);
3119 3119 }
3120 3120
/*
 * Python-visible "clearcaches" method: drop derived caches and reset
 * the nodetree scan position and lookup statistics.
 */
static PyObject *index_clearcaches(indexObject *self)
{
	_index_clearcaches(self);
	self->ntrev = -1;
	self->ntlookups = self->ntmisses = 0;
	Py_RETURN_NONE;
}
3128 3128
/*
 * tp_dealloc: release caches, the borrowed data buffer and every owned
 * reference. index_init() pre-nulls all fields, so this is safe even
 * when initialization failed partway through.
 */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	PyMem_Free(self->added);
	Py_XDECREF(self->nullentry);
	PyObject_Del(self);
}
3142 3142
/* sequence protocol: len(index), index[rev], "node in index" */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
3153 3153
/* mapping protocol: index[node], index[node] = rev, del index[...] */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
3159 3159
/*
 * Methods exposed on the index type. Note that "get" and "get_rev"
 * share one implementation (index_m_get), as do "headrevs" and
 * "headrevsfiltered" (index_headrevs).
 */
static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
     "return `rev` associated with a node or None"},
    {"has_node", (PyCFunction)index_m_has_node, METH_O,
     "return True if the node exist in the index"},
    {"rev", (PyCFunction)index_m_rev, METH_O,
     "return `rev` associated with a node or raise RevlogError"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
     METH_VARARGS, "replace an existing index entry with a new value"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
     "Gather snapshot data in a cache dict"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
     "return an entry in binary form"},
    {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
     "pack the revlog header information into binary"},
    {NULL} /* Sentinel */
};
3205 3205
/* read-only attributes; "nodemap" returns the index itself */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
3210 3210
3211 3211 static PyMemberDef index_members[] = {
3212 3212 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3213 3213 "size of an index entry"},
3214 3214 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3215 3215 "size of an index entry"},
3216 3216 {NULL} /* Sentinel */
3217 3217 };
3218 3218
/* type object wiring the index protocols, methods and attributes */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    index_members,                 /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
3258 3258
3259 3259 /*
3260 3260 * returns a tuple of the form (index, cache) with elements as
3261 3261 * follows:
3262 3262 *
3263 3263 * index: an index object that lazily parses Revlog (v1 or v2) records
3264 3264 * cache: if data is inlined, a tuple (0, index_file_content), else None
3265 3265 * index_file_content could be a string, or a buffer
3266 3266 *
3267 3267 * added complications are for backwards compatibility
3268 3268 */
3269 3269 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3270 3270 {
3271 3271 PyObject *cache = NULL;
3272 3272 indexObject *idx;
3273 3273 int ret;
3274 3274
3275 3275 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3276 3276 if (idx == NULL)
3277 3277 goto bail;
3278 3278
3279 3279 ret = index_init(idx, args, kwargs);
3280 3280 if (ret == -1)
3281 3281 goto bail;
3282 3282
3283 3283 if (idx->inlined) {
3284 3284 cache = Py_BuildValue("iO", 0, idx->data);
3285 3285 if (cache == NULL)
3286 3286 goto bail;
3287 3287 } else {
3288 3288 cache = Py_None;
3289 3289 Py_INCREF(cache);
3290 3290 }
3291 3291
3292 3292 return Py_BuildValue("NN", idx, cache);
3293 3293
3294 3294 bail:
3295 3295 Py_XDECREF(idx);
3296 3296 Py_XDECREF(cache);
3297 3297 return NULL;
3298 3298 }
3299 3299
/* function table exported through a PyCapsule for other C/Rust code */
static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
};
3305 3305
/*
 * Register the revlog types and the C-API capsule on the parsers
 * module.
 *
 * NOTE(review): PyType_Ready() failures return silently with the
 * module only partially populated, and PyModule_AddObject() results
 * are not checked — presumably the caller surfaces the pending
 * exception; confirm.
 */
void revlog_module_init(PyObject *mod)
{
	PyObject *caps = NULL;
	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
		return;
	Py_INCREF(&HgRevlogIndex_Type);
	PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
	if (caps != NULL)
		PyModule_AddObject(mod, "revlog_CAPI", caps);
}
@@ -1,1472 +1,1471 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_NO,
24 24 KIND_CHANGELOG,
25 25 KIND_FILELOG,
26 26 KIND_MANIFESTLOG,
27 27 REVIDX_ISCENSORED,
28 28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 29 )
30 30
31 31 from ..thirdparty import attr
32 32
33 33 from .. import (
34 34 error,
35 35 mdiff,
36 36 util,
37 37 )
38 38
39 39 from . import flagutil
40 40
41 41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 42 LIMIT_DELTA2TEXT = 2
43 43
44 44
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both nullrev and rev 0 start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev always counts as a snapshot
        return rev == nullrev or rev in self._snapshot
78 78
79 79
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # prefer the index's native (C/Rust) density slicing when available,
    # falling back to the pure-python implementation
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    ):
        # density slicing may still yield chunks wider than targetsize;
        # cut those down further
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
153 153
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # whole chunk already fits: nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget; extend the current chunk
            endrevidx = idx + 1
        else:
            # flush the current chunk and start a new one at this rev
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until it fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains past the last cut point
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
308 308
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # zero or one revision: nothing to slice
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions in a list,
    # sorted below so the largest ones can be popped off the end first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read,
        # which raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
434 434
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate (a rev not yet appended to
    # the revlog), there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (
            endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
        ):
            endidx -= 1

    return revs[startidx:endidx]
487 487
488 488
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the start of the first rev to the end of the last one
    return revlog.end(revs[-1]) - revlog.start(revs[0])
517 517
518 518
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # text that hashed/validated cleanly cannot carry the
            # censored flag
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
548 548
549 549
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # distance from the base revision (bounds the I/O needed to apply it)
    distance = attr.ib()
    # length of the delta payload (excluding the compression header)
    deltalen = attr.ib()
    # (compression-header, payload) pair; header b'u' means uncompressed
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated size of the deltas that must be applied
    compresseddeltalen = attr.ib()
    # snapshot depth, or None when this delta is not a snapshot
    snapshotdepth = attr.ib()
560 560
561 561
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    # data[0] is the compression header; b'u' marks an uncompressed payload
    assert delta.data[0] == b'u', delta.data[0]
    # rebuild the (frozen) _deltainfo with an empty header and a length
    # one byte shorter, accounting for the dropped b'u' marker
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
578 578
579 579
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good.

    "Good" means the delta stays within the disk-span, size, and chain-length
    bounds that we know to be performant.
    """
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    fulltext_len = revinfo.textlen

    # Maximum span of data we accept to read from disk.
    span_limit = revlog._maxdeltachainspan
    if not span_limit:
        # make the span check below a no-op
        span_limit = deltainfo.distance
    span_limit = max(span_limit, fulltext_len * 4)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and span_limit < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # A delta larger than the text it encodes is never a win to store.
    if fulltext_len < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The whole chain must stay below K * target text length.
    if fulltext_len * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # Too many deltas in the chain makes rebuilding the fulltext slow.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # Intermediate snapshots must shrink geometrically with depth; the limit
    # exists to prevent endless chains of intermediate deltas.
    depth = deltainfo.snapshotdepth
    if depth is not None and (fulltext_len >> depth) < deltainfo.deltalen:
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
651 651
652 652
653 653 # If a revision's full text is that much bigger than a base candidate full
654 654 # text's, it is very unlikely that it will produce a valid delta. We no longer
655 655 # consider these candidates.
656 656 LIMIT_BASE2TEXT = 500
657 657
658 658
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a coroutine-style generator: after each yielded group the caller
    send()s back the chosen good base (or None), which drives the refinement
    done by _refinedgroups. A final None marks exhaustion.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    # revisions already tried (or known useless); nullrev is handled last
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            # an higher authority deamed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            # a base whose fulltext is vastly smaller than the target is very
            # unlikely to produce a valid delta (see LIMIT_BASE2TEXT)
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    # stop chunking once a batch brought no improvement
                    if prev_good == good:
                        break

    yield None
800 800
801 801
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield candidate groups and refine around the caller's feedback.

    Coroutine-style generator: the value send()-back after each yield is the
    base the caller accepted (or None). Once a snapshot base is accepted, we
    try to walk the snapshot chain down (toward its bases) and back up
    (toward its children) looking for an even better one.
    """
    good = None
    # First we try to reuse a the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # XXX cache me higher
    snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
865 865
866 866
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: set(snapshot-revs)
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshot_cache.snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
986 986
987 987
class SnapshotCache:
    """Cache mapping each delta-base revision to the snapshots built on it.

    The cache lazily covers the inclusive revision range
    [_start_rev, _end_rev] and can be extended in either direction through
    `update`.

    Note: the source carried leftover pre-change lines from a merged diff
    (a duplicate `defaultdict(list)` assignment and an `.append` call); only
    the set-based versions are kept here.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # rev of delta parent -> set of snapshot revisions using it as base
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the range already scanned; None before the
        # first call to `update`
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        nb_revs = len(revlog)
        end_rev = nb_revs - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # extend the scanned range on whichever side is missing
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the requested range must not overlap the already-scanned one
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # fast path: the native index can fill the cache directly
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1042 1041
1043 1042
class deltacomputer:
    """Search and build the best delta to store for a new revision.

    Drives _candidategroups over candidate bases, builds each candidate
    delta, and falls back to a full snapshot when no acceptable delta is
    found. Optional hooks gather debug/timing information.
    """

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        self.revlog = revlog
        # callable receiving debug byte-strings, or None to disable
        self._write_debug = write_debug
        # emit per-candidate search traces (only if _write_debug is set)
        self._debug_search = debug_search
        # list collecting per-revision debug dicts, or None to disable
        self._debug_info = debug_info

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # btext acts as a one-slot cache for the reconstructed fulltext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Compute the raw delta between `base` and the new revision."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo for a delta against `base`.

        Returns None when an upper-bound estimate shows the delta cannot
        satisfy the intermediate-snapshot size constraints.
        """
        # can we use the cached delta?
        revlog = self.revlog
        debug_search = self._write_debug is not None and self._debug_search
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                # skip over empty deltas between the cached base and ours
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdept need to be neither None nor 0 level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo, curr):
        """Build a _deltainfo storing the revision as a full snapshot."""
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        if excluded_bases is None:
            excluded_bases = set()

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        gather_debug = (
            self._write_debug is not None or self._debug_info is not None
        )
        debug_search = self._write_debug is not None and self._debug_search

        if gather_debug:
            start = util.timer()

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        if gather_debug:
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1 in candidaterevs or p2 in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1:
                        candidate_type = b"p1"
                    elif candidaterev == p2:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest delta found this round
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back to drive refinement
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            used_cached = (
                cachedelta is not None
                and dbg_try_rounds == 1
                and dbg_try_count == 1
                and deltainfo.base == cachedelta[0]
            )
            dbg = {
                'duration': end - start,
                'revision': target_rev,
                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                'search_round_count': dbg_try_rounds,
                'using-cached-base': used_cached,
                'delta_try_count': dbg_try_count,
                'type': dbg_type,
                'p1-chain-len': p1_chain_len,
                'p2-chain-len': p2_chain_len,
            }
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog

            if self._debug_info is not None:
                self._debug_info.append(dbg)

            if self._write_debug is not None:
                msg = (
                    b"DBG-DELTAS:"
                    b" %-12s"
                    b" rev=%d:"
                    b" delta-base=%d"
                    b" is-cached=%d"
                    b" - search-rounds=%d"
                    b" try-count=%d"
                    b" - delta-type=%-6s"
                    b" snap-depth=%d"
                    b" - p1-chain-length=%d"
                    b" p2-chain-length=%d"
                    b" - duration=%f"
                    b"\n"
                )
                msg %= (
                    dbg["target-revlog"],
                    dbg["revision"],
                    dbg["delta-base"],
                    dbg["using-cached-base"],
                    dbg["search_round_count"],
                    dbg["delta_try_count"],
                    dbg["type"],
                    dbg["snapshot-depth"],
                    dbg["p1-chain-len"],
                    dbg["p2-chain-len"],
                    dbg["duration"],
                )
                self._write_debug(msg)
        return deltainfo
1448 1447
1449 1448
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    mode = COMP_MODE_INLINE
    if not header:
        if not payload:
            # no data to store at all... declare it uncompressed
            mode = COMP_MODE_PLAIN
        else:
            first = payload[0:1]
            if first == b'\0':
                mode = COMP_MODE_PLAIN
            elif first == default_compression_header:
                mode = COMP_MODE_DEFAULT
    elif header == b'u':
        # we have a more efficient way to declare uncompressed
        mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return mode, deltainfo
@@ -1,523 +1,523 b''
1 1 # test revlog interaction about raw data (flagprocessor)
2 2
3 3
4 4 import hashlib
5 5 import sys
6 6
7 7 from mercurial import (
8 8 encoding,
9 9 revlog,
10 10 transaction,
11 11 vfs,
12 12 )
13 13
14 14 from mercurial.revlogutils import (
15 15 constants,
16 16 deltas,
17 17 flagutil,
18 18 )
19 19
20 20
21 21 class _NoTransaction:
22 22 """transaction like object to update the nodemap outside a transaction"""
23 23
24 24 def __init__(self):
25 25 self._postclose = {}
26 26
27 27 def addpostclose(self, callback_id, callback_func):
28 28 self._postclose[callback_id] = callback_func
29 29
30 30 def registertmp(self, *args, **kwargs):
31 31 pass
32 32
33 33 def addbackup(self, *args, **kwargs):
34 34 pass
35 35
36 36 def add(self, *args, **kwargs):
37 37 pass
38 38
39 39 def addabort(self, *args, **kwargs):
40 40 pass
41 41
42 42 def _report(self, *args):
43 43 pass
44 44
45 45
# TESTTMP is optional. This makes it convenient to run without run-tests.py
tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))

# Enable generaldelta otherwise revlog won't use delta as expected by the test
# (sparse-revlog is also needed by the slicing and snapshot tests below)
tvfs.options = {
    b'generaldelta': True,
    b'revlogv1': True,
    b'sparse-revlog': True,
}
55 55
56 56
def abort(msg):
    """Report a test failure and stop the script.

    Exits with status 0 so run-tests.py can still diff the output.
    """
    print('abort: %s' % msg)
    sys.exit()
61 61
62 62
# Register a revlog processor for flag EXTSTORED.
#
# It simply prepends a fixed header, and replaces '1' to 'i'. So it has
# insertion and replacement, and may be interesting to test revlog's line-based
# deltas.
# Prepended by writeprocessor and stripped again by readprocessor.
_extheader = b'E\n'
69 69
70 70
def readprocessor(self, rawtext):
    """Undo writeprocessor: strip the header and restore '1' characters.

    True: the returned text could be used to verify hash.
    """
    stripped = rawtext[len(_extheader):]
    return stripped.replace(b'i', b'1'), True
75 75
76 76
def writeprocessor(self, text):
    """Encode text for storage: prepend the header and mangle '1' to 'i'.

    False: the returned rawtext shouldn't be used to verify hash.
    """
    encoded = text.replace(b'1', b'i')
    return _extheader + encoded, False
81 81
82 82
def rawprocessor(self, rawtext):
    """Refuse hash verification on raw text.

    Returns False: only the content produced by "readprocessor" may be
    used to verify the hash.
    """
    return False
87 87
88 88
# EXTSTORED revisions now go through the processors above on read and write
flagutil.addflagprocessor(
    revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
)
92 92
93 93 # Utilities about reading and appending revlog
94 94
95 95
def newtransaction():
    """Open a transaction backed by tvfs (required to write revlogs)."""

    def report(msg):
        # swallow transaction chatter; the test compares stdout
        pass

    return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
100 100
101 101
def newrevlog(name=b'_testrevlog', recreate=False):
    """Open the test revlog called ``name``, optionally wiping it first."""
    if recreate:
        tvfs.tryunlink(name + b'.i')
    return revlog.revlog(
        tvfs, target=(constants.KIND_OTHER, b'test'), radix=name
    )
108 108
109 109
def appendrev(rlog, text, tr, isext=False, isdelta=True):
    """Append a revision. If isext is True, set the EXTSTORED flag so flag
    processor will be used (and rawtext is different from text). If isdelta is
    True, force the revision to be a delta, otherwise it's full text.
    """
    nextrev = len(rlog)
    p1 = rlog.node(nextrev - 1)
    p2 = rlog.nullid
    flags = revlog.REVIDX_EXTSTORED if isext else revlog.REVIDX_DEFAULT_FLAGS
    # Change storedeltachains temporarily, to override revlog's delta decision
    rlog._storedeltachains = isdelta
    try:
        rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
        return nextrev
    except Exception as ex:
        abort('rev %d: failed to append: %s' % (nextrev, ex))
    finally:
        # Restore storedeltachains. It is always True, see revlog.__init__
        rlog._storedeltachains = True
132 132
133 133
def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True):
    """Copy revlog to destname using revlog.addgroup. Return the copied revlog.

    This emulates push or pull. They use changegroup. Changegroup requires
    repo to work. We don't have a repo, so a dummy changegroup is used.

    If optimaldelta is True, use optimized delta parent, so the destination
    revlog could probably reuse it. Otherwise it builds sub-optimal delta, and
    the destination revlog needs more work to use it.

    This exercises some revlog.addgroup (and revlog._addrevision(text=None))
    code path, which is not covered by "appendrev" alone.
    """

    class dummychangegroup:
        @staticmethod
        def deltachunk(pnode):
            # Produce the chunk for the revision following ``pnode``;
            # returning an empty dict signals the end of the stream.
            pnode = pnode or rlog.nullid
            parentrev = rlog.rev(pnode)
            r = parentrev + 1
            if r >= len(rlog):
                return {}
            if optimaldelta:
                deltaparent = parentrev
            else:
                # suboptimal deltaparent
                deltaparent = min(0, parentrev)
            if not rlog.candelta(deltaparent, r):
                deltaparent = -1
            return {
                b'node': rlog.node(r),
                b'p1': pnode,
                b'p2': rlog.nullid,
                b'cs': rlog.node(rlog.linkrev(r)),
                b'flags': rlog.flags(r),
                b'deltabase': rlog.node(deltaparent),
                b'delta': rlog.revdiff(deltaparent, r),
                b'sidedata': rlog.sidedata(r),
            }

        def deltaiter(self):
            # Walk the chunks in order, chaining each revision to the
            # previously yielded node (iter() stops on the {} sentinel).
            chain = None
            for chunkdata in iter(lambda: self.deltachunk(chain), {}):
                node = chunkdata[b'node']
                p1 = chunkdata[b'p1']
                p2 = chunkdata[b'p2']
                cs = chunkdata[b'cs']
                deltabase = chunkdata[b'deltabase']
                delta = chunkdata[b'delta']
                flags = chunkdata[b'flags']
                sidedata = chunkdata[b'sidedata']

                chain = node

                yield (node, p1, p2, cs, deltabase, delta, flags, sidedata)

    def linkmap(lnode):
        return rlog.rev(lnode)

    dlog = newrevlog(destname, recreate=True)
    dummydeltas = dummychangegroup().deltaiter()
    dlog.addgroup(dummydeltas, linkmap, tr)
    return dlog
197 197
198 198
def lowlevelcopy(rlog, tr, destname=b'_destrevlog'):
    """Like addgroupcopy, but use the low level revlog._addrevision directly.

    It exercises some code paths that are hard to reach easily otherwise.
    """
    dlog = newrevlog(destname, recreate=True)
    for r in rlog:
        p1 = rlog.node(r - 1)
        p2 = rlog.nullid
        if r == 0 or (rlog.flags(r) & revlog.REVIDX_EXTSTORED):
            # first revision, or flagged one: store as full raw text
            text = rlog.rawdata(r)
            cachedelta = None
        else:
            # deltaparent cannot have EXTSTORED flag.
            # Pick the highest earlier revision without the flag (or -1).
            # Note: `&` binds tighter than `==`, so this tests (flags & FLAG) == 0.
            deltaparent = max(
                [-1]
                + [
                    p
                    for p in range(r)
                    if rlog.flags(p) & revlog.REVIDX_EXTSTORED == 0
                ]
            )
            text = None
            cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
        flags = rlog.flags(r)
        with dlog._writing(_NoTransaction()):
            dlog._addrevision(
                rlog.node(r),
                text,
                tr,
                r,
                p1,
                p2,
                flags,
                cachedelta,
            )
    return dlog
236 236
237 237
238 238 # Utilities to generate revisions for testing
239 239
240 240
def genbits(n):
    """Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n).
    i.e. the generated numbers have a width of n bits.

    The combination of two adjacent numbers will cover all possible cases.
    That is to say, given any x, y where both x, and y are in range(2 ** n),
    there is an x followed immediately by y in the generated sequence.
    """
    m = 2 ** n

    # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
    def gray(x):
        return x ^ (x >> 1)

    reversegray = {}
    for i in range(m):
        reversegray[gray(i)] = i

    # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
    # the next unused gray code where higher n bits equal to X.

    # For gray codes whose higher bits are X, used[X] of them were emitted.
    used = [0] * m

    # Iterate from 0.
    x = 0
    yield x
    for _ in range(m * m):
        x = reversegray[x]
        y = gray(used[x] + x * m) & (m - 1)
        assert used[x] < m
        used[x] += 1
        x = y
        yield x
271 271
272 272
def gentext(rev):
    '''Given a revision number, generate dummy text'''
    # always at least one line (-1), plus rev % 5 more
    lines = [b'%d\n' % j for j in range(-1, rev % 5)]
    return b''.join(lines)
276 276
277 277
def writecases(rlog, tr):
    """Write some revisions interested to the test.

    The test is interested in 3 properties of a revision:

    - Is it a delta or a full text? (isdelta)
      This is to catch some delta application issues.
    - Does it have a flag of EXTSTORED? (isext)
      This is to catch some flag processor issues. Especially when
      interacted with revlog deltas.
    - Is its text empty? (isempty)
      This is less important. It is intended to try to catch some careless
      checks like "if text" instead of "if text is None". Note: if flag
      processor is involved, raw text may be not empty.

    Write 65 revisions. So that all combinations of the above flags for
    adjacent revisions are covered. That is to say,

    len(set(
        (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
        for r in range(len(rlog) - 1)
    )) is 64.

    Where "r.delta", "r.ext", and "r.empty" are booleans matching properties
    mentioned above.

    Return expected [(text, rawtext)].
    """
    result = []
    for i, x in enumerate(genbits(3)):
        # each bit of the gray-code value drives one property
        isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
        if isempty:
            text = b''
        else:
            text = gentext(i)
        rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)

        # Verify text, rawtext, and rawsize
        if isext:
            rawtext = writeprocessor(None, text)[0]
        else:
            rawtext = text
        if rlog.rawsize(rev) != len(rawtext):
            abort('rev %d: wrong rawsize' % rev)
        if rlog.revision(rev) != text:
            abort('rev %d: wrong text' % rev)
        if rlog.rawdata(rev) != rawtext:
            abort('rev %d: wrong rawtext' % rev)
        result.append((text, rawtext))

        # Verify flags like isdelta, isext work as expected
        # isdelta can be overridden to False if this or p1 has isext set
        if bool(rlog.deltaparent(rev) > -1) and not isdelta:
            abort('rev %d: isdelta is unexpected' % rev)
        if bool(rlog.flags(rev)) != isext:
            abort('rev %d: isext is ineffective' % rev)
    return result
335 335
336 336
337 337 # Main test and checking
338 338
339 339
def checkrevlog(rlog, expected):
    '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
    # Test using different access orders. This could expose some issues
    # depending on revlog caching (see revlog._cache).
    for r0 in range(len(rlog) - 1):
        r1 = r0 + 1
        for revorder in [[r0, r1], [r1, r0]]:
            for raworder in [[True], [False], [True, False], [False, True]]:
                # reopen the revlog each time so its caches start empty
                nlog = newrevlog()
                for rev in revorder:
                    for raw in raworder:
                        if raw:
                            t = nlog.rawdata(rev)
                        else:
                            t = nlog.revision(rev)
                        if t != expected[rev][int(raw)]:
                            abort(
                                'rev %d: corrupted %stext'
                                % (rev, raw and 'raw' or '')
                            )
360 360
361 361
# (delta chain, expected slices, target chunk size) triples for slicingtest
slicingdata = [
    ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
    ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
    (
        [-1, 0, 1, 2, 3, 55, 56, 58, 59, 60],
        [[-1, 0, 1], [2], [58], [59, 60]],
        10,
    ),
]
371 371
372 372
def slicingtest(rlog):
    """Run deltas.slicechunk over slicingdata and report any mismatch."""
    oldmin = rlog._srmingapsize
    try:
        # the test revlog is small, we remove the floor under which we
        # slicing is diregarded.
        rlog._srmingapsize = 0
        for chain, expected, target in slicingdata:
            result = list(deltas.slicechunk(rlog, chain, targetsize=target))
            if result != expected:
                print('slicing differ:')
                print(' chain: %s' % chain)
                print(' target: %s' % target)
                print(' expected: %s' % expected)
                print(' result: %s' % result)
    finally:
        rlog._srmingapsize = oldmin
391 391
392 392
def md5sum(s):
    """Return the 16-byte MD5 digest of ``s``."""
    hasher = hashlib.md5()
    hasher.update(s)
    return hasher.digest()
395 395
396 396
def _maketext(*coord):
    """create piece of text according to range of integers

    The test returned use a md5sum of the integer to make it less
    compressible"""
    chunks = []
    for start, size in coord:
        digests = [md5sum(b'%d' % r) for r in range(start, start + size)]
        chunks.append(b'\n'.join(digests))
    return b'\n'.join(chunks) + b'\n'
408 408
409 409
# revision contents for the snapshot tests; built from md5 blocks so the
# texts share a common prefix but do not compress too well
data = [
    _maketext((0, 120), (456, 60)),
    _maketext((0, 120), (345, 60)),
    _maketext((0, 120), (734, 60)),
    _maketext((0, 120), (734, 60), (923, 45)),
    _maketext((0, 120), (734, 60), (234, 45)),
    _maketext((0, 120), (734, 60), (564, 45)),
    _maketext((0, 120), (734, 60), (361, 45)),
    _maketext((0, 120), (734, 60), (489, 45)),
    _maketext((0, 120), (123, 60)),
    _maketext((0, 120), (145, 60)),
    _maketext((0, 120), (104, 60)),
    _maketext((0, 120), (430, 60)),
    _maketext((0, 120), (430, 60), (923, 45)),
    _maketext((0, 120), (430, 60), (234, 45)),
    _maketext((0, 120), (430, 60), (564, 45)),
    _maketext((0, 120), (430, 60), (361, 45)),
    _maketext((0, 120), (430, 60), (489, 45)),
    _maketext((0, 120), (249, 60)),
    _maketext((0, 120), (832, 60)),
    _maketext((0, 120), (891, 60)),
    _maketext((0, 120), (543, 60)),
    _maketext((0, 120), (120, 60)),
    _maketext((0, 120), (60, 60), (768, 30)),
    _maketext((0, 120), (60, 60), (260, 30)),
    _maketext((0, 120), (60, 60), (450, 30)),
    _maketext((0, 120), (60, 60), (361, 30)),
    _maketext((0, 120), (60, 60), (886, 30)),
    _maketext((0, 120), (60, 60), (116, 30)),
    _maketext((0, 120), (60, 60), (567, 30), (629, 40)),
    _maketext((0, 120), (60, 60), (569, 30), (745, 40)),
    _maketext((0, 120), (60, 60), (777, 30), (700, 40)),
    _maketext((0, 120), (60, 60), (618, 30), (398, 40), (158, 10)),
]
444 444
445 445
def makesnapshot(tr):
    """Fill a fresh revlog with every ``data`` revision and return it."""
    rl = newrevlog(name=b'_snaprevlog3', recreate=True)
    for text in data:
        appendrev(rl, text, tr)
    return rl
451 451
452 452
453 453 snapshots = [-1, 0, 6, 8, 11, 17, 19, 21, 25, 30]
454 454
455 455
def issnapshottest(rlog):
    """Check rlog.issnapshot against the expected ``snapshots`` list."""
    result = []
    # nullrev first, then every stored revision in order
    for rev in [-1] + list(rlog):
        if rlog.issnapshot(rev):
            result.append(rev)
    if result != snapshots:
        print('snapshot differ:')
        print(' expected: %s' % snapshots)
        print(' got: %s' % result)
467 467
468 468
# expected {parent: {snapshot revisions}} maps for SnapshotCache, with and
# without the 15-revision limit
snapshotmapall = {-1: {0, 30}, 0: {6, 8, 11, 17, 19, 25}, 8: {21}}
snapshotmap15 = {-1: {30}, 0: {17, 19, 25}, 8: {21}}
471 471
472 472
def findsnapshottest(rlog):
    """Check SnapshotCache.snapshots against the expected maps."""
    cache = deltas.SnapshotCache()
    cache.update(rlog)
    got_all = dict(cache.snapshots)
    if got_all != snapshotmapall:
        print('snapshot map differ:')
        print(' expected: %s' % snapshotmapall)
        print(' got: %s' % got_all)
    # same check, but stopping the scan at revision 15
    cache15 = deltas.SnapshotCache()
    cache15.update(rlog, 15)
    got_15 = dict(cache15.snapshots)
    if got_15 != snapshotmap15:
        print('snapshot map differ:')
        print(' expected: %s' % snapshotmap15)
        print(' got: %s' % got_15)
488 488
489 489
def maintest():
    # exercise every code path: direct writes, addgroup, clone, low-level
    # _addrevision, chunk slicing and snapshot detection
    with newtransaction() as tr:
        rl = newrevlog(recreate=True)
        expected = writecases(rl, tr)
        checkrevlog(rl, expected)
        print('local test passed')
        # Copy via revlog.addgroup
        rl1 = addgroupcopy(rl, tr)
        checkrevlog(rl1, expected)
        rl2 = addgroupcopy(rl, tr, optimaldelta=False)
        checkrevlog(rl2, expected)
        print('addgroupcopy test passed')
        # Copy via revlog.clone
        rl3 = newrevlog(name=b'_destrevlog3', recreate=True)
        rl.clone(tr, rl3)
        checkrevlog(rl3, expected)
        print('clone test passed')
        # Copy via low-level revlog._addrevision
        rl4 = lowlevelcopy(rl, tr)
        checkrevlog(rl4, expected)
        print('lowlevelcopy test passed')
        slicingtest(rl)
        print('slicing test passed')
        rl5 = makesnapshot(tr)
        issnapshottest(rl5)
        print('issnapshot test passed')
        findsnapshottest(rl5)
        print('findsnapshot test passed')
518 518
519 519
# report any crash through abort() so run-tests.py can still diff the output
try:
    maintest()
except Exception as ex:
    abort('crashed: %s' % ex)
General Comments 0
You need to be logged in to leave comments. Login now