##// END OF EJS Templates
delta-find: use a smarter object for snapshot caching...
marmoute -
r50573:efbbc2f9 default
parent child Browse files
Show More
@@ -1,61 +1,61 b''
1 1 #include <Python.h>
2 2 #include <assert.h>
3 3 #include <stdlib.h>
4 4 #include <unistd.h>
5 5
6 6 #include <string>
7 7
8 8 #include "pyutil.h"
9 9
10 10 extern "C" {
11 11
12 12 static PYCODETYPE *code;
13 13
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
	/* Boot the embedded CPython interpreter once per fuzzing process
	 * and pre-compile the driver script so each fuzz iteration only
	 * has to evaluate the already-compiled code object. */
	contrib::initpy(*argv[0]);
	/* The script exercises the C index extension (parse_index2 and
	 * friends) against the fuzzer-provided `data` buffer, for both
	 * inline and non-inline revlog layouts. Exceptions are expected
	 * on malformed input and deliberately swallowed. */
	code = (PYCODETYPE *)Py_CompileString(R"py(
for inline in (True, False):
  try:
    index, cache = parsers.parse_index2(data, inline)
    index.slicechunktodensity(list(range(len(index))), 0.5, 262144)
    index.stats()
    index.findsnapshots({}, 0, len(index) - 1)
    10 in index
    for rev in range(len(index)):
      index.reachableroots(0, [len(index)-1], [rev])
      node = index[rev][7]
      partial = index.shortest(node)
      index.partialmatch(node[:partial])
      index.deltachain(rev, None, True)
  except Exception as e:
    pass
  # uncomment this print if you're editing this Python code
  # to debug failures.
  # print e
)py",
	                                      "fuzzer", Py_file_input);
	return 0;
}
40 40
41 41 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
42 42 {
43 43 // Don't allow fuzzer inputs larger than 60k, since we'll just bog
44 44 // down and not accomplish much.
45 45 if (Size > 60000) {
46 46 return 0;
47 47 }
48 48 PyObject *text =
49 49 PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
50 50 PyObject *locals = PyDict_New();
51 51 PyDict_SetItemString(locals, "data", text);
52 52 PyObject *res = PyEval_EvalCode(code, contrib::pyglobals(), locals);
53 53 if (!res) {
54 54 PyErr_Print();
55 55 }
56 56 Py_XDECREF(res);
57 57 Py_DECREF(locals);
58 58 Py_DECREF(text);
59 59 return 0; // Non-zero return values are reserved for future use.
60 60 }
61 61 }
@@ -1,3315 +1,3324 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 typedef struct indexObjectStruct indexObject;
27 27
28 28 typedef struct {
29 29 int children[16];
30 30 } nodetreenode;
31 31
32 32 typedef struct {
33 33 int abi_version;
34 34 Py_ssize_t (*index_length)(const indexObject *);
35 35 const char *(*index_node)(indexObject *, Py_ssize_t);
36 36 int (*fast_rank)(indexObject *, Py_ssize_t);
37 37 int (*index_parents)(PyObject *, int, int *);
38 38 } Revlog_CAPI;
39 39
40 40 /*
41 41 * A base-16 trie for fast node->rev mapping.
42 42 *
43 43 * Positive value is index of the next node in the trie
44 44 * Negative value is a leaf: -(rev + 2)
45 45 * Zero is empty
46 46 */
47 47 typedef struct {
48 48 indexObject *index;
49 49 nodetreenode *nodes;
50 50 Py_ssize_t nodelen;
51 51 size_t length; /* # nodes in use */
52 52 size_t capacity; /* # nodes allocated */
53 53 int depth; /* maximum depth of tree */
54 54 int splits; /* # splits performed */
55 55 } nodetree;
56 56
57 57 typedef struct {
58 58 PyObject_HEAD /* ; */
59 59 nodetree nt;
60 60 } nodetreeObject;
61 61
62 62 /*
63 63 * This class has two behaviors.
64 64 *
65 65 * When used in a list-like way (with integer keys), we decode an
66 66 * entry in a RevlogNG index file on demand. We have limited support for
67 67 * integer-keyed insert and delete, only at elements right before the
68 68 * end.
69 69 *
70 70 * With string keys, we lazily perform a reverse mapping from node to
71 71 * rev, using a base-16 trie.
72 72 */
73 73 struct indexObjectStruct {
74 74 PyObject_HEAD
75 75 /* Type-specific fields go here. */
76 76 PyObject *data; /* raw bytes of index */
77 77 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
78 78 PyObject *nullentry; /* fast path for references to null */
79 79 Py_buffer buf; /* buffer of data */
80 80 const char **offsets; /* populated on demand */
81 81 Py_ssize_t length; /* current on-disk number of elements */
82 82 unsigned new_length; /* number of added elements */
83 83 unsigned added_length; /* space reserved for added elements */
84 84 char *added; /* populated on demand */
85 85 PyObject *headrevs; /* cache, invalidated on changes */
86 86 PyObject *filteredrevs; /* filtered revs set */
87 87 nodetree nt; /* base-16 trie */
88 88 int ntinitialized; /* 0 or 1 */
89 89 int ntrev; /* last rev scanned */
90 90 int ntlookups; /* # lookups */
91 91 int ntmisses; /* # lookups that miss the cache */
92 92 int inlined;
93 93 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
94 94 */
95 95 long rust_ext_compat; /* compatibility with being used in rust
96 96 extensions */
97 97 long format_version; /* format version selector (format_*) */
98 98 };
99 99
100 100 static Py_ssize_t index_length(const indexObject *self)
101 101 {
102 102 return self->length + self->new_length;
103 103 }
104 104
105 105 static const char nullid[32] = {0};
106 106 static const Py_ssize_t nullrev = -1;
107 107
108 108 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
109 109
110 110 static int index_find_node(indexObject *self, const char *node);
111 111
112 112 #if LONG_MAX == 0x7fffffffL
113 113 static const char *const tuple_format = "Kiiiiiiy#KiBBi";
114 114 #else
115 115 static const char *const tuple_format = "kiiiiiiy#kiBBi";
116 116 #endif
117 117
118 118 /* A RevlogNG v1 index entry is 64 bytes long. */
119 119 static const long v1_entry_size = 64;
120 120
121 121 /* A Revlogv2 index entry is 96 bytes long. */
122 122 static const long v2_entry_size = 96;
123 123
124 124 /* A Changelogv2 index entry is 96 bytes long. */
125 125 static const long cl2_entry_size = 96;
126 126
127 127 /* Internal format version.
128 128 * Must match their counterparts in revlogutils/constants.py */
129 129 static const long format_v1 = 1; /* constants.py: REVLOGV1 */
130 130 static const long format_v2 = 0xDEAD; /* constants.py: REVLOGV2 */
131 131 static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */
132 132
133 133 static const long entry_v1_offset_high = 0;
134 134 static const long entry_v1_offset_offset_flags = 4;
135 135 static const long entry_v1_offset_comp_len = 8;
136 136 static const long entry_v1_offset_uncomp_len = 12;
137 137 static const long entry_v1_offset_base_rev = 16;
138 138 static const long entry_v1_offset_link_rev = 20;
139 139 static const long entry_v1_offset_parent_1 = 24;
140 140 static const long entry_v1_offset_parent_2 = 28;
141 141 static const long entry_v1_offset_node_id = 32;
142 142
143 143 static const long entry_v2_offset_high = 0;
144 144 static const long entry_v2_offset_offset_flags = 4;
145 145 static const long entry_v2_offset_comp_len = 8;
146 146 static const long entry_v2_offset_uncomp_len = 12;
147 147 static const long entry_v2_offset_base_rev = 16;
148 148 static const long entry_v2_offset_link_rev = 20;
149 149 static const long entry_v2_offset_parent_1 = 24;
150 150 static const long entry_v2_offset_parent_2 = 28;
151 151 static const long entry_v2_offset_node_id = 32;
152 152 static const long entry_v2_offset_sidedata_offset = 64;
153 153 static const long entry_v2_offset_sidedata_comp_len = 72;
154 154 static const long entry_v2_offset_all_comp_mode = 76;
155 155 /* next free offset: 77 */
156 156
157 157 static const long entry_cl2_offset_high = 0;
158 158 static const long entry_cl2_offset_offset_flags = 4;
159 159 static const long entry_cl2_offset_comp_len = 8;
160 160 static const long entry_cl2_offset_uncomp_len = 12;
161 161 static const long entry_cl2_offset_parent_1 = 16;
162 162 static const long entry_cl2_offset_parent_2 = 20;
163 163 static const long entry_cl2_offset_node_id = 24;
164 164 static const long entry_cl2_offset_sidedata_offset = 56;
165 165 static const long entry_cl2_offset_sidedata_comp_len = 64;
166 166 static const long entry_cl2_offset_all_comp_mode = 68;
167 167 static const long entry_cl2_offset_rank = 69;
168 168 /* next free offset: 73 */
169 169
170 170 static const char comp_mode_inline = 2;
171 171 static const int rank_unknown = -1;
172 172
/* Set a mercurial.error.RevlogError as the current Python exception.
 * Falls back to SystemError if the class cannot be located. */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	/* Import lazily at call time; error paths are rare. */
	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	/* PyModule_GetDict returns a borrowed reference; take our own so
	 * the unconditional Py_XDECREF below is balanced. */
	Py_INCREF(dict);

	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
202 202
203 203 /*
204 204 * Return a pointer to the beginning of a RevlogNG record.
205 205 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	/* Entries past the on-disk portion live in the in-memory "added"
	 * buffer, laid out back to back at entry_size strides. */
	if (pos >= self->length)
		return self->added + (pos - self->length) * self->entry_size;

	/* Inline revlogs interleave index entries with data, so entry
	 * offsets are irregular; build the offset table lazily on first
	 * access (entry 0 always starts at the buffer head). */
	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			Py_ssize_t ret;
			self->offsets =
			    PyMem_Malloc(self->length * sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			ret = inline_scan(self, self->offsets);
			if (ret == -1) {
				return NULL;
			};
		}
		return self->offsets[pos];
	}

	/* Non-inline: fixed-size records directly in the mapped buffer. */
	return (const char *)(self->buf.buf) + pos * self->entry_size;
}
228 228
229 229 /*
230 230 * Get parents of the given rev.
231 231 *
232 232 * The specified rev must be valid and must not be nullrev. A returned
233 233 * parent revision may be nullrev, but is guaranteed to be in valid range.
234 234 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
	const char *data = index_deref(self, rev);

	/* Parent fields live at format-specific offsets; dispatch on the
	 * revlog format version. */
	if (self->format_version == format_v1) {
		ps[0] = getbe32(data + entry_v1_offset_parent_1);
		ps[1] = getbe32(data + entry_v1_offset_parent_2);
	} else if (self->format_version == format_v2) {
		ps[0] = getbe32(data + entry_v2_offset_parent_1);
		ps[1] = getbe32(data + entry_v2_offset_parent_2);
	} else if (self->format_version == format_cl2) {
		ps[0] = getbe32(data + entry_cl2_offset_parent_1);
		ps[1] = getbe32(data + entry_cl2_offset_parent_2);
	} else {
		raise_revlog_error();
		return -1;
	}

	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}
262 262
263 263 /*
264 264 * Get parents of the given rev.
265 265 *
266 266 * If the specified rev is out of range, IndexError will be raised. If the
267 267 * revlog entry is corrupted, ValueError may be raised.
268 268 *
269 269 * Returns 0 on success or -1 on failure.
270 270 */
271 271 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
272 272 {
273 273 int tiprev;
274 274 if (!op || !HgRevlogIndex_Check(op) || !ps) {
275 275 PyErr_BadInternalCall();
276 276 return -1;
277 277 }
278 278 tiprev = (int)index_length((indexObject *)op) - 1;
279 279 if (rev < -1 || rev > tiprev) {
280 280 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
281 281 return -1;
282 282 } else if (rev == -1) {
283 283 ps[0] = ps[1] = -1;
284 284 return 0;
285 285 } else {
286 286 return index_get_parents((indexObject *)op, rev, ps, tiprev);
287 287 }
288 288 }
289 289
/* Return the byte offset in the data file where revision `rev` starts,
 * or -1 with an exception set on unknown format. The stored 64-bit
 * field packs flags in the low 16 bits; the start offset is the value
 * shifted right by 16. */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	uint64_t offset;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		offset = getbe32(data + entry_v1_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_v2) {
		offset = getbe32(data + entry_v2_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_cl2) {
		/* changelogv2 has no version header in entry 0 */
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset = getbe32(data + entry_cl2_offset_offset_flags);
		offset |= ((uint64_t)offset_high) << 32;
	} else {
		raise_revlog_error();
		return -1;
	}

	return (int64_t)(offset >> 16);
}
331 331
/* Return the compressed (on-disk) length of revision `rev`, 0 for
 * nullrev, or -1 with an exception set on error. */
static inline int index_get_length(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	int tmp;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		tmp = (int)getbe32(data + entry_v1_offset_comp_len);
	} else if (self->format_version == format_v2) {
		tmp = (int)getbe32(data + entry_v2_offset_comp_len);
	} else if (self->format_version == format_cl2) {
		tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
	} else {
		raise_revlog_error();
		return -1;
	}
	/* A negative value here means the unsigned on-disk length
	 * overflowed the int range — treat corrupted sizes as errors. */
	if (tmp < 0) {
		PyErr_Format(PyExc_OverflowError,
		             "revlog entry size out of bound (%d)", tmp);
		return -1;
	}
	return tmp;
}
359 359
360 360 /*
361 361 * RevlogNG format (all in big endian, data may be inlined):
362 362 * 6 bytes: offset
363 363 * 2 bytes: flags
364 364 * 4 bytes: compressed length
365 365 * 4 bytes: uncompressed length
366 366 * 4 bytes: base revision
367 367 * 4 bytes: link revision
368 368 * 4 bytes: parent 1 revision
369 369 * 4 bytes: parent 2 revision
370 370 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
371 371 */
/* __getitem__ for integer keys: decode entry `pos` into the 13-item
 * tuple described by tuple_format. Returns the cached nullentry for
 * nullrev, or NULL with IndexError/RevlogError set on failure. */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank = rank_unknown;
	char data_comp_mode, sidedata_comp_mode;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	if (self->format_version == format_v1) {
		offset_flags = getbe32(data + entry_v1_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v1_offset_comp_len);
		uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
		base_rev = getbe32(data + entry_v1_offset_base_rev);
		link_rev = getbe32(data + entry_v1_offset_link_rev);
		parent_1 = getbe32(data + entry_v1_offset_parent_1);
		parent_2 = getbe32(data + entry_v1_offset_parent_2);
		c_node_id = data + entry_v1_offset_node_id;

		/* v1 stores no sidedata; report inline/empty defaults. */
		sidedata_offset = 0;
		sidedata_comp_len = 0;
		data_comp_mode = comp_mode_inline;
		sidedata_comp_mode = comp_mode_inline;
	} else if (self->format_version == format_v2) {
		offset_flags = getbe32(data + entry_v2_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v2_offset_comp_len);
		uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
		base_rev = getbe32(data + entry_v2_offset_base_rev);
		link_rev = getbe32(data + entry_v2_offset_link_rev);
		parent_1 = getbe32(data + entry_v2_offset_parent_1);
		parent_2 = getbe32(data + entry_v2_offset_parent_2);
		c_node_id = data + entry_v2_offset_node_id;

		sidedata_offset =
		    getbe64(data + entry_v2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_v2_offset_sidedata_comp_len);
		/* both compression modes share one byte: data in bits 0-1,
		 * sidedata in bits 2-3 */
		data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
	} else if (self->format_version == format_cl2) {
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
		offset_flags |= ((uint64_t)offset_high) << 32;
		comp_len = getbe32(data + entry_cl2_offset_comp_len);
		uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
		/* base_rev and link_rev are not stored in changelogv2, but are
		   still used by some functions shared with the other revlogs.
		   They are supposed to contain links to other revisions,
		   but they always point to themselves in the case of a changelog.
		*/
		base_rev = pos;
		link_rev = pos;
		parent_1 = getbe32(data + entry_cl2_offset_parent_1);
		parent_2 = getbe32(data + entry_cl2_offset_parent_2);
		c_node_id = data + entry_cl2_offset_node_id;
		sidedata_offset =
		    getbe64(data + entry_cl2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_cl2_offset_sidedata_comp_len);
		data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
		/* only changelogv2 persists a rank */
		rank = getbe32(data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
	                     self->nodelen, sidedata_offset, sidedata_comp_len,
	                     data_comp_mode, sidedata_comp_mode, rank);
}
487 487 /*
488 488 * Pack header information in binary
489 489 */
490 490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
491 491 {
492 492 int header;
493 493 char out[4];
494 494 if (!PyArg_ParseTuple(args, "i", &header)) {
495 495 return NULL;
496 496 }
497 497 if (self->format_version != format_v1) {
498 498 PyErr_Format(PyExc_RuntimeError,
499 499 "version header should go in the docket, not the "
500 500 "index: %d",
501 501 header);
502 502 return NULL;
503 503 }
504 504 putbe32(header, out);
505 505 return PyBytes_FromStringAndSize(out, 4);
506 506 }
507 507 /*
508 508 * Return the raw binary string representing a revision
509 509 */
static PyObject *index_entry_binary(indexObject *self, PyObject *value)
{
	long rev;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (!pylong_to_long(value, &rev)) {
		return NULL;
	}
	if (rev < 0 || rev >= length) {
		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
		             rev);
		return NULL;
	};

	data = index_deref(self, rev);
	if (data == NULL)
		return NULL;
	if (rev == 0 && self->format_version == format_v1) {
		/* the header is eating the start of the first entry */
		/* (v1 stores the 4-byte version header where entry 0's
		 * offset_high would otherwise be, so skip it here) */
		return PyBytes_FromStringAndSize(data + 4,
		                                 self->entry_size - 4);
	}
	return PyBytes_FromStringAndSize(data, self->entry_size);
}
535 535
536 536 /*
537 537 * Return the hash of node corresponding to the given rev.
538 538 */
539 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 540 {
541 541 Py_ssize_t length = index_length(self);
542 542 const char *data;
543 543 const char *node_id;
544 544
545 545 if (pos == nullrev)
546 546 return nullid;
547 547
548 548 if (pos >= length)
549 549 return NULL;
550 550
551 551 data = index_deref(self, pos);
552 552
553 553 if (self->format_version == format_v1) {
554 554 node_id = data + entry_v1_offset_node_id;
555 555 } else if (self->format_version == format_v2) {
556 556 node_id = data + entry_v2_offset_node_id;
557 557 } else if (self->format_version == format_cl2) {
558 558 node_id = data + entry_cl2_offset_node_id;
559 559 } else {
560 560 raise_revlog_error();
561 561 return NULL;
562 562 }
563 563
564 564 return data ? node_id : NULL;
565 565 }
566 566
567 567 /*
568 568 * Return the stored rank of a given revision if known, or rank_unknown
569 569 * otherwise.
570 570 *
571 571 * The rank of a revision is the size of the sub-graph it defines as a head.
572 572 * Equivalently, the rank of a revision `r` is the size of the set
573 573 * `ancestors(r)`, `r` included.
574 574 *
575 575 * This method returns the rank retrieved from the revlog in constant time. It
576 576 * makes no attempt at computing unknown values for versions of the revlog
577 577 * which do not persist the rank.
578 578 */
static int index_fast_rank(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);

	/* Only changelogv2 persists ranks; everything else (and
	 * out-of-range revs) reports "unknown". */
	if (self->format_version != format_cl2 || pos >= length) {
		return rank_unknown;
	}

	if (pos == nullrev) {
		return 0; /* convention */
	}

	/* NOTE(review): index_deref can return NULL on OOM/corruption and
	 * that is not checked here — confirm callers guarantee pos is
	 * readable before relying on this path. */
	return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
}
593 593
594 594 /*
595 595 * Return the hash of the node corresponding to the given rev. The
596 596 * rev is assumed to be existing. If not, an exception is set.
597 597 */
598 598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
599 599 {
600 600 const char *node = index_node(self, pos);
601 601 if (node == NULL) {
602 602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
603 603 (int)pos);
604 604 }
605 605 return node;
606 606 }
607 607
608 608 static int nt_insert(nodetree *self, const char *node, int rev);
609 609
610 610 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
611 611 {
612 612 Py_ssize_t thisnodelen;
613 613 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
614 614 return -1;
615 615 if (nodelen == thisnodelen)
616 616 return 0;
617 617 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
618 618 thisnodelen, nodelen);
619 619 return -1;
620 620 }
621 621
622 622 static PyObject *index_append(indexObject *self, PyObject *obj)
623 623 {
624 624 uint64_t offset_flags, sidedata_offset;
625 625 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
626 626 sidedata_comp_len, rank;
627 627 char data_comp_mode, sidedata_comp_mode;
628 628 Py_ssize_t c_node_id_len;
629 629 const char *c_node_id;
630 630 char comp_field;
631 631 char *data;
632 632
633 633 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
634 634 &uncomp_len, &base_rev, &link_rev, &parent_1,
635 635 &parent_2, &c_node_id, &c_node_id_len,
636 636 &sidedata_offset, &sidedata_comp_len,
637 637 &data_comp_mode, &sidedata_comp_mode, &rank)) {
638 638 PyErr_SetString(PyExc_TypeError, "12-tuple required");
639 639 return NULL;
640 640 }
641 641
642 642 if (c_node_id_len != self->nodelen) {
643 643 PyErr_SetString(PyExc_TypeError, "invalid node");
644 644 return NULL;
645 645 }
646 646 if (self->format_version == format_v1) {
647 647
648 648 if (data_comp_mode != comp_mode_inline) {
649 649 PyErr_Format(PyExc_ValueError,
650 650 "invalid data compression mode: %i",
651 651 data_comp_mode);
652 652 return NULL;
653 653 }
654 654 if (sidedata_comp_mode != comp_mode_inline) {
655 655 PyErr_Format(PyExc_ValueError,
656 656 "invalid sidedata compression mode: %i",
657 657 sidedata_comp_mode);
658 658 return NULL;
659 659 }
660 660 }
661 661
662 662 if (self->new_length == self->added_length) {
663 663 size_t new_added_length =
664 664 self->added_length ? self->added_length * 2 : 4096;
665 665 void *new_added = PyMem_Realloc(
666 666 self->added, new_added_length * self->entry_size);
667 667 if (!new_added)
668 668 return PyErr_NoMemory();
669 669 self->added = new_added;
670 670 self->added_length = new_added_length;
671 671 }
672 672 rev = self->length + self->new_length;
673 673 data = self->added + self->entry_size * self->new_length++;
674 674
675 675 memset(data, 0, self->entry_size);
676 676
677 677 if (self->format_version == format_v1) {
678 678 putbe32(offset_flags >> 32, data + entry_v1_offset_high);
679 679 putbe32(offset_flags & 0xffffffffU,
680 680 data + entry_v1_offset_offset_flags);
681 681 putbe32(comp_len, data + entry_v1_offset_comp_len);
682 682 putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
683 683 putbe32(base_rev, data + entry_v1_offset_base_rev);
684 684 putbe32(link_rev, data + entry_v1_offset_link_rev);
685 685 putbe32(parent_1, data + entry_v1_offset_parent_1);
686 686 putbe32(parent_2, data + entry_v1_offset_parent_2);
687 687 memcpy(data + entry_v1_offset_node_id, c_node_id,
688 688 c_node_id_len);
689 689 } else if (self->format_version == format_v2) {
690 690 putbe32(offset_flags >> 32, data + entry_v2_offset_high);
691 691 putbe32(offset_flags & 0xffffffffU,
692 692 data + entry_v2_offset_offset_flags);
693 693 putbe32(comp_len, data + entry_v2_offset_comp_len);
694 694 putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
695 695 putbe32(base_rev, data + entry_v2_offset_base_rev);
696 696 putbe32(link_rev, data + entry_v2_offset_link_rev);
697 697 putbe32(parent_1, data + entry_v2_offset_parent_1);
698 698 putbe32(parent_2, data + entry_v2_offset_parent_2);
699 699 memcpy(data + entry_v2_offset_node_id, c_node_id,
700 700 c_node_id_len);
701 701 putbe64(sidedata_offset,
702 702 data + entry_v2_offset_sidedata_offset);
703 703 putbe32(sidedata_comp_len,
704 704 data + entry_v2_offset_sidedata_comp_len);
705 705 comp_field = data_comp_mode & 3;
706 706 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
707 707 data[entry_v2_offset_all_comp_mode] = comp_field;
708 708 } else if (self->format_version == format_cl2) {
709 709 putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
710 710 putbe32(offset_flags & 0xffffffffU,
711 711 data + entry_cl2_offset_offset_flags);
712 712 putbe32(comp_len, data + entry_cl2_offset_comp_len);
713 713 putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
714 714 putbe32(parent_1, data + entry_cl2_offset_parent_1);
715 715 putbe32(parent_2, data + entry_cl2_offset_parent_2);
716 716 memcpy(data + entry_cl2_offset_node_id, c_node_id,
717 717 c_node_id_len);
718 718 putbe64(sidedata_offset,
719 719 data + entry_cl2_offset_sidedata_offset);
720 720 putbe32(sidedata_comp_len,
721 721 data + entry_cl2_offset_sidedata_comp_len);
722 722 comp_field = data_comp_mode & 3;
723 723 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
724 724 data[entry_cl2_offset_all_comp_mode] = comp_field;
725 725 putbe32(rank, data + entry_cl2_offset_rank);
726 726 } else {
727 727 raise_revlog_error();
728 728 return NULL;
729 729 }
730 730
731 731 if (self->ntinitialized)
732 732 nt_insert(&self->nt, c_node_id, rev);
733 733
734 734 Py_CLEAR(self->headrevs);
735 735 Py_RETURN_NONE;
736 736 }
737 737
738 738 /* Replace an existing index entry's sidedata offset and length with new ones.
739 739 This cannot be used outside of the context of sidedata rewriting,
740 740 inside the transaction that creates the given revision. */
/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
	uint64_t offset_flags, sidedata_offset;
	Py_ssize_t rev;
	int sidedata_comp_len;
	char comp_mode;
	char *data;
	/* 'k'/'K' pick the natively-sized unsigned long format unit. */
#if LONG_MAX == 0x7fffffffL
	const char *const sidedata_format = "nKiKB";
#else
	const char *const sidedata_format = "nkikB";
#endif

	if (self->entry_size == v1_entry_size || self->inlined) {
		/*
		 There is a bug in the transaction handling when going from an
		 inline revlog to a separate index and data file. Turn it off until
		 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
		 See issue6485.
		*/
		raise_revlog_error();
		return NULL;
	}

	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
	                      &sidedata_comp_len, &offset_flags, &comp_mode))
		return NULL;

	if (rev < 0 || rev >= index_length(self)) {
		PyErr_SetString(PyExc_IndexError, "revision outside index");
		return NULL;
	}
	/* Only entries still in the in-memory "added" tail may be
	 * rewritten; on-disk entries are immutable here. */
	if (rev < self->length) {
		PyErr_SetString(
		    PyExc_IndexError,
		    "cannot rewrite entries outside of this transaction");
		return NULL;
	}

	/* Find the newly added node, offset from the "already on-disk" length
	 */
	data = self->added + self->entry_size * (rev - self->length);
	if (self->format_version == format_v2) {
		/* 8-byte write spans the high + offset_flags fields */
		putbe64(offset_flags, data + entry_v2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		/* replace only the sidedata bits (2-3) of the mode byte */
		data[entry_v2_offset_all_comp_mode] =
		    (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else if (self->format_version == format_cl2) {
		putbe64(offset_flags, data + entry_cl2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		data[entry_cl2_offset_all_comp_mode] =
		    (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else {
		raise_revlog_error();
		return NULL;
	}

	Py_RETURN_NONE;
}
808 808
/* Build a dict of diagnostic counters (entry counts, node-trie stats)
 * for index.stats(). Returns a new dict, or NULL on failure. */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* Insert self-><field> into obj under the given description, jumping
 * to bail on any allocation/insert failure. */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyLong_FromSsize_t(self->__n);                             \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added_length)
		istat(new_length, "index entries added");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
853 853
854 854 /*
855 855 * When we cache a list, we want to be sure the caller can't mutate
856 856 * the cached copy.
857 857 */
858 858 static PyObject *list_copy(PyObject *list)
859 859 {
860 860 Py_ssize_t len = PyList_GET_SIZE(list);
861 861 PyObject *newlist = PyList_New(len);
862 862 Py_ssize_t i;
863 863
864 864 if (newlist == NULL)
865 865 return NULL;
866 866
867 867 for (i = 0; i < len; i++) {
868 868 PyObject *obj = PyList_GET_ITEM(list, i);
869 869 Py_INCREF(obj);
870 870 PyList_SET_ITEM(newlist, i, obj);
871 871 }
872 872
873 873 return newlist;
874 874 }
875 875
876 876 static int check_filter(PyObject *filter, Py_ssize_t arg)
877 877 {
878 878 if (filter) {
879 879 PyObject *arglist, *result;
880 880 int isfiltered;
881 881
882 882 arglist = Py_BuildValue("(n)", arg);
883 883 if (!arglist) {
884 884 return -1;
885 885 }
886 886
887 887 result = PyObject_Call(filter, arglist, NULL);
888 888 Py_DECREF(arglist);
889 889 if (!result) {
890 890 return -1;
891 891 }
892 892
893 893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
894 894 * same as this function, so we can just return it directly.*/
895 895 isfiltered = PyObject_IsTrue(result);
896 896 Py_DECREF(result);
897 897 return isfiltered;
898 898 } else {
899 899 return 0;
900 900 }
901 901 }
902 902
903 903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 904 int parent_2, Py_ssize_t i)
905 905 {
906 906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 907 phases[i] = phases[parent_1];
908 908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 909 phases[i] = phases[parent_2];
910 910 }
911 911
/*
 * reachableroots2(minroot, heads, roots, includepath) -> list of revs
 *
 * Walk the ancestors of `heads` (never descending below `minroot`) and
 * return the members of `roots` that are reached. When `includepath` is
 * True, also return every revision lying on a path between a reachable
 * root and a head. Returns a new list, or NULL with an exception set.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	/* both arrays are indexed by rev+1 so that nullrev (-1) maps to
	 * slot 0 */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Flag every requested root in revstates */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	/* (breadth-first: tovisit doubles as the BFS queue, k is its read
	 * cursor and lentovisit its write cursor) */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyLong_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* forward pass: a seen rev with a reachable parent is itself
		 * on a root-to-head path */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyLong_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
1070 1070
1071 1071 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1072 1072 char phase)
1073 1073 {
1074 1074 Py_ssize_t len = index_length(self);
1075 1075 PyObject *item;
1076 1076 PyObject *iterator;
1077 1077 int rev, minrev = -1;
1078 1078 char *node;
1079 1079
1080 1080 if (!PySet_Check(roots)) {
1081 1081 PyErr_SetString(PyExc_TypeError,
1082 1082 "roots must be a set of nodes");
1083 1083 return -2;
1084 1084 }
1085 1085 iterator = PyObject_GetIter(roots);
1086 1086 if (iterator == NULL)
1087 1087 return -2;
1088 1088 while ((item = PyIter_Next(iterator))) {
1089 1089 if (node_check(self->nodelen, item, &node) == -1)
1090 1090 goto failed;
1091 1091 rev = index_find_node(self, node);
1092 1092 /* null is implicitly public, so negative is invalid */
1093 1093 if (rev < 0 || rev >= len)
1094 1094 goto failed;
1095 1095 phases[rev] = phase;
1096 1096 if (minrev == -1 || minrev > rev)
1097 1097 minrev = rev;
1098 1098 Py_DECREF(item);
1099 1099 }
1100 1100 Py_DECREF(iterator);
1101 1101 return minrev;
1102 1102 failed:
1103 1103 Py_DECREF(iterator);
1104 1104 Py_DECREF(item);
1105 1105 return -2;
1106 1106 }
1107 1107
1108 1108 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1109 1109 {
1110 1110 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1111 1111 96: internal */
1112 1112 static const char trackedphases[] = {1, 2, 32, 96};
1113 1113 PyObject *roots = Py_None;
1114 1114 PyObject *phasesetsdict = NULL;
1115 1115 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1116 1116 Py_ssize_t len = index_length(self);
1117 1117 char *phases = NULL;
1118 1118 int minphaserev = -1, rev, i;
1119 1119 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1120 1120
1121 1121 if (!PyArg_ParseTuple(args, "O", &roots))
1122 1122 return NULL;
1123 1123 if (roots == NULL || !PyDict_Check(roots)) {
1124 1124 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1125 1125 return NULL;
1126 1126 }
1127 1127
1128 1128 phases = calloc(len, 1);
1129 1129 if (phases == NULL) {
1130 1130 PyErr_NoMemory();
1131 1131 return NULL;
1132 1132 }
1133 1133
1134 1134 for (i = 0; i < numphases; ++i) {
1135 1135 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1136 1136 PyObject *phaseroots = NULL;
1137 1137 if (pyphase == NULL)
1138 1138 goto release;
1139 1139 phaseroots = PyDict_GetItem(roots, pyphase);
1140 1140 Py_DECREF(pyphase);
1141 1141 if (phaseroots == NULL)
1142 1142 continue;
1143 1143 rev = add_roots_get_min(self, phaseroots, phases,
1144 1144 trackedphases[i]);
1145 1145 if (rev == -2)
1146 1146 goto release;
1147 1147 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1148 1148 minphaserev = rev;
1149 1149 }
1150 1150
1151 1151 for (i = 0; i < numphases; ++i) {
1152 1152 phasesets[i] = PySet_New(NULL);
1153 1153 if (phasesets[i] == NULL)
1154 1154 goto release;
1155 1155 }
1156 1156
1157 1157 if (minphaserev == -1)
1158 1158 minphaserev = len;
1159 1159 for (rev = minphaserev; rev < len; ++rev) {
1160 1160 PyObject *pyphase = NULL;
1161 1161 PyObject *pyrev = NULL;
1162 1162 int parents[2];
1163 1163 /*
1164 1164 * The parent lookup could be skipped for phaseroots, but
1165 1165 * phase --force would historically not recompute them
1166 1166 * correctly, leaving descendents with a lower phase around.
1167 1167 * As such, unconditionally recompute the phase.
1168 1168 */
1169 1169 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1170 1170 goto release;
1171 1171 set_phase_from_parents(phases, parents[0], parents[1], rev);
1172 1172 switch (phases[rev]) {
1173 1173 case 0:
1174 1174 continue;
1175 1175 case 1:
1176 1176 pyphase = phasesets[0];
1177 1177 break;
1178 1178 case 2:
1179 1179 pyphase = phasesets[1];
1180 1180 break;
1181 1181 case 32:
1182 1182 pyphase = phasesets[2];
1183 1183 break;
1184 1184 case 96:
1185 1185 pyphase = phasesets[3];
1186 1186 break;
1187 1187 default:
1188 1188 /* this should never happen since the phase number is
1189 1189 * specified by this function. */
1190 1190 PyErr_SetString(PyExc_SystemError,
1191 1191 "bad phase number in internal list");
1192 1192 goto release;
1193 1193 }
1194 1194 pyrev = PyLong_FromLong(rev);
1195 1195 if (pyrev == NULL)
1196 1196 goto release;
1197 1197 if (PySet_Add(pyphase, pyrev) == -1) {
1198 1198 Py_DECREF(pyrev);
1199 1199 goto release;
1200 1200 }
1201 1201 Py_DECREF(pyrev);
1202 1202 }
1203 1203
1204 1204 phasesetsdict = _dict_new_presized(numphases);
1205 1205 if (phasesetsdict == NULL)
1206 1206 goto release;
1207 1207 for (i = 0; i < numphases; ++i) {
1208 1208 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1209 1209 if (pyphase == NULL)
1210 1210 goto release;
1211 1211 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1212 1212 -1) {
1213 1213 Py_DECREF(pyphase);
1214 1214 goto release;
1215 1215 }
1216 1216 Py_DECREF(phasesets[i]);
1217 1217 phasesets[i] = NULL;
1218 1218 }
1219 1219
1220 1220 free(phases);
1221 1221 return Py_BuildValue("nN", len, phasesetsdict);
1222 1222
1223 1223 release:
1224 1224 for (i = 0; i < numphases; ++i)
1225 1225 Py_XDECREF(phasesets[i]);
1226 1226 Py_XDECREF(phasesetsdict);
1227 1227
1228 1228 free(phases);
1229 1229 return NULL;
1230 1230 }
1231 1231
1232 1232 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1233 1233 {
1234 1234 Py_ssize_t i, j, len;
1235 1235 char *nothead = NULL;
1236 1236 PyObject *heads = NULL;
1237 1237 PyObject *filter = NULL;
1238 1238 PyObject *filteredrevs = Py_None;
1239 1239
1240 1240 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1241 1241 return NULL;
1242 1242 }
1243 1243
1244 1244 if (self->headrevs && filteredrevs == self->filteredrevs)
1245 1245 return list_copy(self->headrevs);
1246 1246
1247 1247 Py_DECREF(self->filteredrevs);
1248 1248 self->filteredrevs = filteredrevs;
1249 1249 Py_INCREF(filteredrevs);
1250 1250
1251 1251 if (filteredrevs != Py_None) {
1252 1252 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1253 1253 if (!filter) {
1254 1254 PyErr_SetString(
1255 1255 PyExc_TypeError,
1256 1256 "filteredrevs has no attribute __contains__");
1257 1257 goto bail;
1258 1258 }
1259 1259 }
1260 1260
1261 1261 len = index_length(self);
1262 1262 heads = PyList_New(0);
1263 1263 if (heads == NULL)
1264 1264 goto bail;
1265 1265 if (len == 0) {
1266 1266 PyObject *nullid = PyLong_FromLong(-1);
1267 1267 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1268 1268 Py_XDECREF(nullid);
1269 1269 goto bail;
1270 1270 }
1271 1271 goto done;
1272 1272 }
1273 1273
1274 1274 nothead = calloc(len, 1);
1275 1275 if (nothead == NULL) {
1276 1276 PyErr_NoMemory();
1277 1277 goto bail;
1278 1278 }
1279 1279
1280 1280 for (i = len - 1; i >= 0; i--) {
1281 1281 int isfiltered;
1282 1282 int parents[2];
1283 1283
1284 1284 /* If nothead[i] == 1, it means we've seen an unfiltered child
1285 1285 * of this node already, and therefore this node is not
1286 1286 * filtered. So we can skip the expensive check_filter step.
1287 1287 */
1288 1288 if (nothead[i] != 1) {
1289 1289 isfiltered = check_filter(filter, i);
1290 1290 if (isfiltered == -1) {
1291 1291 PyErr_SetString(PyExc_TypeError,
1292 1292 "unable to check filter");
1293 1293 goto bail;
1294 1294 }
1295 1295
1296 1296 if (isfiltered) {
1297 1297 nothead[i] = 1;
1298 1298 continue;
1299 1299 }
1300 1300 }
1301 1301
1302 1302 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1303 1303 goto bail;
1304 1304 for (j = 0; j < 2; j++) {
1305 1305 if (parents[j] >= 0)
1306 1306 nothead[parents[j]] = 1;
1307 1307 }
1308 1308 }
1309 1309
1310 1310 for (i = 0; i < len; i++) {
1311 1311 PyObject *head;
1312 1312
1313 1313 if (nothead[i])
1314 1314 continue;
1315 1315 head = PyLong_FromSsize_t(i);
1316 1316 if (head == NULL || PyList_Append(heads, head) == -1) {
1317 1317 Py_XDECREF(head);
1318 1318 goto bail;
1319 1319 }
1320 1320 }
1321 1321
1322 1322 done:
1323 1323 self->headrevs = heads;
1324 1324 Py_XDECREF(filter);
1325 1325 free(nothead);
1326 1326 return list_copy(self->headrevs);
1327 1327 bail:
1328 1328 Py_XDECREF(filter);
1329 1329 Py_XDECREF(heads);
1330 1330 free(nothead);
1331 1331 return NULL;
1332 1332 }
1333 1333
1334 1334 /**
1335 1335 * Obtain the base revision index entry.
1336 1336 *
1337 1337 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1338 1338 */
1339 1339 static inline int index_baserev(indexObject *self, int rev)
1340 1340 {
1341 1341 const char *data;
1342 1342 int result;
1343 1343
1344 1344 data = index_deref(self, rev);
1345 1345 if (data == NULL)
1346 1346 return -2;
1347 1347
1348 1348 if (self->format_version == format_v1) {
1349 1349 result = getbe32(data + entry_v1_offset_base_rev);
1350 1350 } else if (self->format_version == format_v2) {
1351 1351 result = getbe32(data + entry_v2_offset_base_rev);
1352 1352 } else if (self->format_version == format_cl2) {
1353 1353 return rev;
1354 1354 } else {
1355 1355 raise_revlog_error();
1356 1356 return -1;
1357 1357 }
1358 1358
1359 1359 if (result > rev) {
1360 1360 PyErr_Format(
1361 1361 PyExc_ValueError,
1362 1362 "corrupted revlog, revision base above revision: %d, %d",
1363 1363 rev, result);
1364 1364 return -2;
1365 1365 }
1366 1366 if (result < -1) {
1367 1367 PyErr_Format(
1368 1368 PyExc_ValueError,
1369 1369 "corrupted revlog, revision base out of range: %d, %d", rev,
1370 1370 result);
1371 1371 return -2;
1372 1372 }
1373 1373 return result;
1374 1374 }
1375 1375
/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 *
 * Returns 1 if the revision is a snapshot, 0 if it is a delta against
 * one of its parents, and -1 with an exception set on error.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	int b;
	Py_ssize_t base;
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			/* a revision that is its own base is a full
			 * snapshot; normalize to the "no base" value */
			base = -1;
		}
		if (base == -2) {
			assert(PyErr_Occurred());
			return -1;
		}
		if (base == -1) {
			/* full snapshot */
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		/* Follow the base chain of each parent past zero-length
		 * entries to reach the revision actually carrying content.
		 * NOTE(review): index_get_length(self, ps[x]) is evaluated
		 * before the ps[x] >= 0 guard, so it is reached with
		 * nullrev (-1) — presumably index_get_length tolerates
		 * that; confirm against its definition. */
		while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
			b = index_baserev(self, ps[0]);
			if (b == ps[0]) {
				break;
			}
			ps[0] = b;
		}
		while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
			b = index_baserev(self, ps[1]);
			if (b == ps[1]) {
				break;
			}
			ps[1] = b;
		}
		if (base == ps[0] || base == ps[1]) {
			/* delta against a parent: not a snapshot */
			return 0;
		}
		/* intermediate snapshot candidate: keep walking down the
		 * chain of bases */
		rev = base;
	}
	return rev == -1;
}
1424 1424
1425 1425 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1426 1426 {
1427 1427 long rev;
1428 1428 int issnap;
1429 1429 Py_ssize_t length = index_length(self);
1430 1430
1431 1431 if (!pylong_to_long(value, &rev)) {
1432 1432 return NULL;
1433 1433 }
1434 1434 if (rev < -1 || rev >= length) {
1435 1435 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1436 1436 rev);
1437 1437 return NULL;
1438 1438 };
1439 1439 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1440 1440 if (issnap < 0) {
1441 1441 return NULL;
1442 1442 };
1443 1443 return PyBool_FromLong((long)issnap);
1444 1444 }
1445 1445
1446 1446 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1447 1447 {
1448 1448 Py_ssize_t start_rev;
1449 Py_ssize_t end_rev;
1449 1450 PyObject *cache;
1450 1451 Py_ssize_t base;
1451 1452 Py_ssize_t rev;
1452 1453 PyObject *key = NULL;
1453 1454 PyObject *value = NULL;
1454 1455 const Py_ssize_t length = index_length(self);
1455 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1456 if (!PyArg_ParseTuple(args, "O!nn", &PyDict_Type, &cache, &start_rev,
1457 &end_rev)) {
1456 1458 return NULL;
1457 1459 }
1458 for (rev = start_rev; rev < length; rev++) {
1460 end_rev += 1;
1461 if (end_rev > length) {
1462 end_rev = length;
1463 }
1464 if (start_rev < 0) {
1465 start_rev = 0;
1466 }
1467 for (rev = start_rev; rev < end_rev; rev++) {
1459 1468 int issnap;
1460 1469 PyObject *allvalues = NULL;
1461 1470 issnap = index_issnapshotrev(self, rev);
1462 1471 if (issnap < 0) {
1463 1472 goto bail;
1464 1473 }
1465 1474 if (issnap == 0) {
1466 1475 continue;
1467 1476 }
1468 1477 base = (Py_ssize_t)index_baserev(self, rev);
1469 1478 if (base == rev) {
1470 1479 base = -1;
1471 1480 }
1472 1481 if (base == -2) {
1473 1482 assert(PyErr_Occurred());
1474 1483 goto bail;
1475 1484 }
1476 1485 key = PyLong_FromSsize_t(base);
1477 1486 allvalues = PyDict_GetItem(cache, key);
1478 1487 if (allvalues == NULL && PyErr_Occurred()) {
1479 1488 goto bail;
1480 1489 }
1481 1490 if (allvalues == NULL) {
1482 1491 int r;
1483 1492 allvalues = PyList_New(0);
1484 1493 if (!allvalues) {
1485 1494 goto bail;
1486 1495 }
1487 1496 r = PyDict_SetItem(cache, key, allvalues);
1488 1497 Py_DECREF(allvalues);
1489 1498 if (r < 0) {
1490 1499 goto bail;
1491 1500 }
1492 1501 }
1493 1502 value = PyLong_FromSsize_t(rev);
1494 1503 if (PyList_Append(allvalues, value)) {
1495 1504 goto bail;
1496 1505 }
1497 1506 Py_CLEAR(key);
1498 1507 Py_CLEAR(value);
1499 1508 }
1500 1509 Py_RETURN_NONE;
1501 1510 bail:
1502 1511 Py_XDECREF(key);
1503 1512 Py_XDECREF(value);
1504 1513 return NULL;
1505 1514 }
1506 1515
1507 1516 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1508 1517 {
1509 1518 int rev, generaldelta;
1510 1519 PyObject *stoparg;
1511 1520 int stoprev, iterrev, baserev = -1;
1512 1521 int stopped;
1513 1522 PyObject *chain = NULL, *result = NULL;
1514 1523 const Py_ssize_t length = index_length(self);
1515 1524
1516 1525 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1517 1526 return NULL;
1518 1527 }
1519 1528
1520 1529 if (PyLong_Check(stoparg)) {
1521 1530 stoprev = (int)PyLong_AsLong(stoparg);
1522 1531 if (stoprev == -1 && PyErr_Occurred()) {
1523 1532 return NULL;
1524 1533 }
1525 1534 } else if (stoparg == Py_None) {
1526 1535 stoprev = -2;
1527 1536 } else {
1528 1537 PyErr_SetString(PyExc_ValueError,
1529 1538 "stoprev must be integer or None");
1530 1539 return NULL;
1531 1540 }
1532 1541
1533 1542 if (rev < 0 || rev >= length) {
1534 1543 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1535 1544 return NULL;
1536 1545 }
1537 1546
1538 1547 chain = PyList_New(0);
1539 1548 if (chain == NULL) {
1540 1549 return NULL;
1541 1550 }
1542 1551
1543 1552 baserev = index_baserev(self, rev);
1544 1553
1545 1554 /* This should never happen. */
1546 1555 if (baserev <= -2) {
1547 1556 /* Error should be set by index_deref() */
1548 1557 assert(PyErr_Occurred());
1549 1558 goto bail;
1550 1559 }
1551 1560
1552 1561 iterrev = rev;
1553 1562
1554 1563 while (iterrev != baserev && iterrev != stoprev) {
1555 1564 PyObject *value = PyLong_FromLong(iterrev);
1556 1565 if (value == NULL) {
1557 1566 goto bail;
1558 1567 }
1559 1568 if (PyList_Append(chain, value)) {
1560 1569 Py_DECREF(value);
1561 1570 goto bail;
1562 1571 }
1563 1572 Py_DECREF(value);
1564 1573
1565 1574 if (generaldelta) {
1566 1575 iterrev = baserev;
1567 1576 } else {
1568 1577 iterrev--;
1569 1578 }
1570 1579
1571 1580 if (iterrev < 0) {
1572 1581 break;
1573 1582 }
1574 1583
1575 1584 if (iterrev >= length) {
1576 1585 PyErr_SetString(PyExc_IndexError,
1577 1586 "revision outside index");
1578 1587 return NULL;
1579 1588 }
1580 1589
1581 1590 baserev = index_baserev(self, iterrev);
1582 1591
1583 1592 /* This should never happen. */
1584 1593 if (baserev <= -2) {
1585 1594 /* Error should be set by index_deref() */
1586 1595 assert(PyErr_Occurred());
1587 1596 goto bail;
1588 1597 }
1589 1598 }
1590 1599
1591 1600 if (iterrev == stoprev) {
1592 1601 stopped = 1;
1593 1602 } else {
1594 1603 PyObject *value = PyLong_FromLong(iterrev);
1595 1604 if (value == NULL) {
1596 1605 goto bail;
1597 1606 }
1598 1607 if (PyList_Append(chain, value)) {
1599 1608 Py_DECREF(value);
1600 1609 goto bail;
1601 1610 }
1602 1611 Py_DECREF(value);
1603 1612
1604 1613 stopped = 0;
1605 1614 }
1606 1615
1607 1616 if (PyList_Reverse(chain)) {
1608 1617 goto bail;
1609 1618 }
1610 1619
1611 1620 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1612 1621 Py_DECREF(chain);
1613 1622 return result;
1614 1623
1615 1624 bail:
1616 1625 Py_DECREF(chain);
1617 1626 return NULL;
1618 1627 }
1619 1628
1620 1629 static inline int64_t
1621 1630 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1622 1631 {
1623 1632 int64_t start_offset;
1624 1633 int64_t end_offset;
1625 1634 int end_size;
1626 1635 start_offset = index_get_start(self, start_rev);
1627 1636 if (start_offset < 0) {
1628 1637 return -1;
1629 1638 }
1630 1639 end_offset = index_get_start(self, end_rev);
1631 1640 if (end_offset < 0) {
1632 1641 return -1;
1633 1642 }
1634 1643 end_size = index_get_length(self, end_rev);
1635 1644 if (end_size < 0) {
1636 1645 return -1;
1637 1646 }
1638 1647 if (end_offset < start_offset) {
1639 1648 PyErr_Format(PyExc_ValueError,
1640 1649 "corrupted revlog index: inconsistent offset "
1641 1650 "between revisions (%zd) and (%zd)",
1642 1651 start_rev, end_rev);
1643 1652 return -1;
1644 1653 }
1645 1654 return (end_offset - start_offset) + (int64_t)end_size;
1646 1655 }
1647 1656
1648 1657 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1649 1658 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1650 1659 Py_ssize_t startidx, Py_ssize_t endidx)
1651 1660 {
1652 1661 int length;
1653 1662 while (endidx > 1 && endidx > startidx) {
1654 1663 length = index_get_length(self, revs[endidx - 1]);
1655 1664 if (length < 0) {
1656 1665 return -1;
1657 1666 }
1658 1667 if (length != 0) {
1659 1668 break;
1660 1669 }
1661 1670 endidx -= 1;
1662 1671 }
1663 1672 return endidx;
1664 1673 }
1665 1674
1666 1675 struct Gap {
1667 1676 int64_t size;
1668 1677 Py_ssize_t idx;
1669 1678 };
1670 1679
1671 1680 static int gap_compare(const void *left, const void *right)
1672 1681 {
1673 1682 const struct Gap *l_left = ((const struct Gap *)left);
1674 1683 const struct Gap *l_right = ((const struct Gap *)right);
1675 1684 if (l_left->size < l_right->size) {
1676 1685 return -1;
1677 1686 } else if (l_left->size > l_right->size) {
1678 1687 return 1;
1679 1688 }
1680 1689 return 0;
1681 1690 }
1682 1691 static int Py_ssize_t_compare(const void *left, const void *right)
1683 1692 {
1684 1693 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1685 1694 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1686 1695 if (l_left < l_right) {
1687 1696 return -1;
1688 1697 } else if (l_left > l_right) {
1689 1698 return 1;
1690 1699 }
1691 1700 return 0;
1692 1701 }
1693 1702
/*
 * slicechunktodensity(revs, targetdensity, mingapsize) -> sequence
 *
 * Slice the delta chain `revs` (a list of revisions) into contiguous
 * chunks so that reading each chunk reaches a payload/read-bytes
 * density of at least `targetdensity`, skipping over on-disk gaps
 * larger than `mingapsize`. Returns a sequence (1-tuple holding
 * `revs` itself when no slicing is needed, otherwise a list) of
 * revision lists, or NULL with an exception set.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum =
		    PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* a chain smaller than the gap threshold is never worth slicing */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* dense enough already: return the chain unsliced */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* record every inter-revision gap larger than mingapsize */
	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* skip gaps (largest first) until the target density is reached */
	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* restore slice boundaries to revision order */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the final chunk runs to the end of the chain */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1890 1899
1891 1900 static inline int nt_level(const char *node, Py_ssize_t level)
1892 1901 {
1893 1902 int v = node[level >> 1];
1894 1903 if (!(level & 1))
1895 1904 v >>= 4;
1896 1905 return v & 0xf;
1897 1906 }
1898 1907
/*
 * Look up a node (binary, or a hex prefix when `hex` is true) in the
 * radix tree.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	/* maxlevel is the number of nibbles of the query we can compare */
	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	/* Child slots encode: 0 = empty, >0 = offset of an internal node,
	 * <0 = leaf holding revision -(value + 2). */
	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* Leaf: verify the remaining nibbles of the query
			 * against the stored node before returning. */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1949 1958
/*
 * Allocate a fresh internal node, growing the backing array if needed.
 * Returns the index of the new node, or -1 with an exception set.
 * Note: growth uses realloc, so pointers into self->nodes held by the
 * caller may be invalidated.
 */
static int nt_new(nodetree *self)
{
	if (self->length == self->capacity) {
		size_t newcapacity;
		nodetreenode *newnodes;
		newcapacity = self->capacity * 2;
		/* guard the multiplication below against size_t overflow */
		if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
			PyErr_SetString(PyExc_MemoryError,
			                "overflow in nt_new");
			return -1;
		}
		newnodes =
		    realloc(self->nodes, newcapacity * sizeof(nodetreenode));
		if (newnodes == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->capacity = newcapacity;
		self->nodes = newnodes;
		/* zero the newly-added tail so child slots read as empty */
		memset(&self->nodes[self->length], 0,
		       sizeof(nodetreenode) * (self->capacity - self->length));
	}
	return self->length++;
}
1974 1983
/*
 * Insert a (node, rev) pair into the radix tree. Returns 0 on success,
 * -1 on error (exception set). Inserting the same node again overwrites
 * the stored rev; rev == -2 encodes deletion (see nt_delete_node).
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the rev as a leaf (-rev - 2) */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* slot already holds a leaf for another rev */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				/* same node: just update the stored rev */
				n->children[k] = -rev - 2;
				return 0;
			}
			/* collision: split by pushing the old leaf one
			 * level down, then continue descending */
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* internal node: descend */
			level += 1;
			off = v;
		}
	}

	return -1;
}
2022 2031
2023 2032 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2024 2033 {
2025 2034 Py_ssize_t rev;
2026 2035 const char *node;
2027 2036 Py_ssize_t length;
2028 2037 if (!PyArg_ParseTuple(args, "n", &rev))
2029 2038 return NULL;
2030 2039 length = index_length(self->nt.index);
2031 2040 if (rev < 0 || rev >= length) {
2032 2041 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2033 2042 return NULL;
2034 2043 }
2035 2044 node = index_node_existing(self->nt.index, rev);
2036 2045 if (nt_insert(&self->nt, node, (int)rev) == -1)
2037 2046 return NULL;
2038 2047 Py_RETURN_NONE;
2039 2048 }
2040 2049
/*
 * Remove a node from the radix tree by overwriting its leaf with the
 * "unset" marker.
 */
static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}
2047 2056
2048 2057 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2049 2058 {
2050 2059 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2051 2060 self->nodes = NULL;
2052 2061
2053 2062 self->index = index;
2054 2063 /* The input capacity is in terms of revisions, while the field is in
2055 2064 * terms of nodetree nodes. */
2056 2065 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2057 2066 self->nodelen = index->nodelen;
2058 2067 self->depth = 0;
2059 2068 self->splits = 0;
2060 2069 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2061 2070 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2062 2071 return -1;
2063 2072 }
2064 2073 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2065 2074 if (self->nodes == NULL) {
2066 2075 PyErr_NoMemory();
2067 2076 return -1;
2068 2077 }
2069 2078 self->length = 1;
2070 2079 return 0;
2071 2080 }
2072 2081
/*
 * tp_init for the Python-visible nodetree type: nodetree(index, capacity).
 * Takes a strong reference to the index object.
 */
static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	/* reference released in ntobj_dealloc */
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}
2083 2092
/*
 * Look up a hex-prefix in the tree; see nt_find() for return values.
 */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
2088 2097
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* leaf: decode the stored rev and compare nodes */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
2134 2143
2135 2144 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2136 2145 {
2137 2146 PyObject *val;
2138 2147 char *node;
2139 2148 int length;
2140 2149
2141 2150 if (!PyArg_ParseTuple(args, "O", &val))
2142 2151 return NULL;
2143 2152 if (node_check(self->nt.nodelen, val, &node) == -1)
2144 2153 return NULL;
2145 2154
2146 2155 length = nt_shortest(&self->nt, node);
2147 2156 if (length == -3)
2148 2157 return NULL;
2149 2158 if (length == -2) {
2150 2159 raise_revlog_error();
2151 2160 return NULL;
2152 2161 }
2153 2162 return PyLong_FromLong(length);
2154 2163 }
2155 2164
/*
 * Release the node array; safe to call more than once (nodes is reset
 * to NULL).
 */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
2161 2170
/*
 * tp_dealloc for the Python-visible nodetree type: drop the reference
 * taken on the index in ntobj_init, free the tree, free the object.
 */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
2168 2177
/* Method table for the Python-visible nodetree type. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
2176 2185
/*
 * Type object exposing the nodetree radix tree to Python as
 * "parsers.nodetree". Only construction, insert() and shortest() are
 * exposed; all other slots are intentionally empty.
 */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
2216 2225
/*
 * Lazily create the index's embedded radix tree. Idempotent; returns 0
 * on success, -1 with an exception set on failure. After init, ntrev
 * marks how far backwards the tree has been populated (see
 * index_find_node / index_populate_nt).
 */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		/* the null node is always present */
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
2235 2244
/*
 * Map a full binary node to its revision number.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				/* cache only the node we were asked for */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			/* cache every node visited during the scan */
			if (nt_insert(&self->nt, n, rev) == -1) {
				/* remember how far we got before failing */
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		/* everything at ntrev and above is now in the tree */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2295 2304
2296 2305 static PyObject *index_getitem(indexObject *self, PyObject *value)
2297 2306 {
2298 2307 char *node;
2299 2308 int rev;
2300 2309
2301 2310 if (PyLong_Check(value)) {
2302 2311 long idx;
2303 2312 if (!pylong_to_long(value, &idx)) {
2304 2313 return NULL;
2305 2314 }
2306 2315 return index_get(self, idx);
2307 2316 }
2308 2317
2309 2318 if (node_check(self->nodelen, value, &node) == -1)
2310 2319 return NULL;
2311 2320 rev = index_find_node(self, node);
2312 2321 if (rev >= -1)
2313 2322 return PyLong_FromLong(rev);
2314 2323 if (rev == -2)
2315 2324 raise_revlog_error();
2316 2325 return NULL;
2317 2326 }
2318 2327
2319 2328 /*
2320 2329 * Fully populate the radix tree.
2321 2330 */
2322 2331 static int index_populate_nt(indexObject *self)
2323 2332 {
2324 2333 int rev;
2325 2334 if (self->ntrev > 0) {
2326 2335 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2327 2336 const char *n = index_node_existing(self, rev);
2328 2337 if (n == NULL)
2329 2338 return -1;
2330 2339 if (nt_insert(&self->nt, n, rev) == -1)
2331 2340 return -1;
2332 2341 }
2333 2342 self->ntrev = -1;
2334 2343 }
2335 2344 return 0;
2336 2345 }
2337 2346
/*
 * partialmatch(hexprefix) -> full binary node, None when no revision
 * matches the prefix (or the prefix is not hex), or NULL with an
 * exception set on error or ambiguity.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() sets an exception on invalid characters */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* prefix search needs the whole tree, so populate it fully */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		/* ambiguous prefix */
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		/* the null revision */
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2388 2397
/*
 * shortest(node) -> int: length of the shortest unique hex prefix of a
 * binary nodeid, raising RevlogError when the node is not in the index.
 */
static PyObject *index_shortest(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;

	self->ntlookups++;
	/* uniqueness of a prefix can only be decided on the full tree */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	length = nt_shortest(&self->nt, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyLong_FromLong(length);
}
2414 2423
2415 2424 static PyObject *index_m_get(indexObject *self, PyObject *args)
2416 2425 {
2417 2426 PyObject *val;
2418 2427 char *node;
2419 2428 int rev;
2420 2429
2421 2430 if (!PyArg_ParseTuple(args, "O", &val))
2422 2431 return NULL;
2423 2432 if (node_check(self->nodelen, val, &node) == -1)
2424 2433 return NULL;
2425 2434 rev = index_find_node(self, node);
2426 2435 if (rev == -3)
2427 2436 return NULL;
2428 2437 if (rev == -2)
2429 2438 Py_RETURN_NONE;
2430 2439 return PyLong_FromLong(rev);
2431 2440 }
2432 2441
/*
 * sq_contains: integers are checked against the valid rev range
 * (including nullrev == -1); anything else must be a binary nodeid and
 * is looked up in the node tree. Returns 1/0, or -1 with an exception
 * set.
 */
static int index_contains(indexObject *self, PyObject *value)
{
	char *node;

	if (PyLong_Check(value)) {
		long rev;
		if (!pylong_to_long(value, &rev)) {
			return -1;
		}
		return rev >= -1 && rev < index_length(self);
	}

	if (node_check(self->nodelen, value, &node) == -1)
		return -1;

	switch (index_find_node(self, node)) {
	case -3:
		return -1;
	case -2:
		return 0;
	default:
		return 1;
	}
}
2457 2466
2458 2467 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2459 2468 {
2460 2469 int ret = index_contains(self, args);
2461 2470 if (ret < 0)
2462 2471 return NULL;
2463 2472 return PyBool_FromLong((long)ret);
2464 2473 }
2465 2474
/*
 * rev(node) -> rev: strict lookup of a binary nodeid, raising
 * RevlogError when the node is not present.
 */
static PyObject *index_m_rev(indexObject *self, PyObject *val)
{
	char *node;
	int rev;

	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node);
	if (rev >= -1)
		return PyLong_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	return NULL;
}
2480 2489
/* one bit per input revision, plus one "poison" bit; caps inputs at 63 */
typedef uint64_t bitmask;

/*
 * Given a disjoint set of revs, return all candidates for the
 * greatest common ancestor. In revset notation, this is the set
 * "heads(::a and ::b and ...)"
 */
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
                                     int revcount)
{
	/* seen[v] tracks which inputs have v as an ancestor; allseen
	 * means "ancestor of every input", poison means "below an
	 * already-reported candidate" */
	const bitmask allseen = (1ull << revcount) - 1;
	const bitmask poison = 1ull << revcount;
	PyObject *gca = PyList_New(0);
	int i, v, interesting;
	int maxrev = -1;
	bitmask sp;
	bitmask *seen;

	if (gca == NULL)
		return PyErr_NoMemory();

	for (i = 0; i < revcount; i++) {
		if (revs[i] > maxrev)
			maxrev = revs[i];
	}

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		Py_DECREF(gca);
		return PyErr_NoMemory();
	}

	for (i = 0; i < revcount; i++)
		seen[revs[i]] = 1ull << i;

	interesting = revcount;

	/* sweep from the highest rev downwards, propagating bitmasks to
	 * parents; stop once no unpoisoned mask remains in flight */
	for (v = maxrev; v >= 0 && interesting; v--) {
		bitmask sv = seen[v];
		int parents[2];

		if (!sv)
			continue;

		if (sv < poison) {
			interesting -= 1;
			if (sv == allseen) {
				/* v is an ancestor of every input: a GCA
				 * candidate; poison it so its ancestors
				 * are not reported too */
				PyObject *obj = PyLong_FromLong(v);
				if (obj == NULL)
					goto bail;
				if (PyList_Append(gca, obj) == -1) {
					Py_DECREF(obj);
					goto bail;
				}
				sv |= poison;
				for (i = 0; i < revcount; i++) {
					if (revs[i] == v)
						goto done;
				}
			}
		}
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			if (p == -1)
				continue;
			sp = seen[p];
			if (sv < poison) {
				if (sp == 0) {
					seen[p] = sv;
					interesting++;
				} else if (sp != sv)
					seen[p] |= sv;
			} else {
				/* poison overrides whatever was there */
				if (sp && sp < poison)
					interesting--;
				seen[p] = sv;
			}
		}
	}

done:
	free(seen);
	return gca;
bail:
	free(seen);
	Py_XDECREF(gca);
	return NULL;
}
2572 2581
/*
 * Given a disjoint set of revs, return the subset with the longest
 * path to the root.
 */
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
	const Py_ssize_t revcount = PyList_GET_SIZE(revs);
	static const Py_ssize_t capacity = 24;
	int *depth, *interesting = NULL;
	int i, j, v, ninteresting;
	PyObject *dict = NULL, *keys = NULL;
	long *seen = NULL;
	int maxrev = -1;
	long final;

	/* seen[] masks are indexed into interesting[], which has 2^revcount
	 * slots, so the input size must stay small */
	if (revcount > capacity) {
		PyErr_Format(PyExc_OverflowError,
		             "bitset size (%ld) > capacity (%ld)",
		             (long)revcount, (long)capacity);
		return NULL;
	}

	for (i = 0; i < revcount; i++) {
		int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
		if (n > maxrev)
			maxrev = n;
	}

	depth = calloc(sizeof(*depth), maxrev + 1);
	if (depth == NULL)
		return PyErr_NoMemory();

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
	if (interesting == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	if (PyList_Sort(revs) == -1)
		goto bail;

	/* each input rev starts at depth 1 with its own singleton bitmask */
	for (i = 0; i < revcount; i++) {
		int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
		long b = 1l << i;
		depth[n] = 1;
		seen[n] = b;
		interesting[b] = 1;
	}

	/* invariant: ninteresting is the number of non-zero entries in
	 * interesting. */
	ninteresting = (int)revcount;

	/* sweep downwards, pushing depth/bitmask info to parents until
	 * only one interesting bitmask class remains */
	for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
		int dv = depth[v];
		int parents[2];
		long sv;

		if (dv == 0)
			continue;

		sv = seen[v];
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			long sp;
			int dp;

			if (p == -1)
				continue;

			dp = depth[p];
			sp = seen[p];
			if (dp <= dv) {
				/* deeper path to p found: adopt v's mask */
				depth[p] = dv + 1;
				if (sp != sv) {
					interesting[sv] += 1;
					seen[p] = sv;
					if (sp) {
						interesting[sp] -= 1;
						if (interesting[sp] == 0)
							ninteresting -= 1;
					}
				}
			} else if (dv == dp - 1) {
				/* equal-depth paths merge their masks */
				long nsp = sp | sv;
				if (nsp == sp)
					continue;
				seen[p] = nsp;
				interesting[sp] -= 1;
				if (interesting[sp] == 0)
					ninteresting -= 1;
				if (interesting[nsp] == 0)
					ninteresting += 1;
				interesting[nsp] += 1;
			}
		}
		interesting[sv] -= 1;
		if (interesting[sv] == 0)
			ninteresting -= 1;
	}

	/* combine the surviving bitmask classes into one membership mask */
	final = 0;
	j = ninteresting;
	for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
		if (interesting[i] == 0)
			continue;
		final |= i;
		j -= 1;
	}
	if (final == 0) {
		keys = PyList_New(0);
		goto bail;
	}

	dict = PyDict_New();
	if (dict == NULL)
		goto bail;

	/* collect the input revs whose bit survived in `final` */
	for (i = 0; i < revcount; i++) {
		PyObject *key;

		if ((final & (1 << i)) == 0)
			continue;

		key = PyList_GET_ITEM(revs, i);
		Py_INCREF(key);
		Py_INCREF(Py_None);
		if (PyDict_SetItem(dict, key, Py_None) == -1) {
			Py_DECREF(key);
			Py_DECREF(Py_None);
			goto bail;
		}
	}

	keys = PyDict_Keys(dict);

bail:
	free(depth);
	free(seen);
	free(interesting);
	Py_XDECREF(dict);

	return keys;
}
2726 2735
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	/* validate and deduplicate the input revisions */
	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyLong_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyLong_AsLong(obj);
		Py_DECREF(obj);
		if (val == -1) {
			/* nullrev: the only common ancestor is null, so
			 * the set of heads is empty */
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		/* a single rev is its own sole common-ancestor head */
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyLong_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}
2818 2827
2819 2828 /*
2820 2829 * Given a (possibly overlapping) set of revs, return the greatest
2821 2830 * common ancestors: those with the longest path to the root.
2822 2831 */
2823 2832 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2824 2833 {
2825 2834 PyObject *ret;
2826 2835 PyObject *gca = index_commonancestorsheads(self, args);
2827 2836 if (gca == NULL)
2828 2837 return NULL;
2829 2838
2830 2839 if (PyList_GET_SIZE(gca) <= 1) {
2831 2840 return gca;
2832 2841 }
2833 2842
2834 2843 ret = find_deepest(self, gca);
2835 2844 Py_DECREF(gca);
2836 2845 return ret;
2837 2846 }
2838 2847
/*
 * Invalidate any trie entries introduced by added revs.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len;

	/* only revs in the "added" tail (>= self->length) are affected */
	len = self->length + self->new_length;
	i = start - self->length;
	if (i < 0)
		return;

	for (i = start; i < len; i++) {
		const char *node = index_node(self, i);
		nt_delete_node(&self->nt, node);
	}

	/* drop the truncated revisions from the added count */
	self->new_length = start - self->length;
}
2858 2867
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range. Returns 0 on success, -1 with an exception set otherwise.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	/* +1 to account for the null revision at the virtual end */
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* normalize a reversed slice into a forward one */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	/* only deletion of a suffix of the index is supported */
	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* truncation reaches into the on-disk portion */
		if (self->ntinitialized) {
			Py_ssize_t i;

			for (i = start; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->new_length)
				index_invalidate_added(self, self->length);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		} else if (self->new_length) {
			self->new_length = 0;
		}

		self->length = start;
		goto done;
	}

	/* truncation only affects in-memory added revisions */
	if (self->ntinitialized) {
		index_invalidate_added(self, start);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	} else {
		self->new_length = start - self->length;
	}
done:
	/* cached head computation is no longer valid */
	Py_CLEAR(self->headrevs);
	return ret;
}
2931 2940
2932 2941 /*
2933 2942 * Supported ops:
2934 2943 *
2935 2944 * slice deletion
2936 2945 * string assignment (extend node->rev mapping)
2937 2946 * string deletion (shrink node->rev mapping)
2938 2947 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
    char *node;
    long rev;

    /* slice deletion: del index[start:stop] truncates the index */
    if (PySlice_Check(item) && value == NULL)
        return index_slice_del(self, item);

    /* otherwise the key must be a binary node id */
    if (node_check(self->nodelen, item, &node) == -1)
        return -1;

    /* del index[node]: drop the node from the lookup tree (no-op when
     * the tree was never built) */
    if (value == NULL)
        return self->ntinitialized ? nt_delete_node(&self->nt, node)
                                   : 0;
    rev = PyLong_AsLong(value);
    if (rev > INT_MAX || rev < 0) {
        /* preserve an existing error (e.g. from PyLong_AsLong) */
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ValueError, "rev out of range");
        return -1;
    }

    /* index[node] = rev: make sure the lookup tree exists, then insert */
    if (index_init_nt(self) == -1)
        return -1;
    return nt_insert(&self->nt, node, (int)rev);
}
2965 2974
2966 2975 /*
2967 2976 * Find all RevlogNG entries in an index that has inline data. Update
2968 2977 * the optional "offsets" table with those entries.
2969 2978 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
    const char *data = (const char *)self->buf.buf;
    Py_ssize_t pos = 0;
    Py_ssize_t end = self->buf.len;
    long incr = self->entry_size;
    Py_ssize_t len = 0;

    /* Walk the buffer record by record; in an inline revlog each index
     * entry is immediately followed by its compressed revision data, so
     * the step size is entry_size + payload length(s). */
    while (pos + self->entry_size <= end && pos >= 0) {
        uint32_t comp_len, sidedata_comp_len = 0;
        /* 3rd element of header is length of compressed inline data */
        if (self->format_version == format_v1) {
            comp_len =
                getbe32(data + pos + entry_v1_offset_comp_len);
            sidedata_comp_len = 0;
        } else if (self->format_version == format_v2) {
            comp_len =
                getbe32(data + pos + entry_v2_offset_comp_len);
            /* v2 also stores sidedata inline, after the revision data */
            sidedata_comp_len = getbe32(
                data + pos + entry_v2_offset_sidedata_comp_len);
        } else {
            raise_revlog_error();
            return -1;
        }
        incr = self->entry_size + comp_len + sidedata_comp_len;
        if (offsets)
            offsets[len] = data + pos;
        len++;
        pos += incr;
    }

    /* a partial trailing record means the file is truncated or corrupt */
    if (pos != end) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_ValueError, "corrupt index file");
        return -1;
    }

    return len;
}
3009 3018
static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
{
    PyObject *data_obj, *inlined_obj;
    Py_ssize_t size;

    static char *kwlist[] = {"data", "inlined", "format", NULL};

    /* Initialize before argument-checking to avoid index_dealloc() crash.
     */
    self->added = NULL;
    self->new_length = 0;
    self->added_length = 0;
    self->data = NULL;
    memset(&self->buf, 0, sizeof(self->buf));
    self->headrevs = NULL;
    /* Py_None means "no revision filtering" */
    self->filteredrevs = Py_None;
    Py_INCREF(Py_None);
    self->ntinitialized = 0;
    self->offsets = NULL;
    self->nodelen = 20;
    self->nullentry = NULL;
    self->rust_ext_compat = 1;
    self->format_version = format_v1;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
                                     &data_obj, &inlined_obj,
                                     &(self->format_version)))
        return -1;
    if (!PyObject_CheckBuffer(data_obj)) {
        PyErr_SetString(PyExc_TypeError,
                        "data does not support buffer interface");
        return -1;
    }
    if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
        PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
        return -1;
    }

    /* pick the on-disk record size for the requested format.
     * NOTE(review): an unrecognized format value leaves entry_size
     * untouched by this chain -- confirm callers only ever pass
     * v1/v2/cl2. */
    if (self->format_version == format_v1) {
        self->entry_size = v1_entry_size;
    } else if (self->format_version == format_v2) {
        self->entry_size = v2_entry_size;
    } else if (self->format_version == format_cl2) {
        self->entry_size = cl2_entry_size;
    }

    /* synthetic entry returned for the null revision */
    self->nullentry = Py_BuildValue(
        "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
        0, comp_mode_inline, comp_mode_inline, rank_unknown);

    if (!self->nullentry)
        return -1;
    PyObject_GC_UnTrack(self->nullentry);

    if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
        return -1;
    size = self->buf.len;

    self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
    self->data = data_obj;

    self->ntlookups = self->ntmisses = 0;
    self->ntrev = -1;
    Py_INCREF(self->data);

    if (self->inlined) {
        /* inline data: revision count must be discovered by scanning */
        Py_ssize_t len = inline_scan(self, NULL);
        if (len == -1)
            goto bail;
        self->length = len;
    } else {
        /* separate data file: the index is a plain array of records */
        if (size % self->entry_size) {
            PyErr_SetString(PyExc_ValueError, "corrupt index file");
            goto bail;
        }
        self->length = size / self->entry_size;
    }

    return 0;
bail:
    return -1;
}
3092 3101
/* The index implements the node->rev mapping protocol itself, so the
 * "nodemap" attribute simply returns a new reference to the index. */
static PyObject *index_nodemap(indexObject *self)
{
    Py_INCREF(self);
    return (PyObject *)self;
}
3098 3107
/* Release every derived/cached structure: inline offsets table, the
 * node lookup tree and the cached head revisions. */
static void _index_clearcaches(indexObject *self)
{
    if (self->offsets) {
        PyMem_Free((void *)self->offsets);
        self->offsets = NULL;
    }
    if (self->ntinitialized) {
        nt_dealloc(&self->nt);
    }
    self->ntinitialized = 0;
    Py_CLEAR(self->headrevs);
}
3111 3120
/* Python-visible cache reset: also rewinds the node-tree fill point and
 * the lookup statistics. */
static PyObject *index_clearcaches(indexObject *self)
{
    _index_clearcaches(self);
    self->ntrev = -1;
    self->ntlookups = self->ntmisses = 0;
    Py_RETURN_NONE;
}
3119 3128
static void index_dealloc(indexObject *self)
{
    _index_clearcaches(self);
    Py_XDECREF(self->filteredrevs);
    /* release the borrowed view on the underlying index bytes before
     * dropping our reference to the owning object */
    if (self->buf.buf) {
        PyBuffer_Release(&self->buf);
        memset(&self->buf, 0, sizeof(self->buf));
    }
    Py_XDECREF(self->data);
    PyMem_Free(self->added);
    Py_XDECREF(self->nullentry);
    PyObject_Del(self);
}
3133 3142
/* sequence protocol: len(index), index[rev] and ``node in index`` */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
3144 3153
/* mapping protocol: node lookup, plus slice deletion / nodemap updates
 * through index_assign_subscript */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
3150 3159
3151 3160 static PyMethodDef index_methods[] = {
3152 3161 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3153 3162 "return the gca set of the given revs"},
3154 3163 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3155 3164 METH_VARARGS,
3156 3165 "return the heads of the common ancestors of the given revs"},
3157 3166 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3158 3167 "clear the index caches"},
3159 3168 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3160 3169 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3161 3170 "return `rev` associated with a node or None"},
3162 3171 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3163 3172 "return True if the node exist in the index"},
3164 3173 {"rev", (PyCFunction)index_m_rev, METH_O,
3165 3174 "return `rev` associated with a node or raise RevlogError"},
3166 3175 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3167 3176 "compute phases"},
3168 3177 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3169 3178 "reachableroots"},
3170 3179 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3171 3180 METH_VARARGS, "replace an existing index entry with a new value"},
3172 3181 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3173 3182 "get head revisions"}, /* Can do filtering since 3.2 */
3174 3183 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3175 3184 "get filtered head revisions"}, /* Can always do filtering */
3176 3185 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3177 3186 "True if the object is a snapshot"},
3178 3187 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3179 3188 "Gather snapshot data in a cache dict"},
3180 3189 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3181 3190 "determine revisions with deltas to reconstruct fulltext"},
3182 3191 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3183 3192 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
3184 3193 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3185 3194 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3186 3195 "match a potentially ambiguous node ID"},
3187 3196 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3188 3197 "find length of shortest hex nodeid of a binary ID"},
3189 3198 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3190 3199 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3191 3200 "return an entry in binary form"},
3192 3201 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3193 3202 "pack the revlog header information into binary"},
3194 3203 {NULL} /* Sentinel */
3195 3204 };
3196 3205
static PyGetSetDef index_getset[] = {
    /* the index doubles as its own node->rev mapping; see index_nodemap */
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
3201 3210
3202 3211 static PyMemberDef index_members[] = {
3203 3212 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3204 3213 "size of an index entry"},
3205 3214 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3206 3215 "size of an index entry"},
3207 3216 {NULL} /* Sentinel */
3208 3217 };
3209 3218
/* type object backing parsers.index; registered on the module in
 * revlog_module_init() */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    index_members,                 /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
3249 3258
3250 3259 /*
3251 3260 * returns a tuple of the form (index, cache) with elements as
3252 3261 * follows:
3253 3262 *
3254 3263 * index: an index object that lazily parses Revlog (v1 or v2) records
3255 3264 * cache: if data is inlined, a tuple (0, index_file_content), else None
3256 3265 * index_file_content could be a string, or a buffer
3257 3266 *
3258 3267 * added complications are for backwards compatibility
3259 3268 */
PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyObject *cache = NULL;
    indexObject *idx;
    int ret;

    idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
    if (idx == NULL)
        goto bail;

    /* index_init parses the (data, inlined, format) arguments */
    ret = index_init(idx, args, kwargs);
    if (ret == -1)
        goto bail;

    if (idx->inlined) {
        /* inline revlog: hand the raw file content back as a
         * (offset, data) chunk cache seed */
        cache = Py_BuildValue("iO", 0, idx->data);
        if (cache == NULL)
            goto bail;
    } else {
        cache = Py_None;
        Py_INCREF(cache);
    }

    /* "NN" steals both references, so no decref on success */
    return Py_BuildValue("NN", idx, cache);

bail:
    Py_XDECREF(idx);
    Py_XDECREF(cache);
    return NULL;
}
3290 3299
/* function table exported to other extension modules via the
 * "revlog_CAPI" capsule created in revlog_module_init() */
static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
};
3296 3305
/* register the index and nodetree types plus the C-API capsule on the
 * parsers module; silently returns early if a type fails to initialize */
void revlog_module_init(PyObject *mod)
{
    PyObject *caps = NULL;
    HgRevlogIndex_Type.tp_new = PyType_GenericNew;
    if (PyType_Ready(&HgRevlogIndex_Type) < 0)
        return;
    Py_INCREF(&HgRevlogIndex_Type);
    PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

    nodetreeType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&nodetreeType) < 0)
        return;
    Py_INCREF(&nodetreeType);
    PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

    caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
    if (caps != NULL)
        PyModule_AddObject(mod, "revlog_CAPI", caps);
}
@@ -1,1428 +1,1472 b''
1 1 # revlogdeltas.py - Logic around delta computation for revlog
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10
11 11 import collections
12 12 import struct
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from ..node import nullrev
16 16 from ..i18n import _
17 17 from ..pycompat import getattr
18 18
19 19 from .constants import (
20 20 COMP_MODE_DEFAULT,
21 21 COMP_MODE_INLINE,
22 22 COMP_MODE_PLAIN,
23 23 DELTA_BASE_REUSE_NO,
24 24 KIND_CHANGELOG,
25 25 KIND_FILELOG,
26 26 KIND_MANIFESTLOG,
27 27 REVIDX_ISCENSORED,
28 28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 29 )
30 30
31 31 from ..thirdparty import attr
32 32
33 33 from .. import (
34 34 error,
35 35 mdiff,
36 36 util,
37 37 )
38 38
39 39 from . import flagutil
40 40
41 41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 42 LIMIT_DELTA2TEXT = 2
43 43
44 44
class _testrevlog:
    """In-memory stand-in for a revlog, used by the doctests in this module."""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """``data`` is the list of cumulative end offsets of each revision"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both the null revision and the first revision start at offset 0
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # the null revision always counts as a snapshot
        return rev == nullrev or rev in self._snapshot
78 78
79 79
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ... 5, #00 (5)
    ... 10, #01 (5)
    ... 12, #02 (2)
    ... 12, #03 (empty)
    ... 27, #04 (15)
    ... 31, #05 (4)
    ... 31, #06 (empty)
    ... 42, #07 (11)
    ... 47, #08 (5)
    ... 47, #09 (empty)
    ... 48, #10 (1)
    ... 51, #11 (3)
    ... 74, #12 (23)
    ... 85, #13 (11)
    ... 86, #14 (1)
    ... 91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # prefer the C/Rust implementation of density slicing when the index
    # provides one; otherwise fall back to the pure-Python version
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    ):
        # each dense chunk is further split to honor the size limit
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
152 152
153 153
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ... 3, #0 (3)
    ... 5, #1 (2)
    ... 6, #2 (1)
    ... 8, #3 (2)
    ... 8, #4 (empty)
    ... 11, #5 (3)
    ... 12, #6 (1)
    ... 13, #7 (1)
    ... 14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole chunk already fits within the size limit
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget: extend the current chunk
            endrevidx = idx + 1
        else:
            # budget exceeded (or non-snapshot reached): emit the chunk
            # accumulated so far and start a fresh one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
307 307
308 308
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ... 5, #00 (5)
    ... 10, #01 (5)
    ... 12, #02 (2)
    ... 12, #03 (empty)
    ... 27, #04 (15)
    ... 31, #05 (4)
    ... 31, #06 (empty)
    ... 42, #07 (11)
    ... 47, #08 (5)
    ... 47, #09 (empty)
    ... 48, #10 (1)
    ... 51, #11 (3)
    ... 74, #12 (23)
    ... 85, #13 (11)
    ... 86, #14 (1)
    ... 91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ... mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ... targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ... targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect (size, index) pairs for every gap between stored revisions;
    # the list is later sorted so the largest gaps can be popped first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read,
        # which raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
433 433
434 434
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ... 5, #0
    ... 10, #1
    ... 12, #2
    ... 12, #3 (empty)
    ... 17, #4
    ... 21, #5
    ... 21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (
            endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
        ):
            endidx -= 1

    return revs[startidx:endidx]
487 487
488 488
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ... 5, #0
    ... 10, #1
    ... 12, #2
    ... 12, #3 (empty)
    ... 17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the first byte of the first rev to the last byte of
    # the last rev, gaps included
    return revlog.end(revs[-1]) - revlog.start(revs[0])
517 517
518 518
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    The resulting text is validated against ``expectednode`` (hash built
    with parents ``p1``/``p2``) unless the flags disable validation; the
    censored-revision flag is checked for consistency either way.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # a node claiming to be censored must actually carry the flag
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
548 548
549 549
550 550 @attr.s(slots=True, frozen=True)
551 551 class _deltainfo:
552 552 distance = attr.ib()
553 553 deltalen = attr.ib()
554 554 data = attr.ib()
555 555 base = attr.ib()
556 556 chainbase = attr.ib()
557 557 chainlen = attr.ib()
558 558 compresseddeltalen = attr.ib()
559 559 snapshotdepth = attr.ib()
560 560
561 561
562 562 def drop_u_compression(delta):
563 563 """turn into a "u" (no-compression) into no-compression without header
564 564
565 565 This is useful for revlog format that has better compression method.
566 566 """
567 567 assert delta.data[0] == b'u', delta.data[0]
568 568 return _deltainfo(
569 569 delta.distance,
570 570 delta.deltalen - 1,
571 571 (b'', delta.data[1]),
572 572 delta.base,
573 573 delta.chainbase,
574 574 delta.chainlen,
575 575 delta.compresseddeltalen,
576 576 delta.snapshotdepth,
577 577 )
578 578
579 579
580 580 def is_good_delta_info(revlog, deltainfo, revinfo):
581 581 """Returns True if the given delta is good. Good means that it is within
582 582 the disk span, disk size, and chain length bounds that we know to be
583 583 performant."""
584 584 if deltainfo is None:
585 585 return False
586 586
587 587 # - 'deltainfo.distance' is the distance from the base revision --
588 588 # bounding it limits the amount of I/O we need to do.
589 589 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
590 590 # deltas we need to apply -- bounding it limits the amount of CPU
591 591 # we consume.
592 592
593 593 textlen = revinfo.textlen
594 594 defaultmax = textlen * 4
595 595 maxdist = revlog._maxdeltachainspan
596 596 if not maxdist:
597 597 maxdist = deltainfo.distance # ensure the conditional pass
598 598 maxdist = max(maxdist, defaultmax)
599 599
600 600 # Bad delta from read span:
601 601 #
602 602 # If the span of data read is larger than the maximum allowed.
603 603 #
604 604 # In the sparse-revlog case, we rely on the associated "sparse reading"
605 605 # to avoid issue related to the span of data. In theory, it would be
606 606 # possible to build pathological revlog where delta pattern would lead
607 607 # to too many reads. However, they do not happen in practice at all. So
608 608 # we skip the span check entirely.
609 609 if not revlog._sparserevlog and maxdist < deltainfo.distance:
610 610 return False
611 611
612 612 # Bad delta from new delta size:
613 613 #
614 614 # If the delta size is larger than the target text, storing the
615 615 # delta will be inefficient.
616 616 if textlen < deltainfo.deltalen:
617 617 return False
618 618
619 619 # Bad delta from cumulated payload size:
620 620 #
621 621 # If the sum of delta get larger than K * target text length.
622 622 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
623 623 return False
624 624
625 625 # Bad delta from chain length:
626 626 #
627 627 # If the number of delta in the chain gets too high.
628 628 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
629 629 return False
630 630
631 631 # bad delta from intermediate snapshot size limit
632 632 #
633 633 # If an intermediate snapshot size is higher than the limit. The
634 634 # limit exist to prevent endless chain of intermediate delta to be
635 635 # created.
636 636 if (
637 637 deltainfo.snapshotdepth is not None
638 638 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
639 639 ):
640 640 return False
641 641
642 642 # bad delta if new intermediate snapshot is larger than the previous
643 643 # snapshot
644 644 if (
645 645 deltainfo.snapshotdepth
646 646 and revlog.length(deltainfo.base) < deltainfo.deltalen
647 647 ):
648 648 return False
649 649
650 650 return True
651 651
652 652
653 653 # If a revision's full text is that much bigger than a base candidate full
654 654 # text's, it is very unlikely that it will produce a valid delta. We no longer
655 655 # consider these candidates.
656 656 LIMIT_BASE2TEXT = 500
657 657
658 658
659 659 def _candidategroups(
660 660 revlog,
661 661 textlen,
662 662 p1,
663 663 p2,
664 664 cachedelta,
665 665 excluded_bases=None,
666 666 target_rev=None,
667 667 ):
668 668 """Provides group of revision to be tested as delta base
669 669
670 670 This top level function focus on emitting groups with unique and worthwhile
671 671 content. See _raw_candidate_groups for details about the group order.
672 672 """
673 673 # should we try to build a delta?
674 674 if not (len(revlog) and revlog._storedeltachains):
675 675 yield None
676 676 return
677 677
678 678 deltalength = revlog.length
679 679 deltaparent = revlog.deltaparent
680 680 sparse = revlog._sparserevlog
681 681 good = None
682 682
683 683 deltas_limit = textlen * LIMIT_DELTA2TEXT
684 684 group_chunk_size = revlog._candidate_group_chunk_size
685 685
686 686 tested = {nullrev}
687 687 candidates = _refinedgroups(
688 688 revlog,
689 689 p1,
690 690 p2,
691 691 cachedelta,
692 692 )
693 693 while True:
694 694 temptative = candidates.send(good)
695 695 if temptative is None:
696 696 break
697 697 group = []
698 698 for rev in temptative:
699 699 # skip over empty delta (no need to include them in a chain)
700 700 while revlog._generaldelta and not (
701 701 rev == nullrev or rev in tested or deltalength(rev)
702 702 ):
703 703 tested.add(rev)
704 704 rev = deltaparent(rev)
705 705 # no need to try a delta against nullrev, this will be done as a
706 706 # last resort.
707 707 if rev == nullrev:
708 708 continue
709 709 # filter out revision we tested already
710 710 if rev in tested:
711 711 continue
712 712 # an higher authority deamed the base unworthy (e.g. censored)
713 713 if excluded_bases is not None and rev in excluded_bases:
714 714 tested.add(rev)
715 715 continue
716 716 # We are in some recomputation cases and that rev is too high in
717 717 # the revlog
718 718 if target_rev is not None and rev >= target_rev:
719 719 tested.add(rev)
720 720 continue
721 721 # filter out delta base that will never produce good delta
722 722 if deltas_limit < revlog.length(rev):
723 723 tested.add(rev)
724 724 continue
725 725 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
726 726 tested.add(rev)
727 727 continue
728 728 # no delta for rawtext-changing revs (see "candelta" for why)
729 729 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
730 730 tested.add(rev)
731 731 continue
732 732
733 733 # If we reach here, we are about to build and test a delta.
734 734 # The delta building process will compute the chaininfo in all
735 735 # case, since that computation is cached, it is fine to access it
736 736 # here too.
737 737 chainlen, chainsize = revlog._chaininfo(rev)
738 738 # if chain will be too long, skip base
739 739 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
740 740 tested.add(rev)
741 741 continue
742 742 # if chain already have too much data, skip base
743 743 if deltas_limit < chainsize:
744 744 tested.add(rev)
745 745 continue
746 746 if sparse and revlog.upperboundcomp is not None:
747 747 maxcomp = revlog.upperboundcomp
748 748 basenotsnap = (p1, p2, nullrev)
749 749 if rev not in basenotsnap and revlog.issnapshot(rev):
750 750 snapshotdepth = revlog.snapshotdepth(rev)
751 751 # If text is significantly larger than the base, we can
752 752 # expect the resulting delta to be proportional to the size
753 753 # difference
754 754 revsize = revlog.rawsize(rev)
755 755 rawsizedistance = max(textlen - revsize, 0)
756 756 # use an estimate of the compression upper bound.
757 757 lowestrealisticdeltalen = rawsizedistance // maxcomp
758 758
759 759 # check the absolute constraint on the delta size
760 760 snapshotlimit = textlen >> snapshotdepth
761 761 if snapshotlimit < lowestrealisticdeltalen:
762 762 # delta lower bound is larger than accepted upper bound
763 763 tested.add(rev)
764 764 continue
765 765
766 766 # check the relative constraint on the delta size
767 767 revlength = revlog.length(rev)
768 768 if revlength < lowestrealisticdeltalen:
769 769 # delta probable lower bound is larger than target base
770 770 tested.add(rev)
771 771 continue
772 772
773 773 group.append(rev)
774 774 if group:
775 775 # When the size of the candidate group is big, it can result in a
776 776 # quite significant performance impact. To reduce this, we can send
777 777 # them in smaller batches until the new batch does not provide any
778 778 # improvements.
779 779 #
780 780 # This might reduce the overall efficiency of the compression in
781 781 # some corner cases, but that should also prevent very pathological
782 782 # cases from being an issue. (eg. 20 000 candidates).
783 783 #
784 784 # XXX note that the ordering of the group becomes important as it
785 785 # now impacts the final result. The current order is unprocessed
786 786 # and can be improved.
787 787 if group_chunk_size == 0:
788 788 tested.update(group)
789 789 good = yield tuple(group)
790 790 else:
791 791 prev_good = good
792 792 for start in range(0, len(group), group_chunk_size):
793 793 sub_group = group[start : start + group_chunk_size]
794 794 tested.update(sub_group)
795 795 good = yield tuple(sub_group)
796 796 if prev_good == good:
797 797 break
798 798
799 799 yield None
800 800
801 801
802 def _findsnapshots(revlog, cache, start_rev):
803 """find snapshot from start_rev to tip"""
804 if util.safehasattr(revlog.index, b'findsnapshots'):
805 revlog.index.findsnapshots(cache, start_rev)
806 else:
807 deltaparent = revlog.deltaparent
808 issnapshot = revlog.issnapshot
809 for rev in revlog.revs(start_rev):
810 if issnapshot(rev):
811 cache[deltaparent(rev)].append(rev)
812
813
814 802 def _refinedgroups(revlog, p1, p2, cachedelta):
815 803 good = None
816 804 # First we try to reuse a the delta contained in the bundle.
817 805 # (or from the source revlog)
818 806 #
819 807 # This logic only applies to general delta repositories and can be disabled
820 808 # through configuration. Disabling reuse source delta is useful when
821 809 # we want to make sure we recomputed "optimal" deltas.
822 810 debug_info = None
823 811 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
824 812 # Assume what we received from the server is a good choice
825 813 # build delta will reuse the cache
826 814 if debug_info is not None:
827 815 debug_info['cached-delta.tested'] += 1
828 816 good = yield (cachedelta[0],)
829 817 if good is not None:
830 818 if debug_info is not None:
831 819 debug_info['cached-delta.accepted'] += 1
832 820 yield None
833 821 return
834 822 # XXX cache me higher
835 snapshots = collections.defaultdict(list)
823 snapshot_cache = SnapshotCache()
836 824 groups = _rawgroups(
837 825 revlog,
838 826 p1,
839 827 p2,
840 828 cachedelta,
841 snapshots,
829 snapshot_cache,
842 830 )
843 831 for candidates in groups:
844 832 good = yield candidates
845 833 if good is not None:
846 834 break
847 835
848 836 # If sparse revlog is enabled, we can try to refine the available deltas
849 837 if not revlog._sparserevlog:
850 838 yield None
851 839 return
852 840
853 841 # if we have a refinable value, try to refine it
854 842 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
855 843 # refine snapshot down
856 844 previous = None
857 845 while previous != good:
858 846 previous = good
859 847 base = revlog.deltaparent(good)
860 848 if base == nullrev:
861 849 break
862 850 good = yield (base,)
863 851 # refine snapshot up
864 if not snapshots:
865 _findsnapshots(revlog, snapshots, good + 1)
852 if not snapshot_cache.snapshots:
853 snapshot_cache.update(revlog, good + 1)
866 854 previous = None
867 855 while good != previous:
868 856 previous = good
869 children = tuple(sorted(c for c in snapshots[good]))
857 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
870 858 good = yield children
871 859
872 860 if debug_info is not None:
873 861 if good is None:
874 862 debug_info['no-solution'] += 1
875 863
876 864 yield None
877 865
878 866
879 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
867 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
880 868 """Provides group of revision to be tested as delta base
881 869
882 870 This lower level function focus on emitting delta theorically interresting
883 871 without looking it any practical details.
884 872
885 873 The group order aims at providing fast or small candidates first.
886 874 """
887 875 gdelta = revlog._generaldelta
888 876 # gate sparse behind general-delta because of issue6056
889 877 sparse = gdelta and revlog._sparserevlog
890 878 curr = len(revlog)
891 879 prev = curr - 1
892 880 deltachain = lambda rev: revlog._deltachain(rev)[0]
893 881
894 882 if gdelta:
895 883 # exclude already lazy tested base if any
896 884 parents = [p for p in (p1, p2) if p != nullrev]
897 885
898 886 if not revlog._deltabothparents and len(parents) == 2:
899 887 parents.sort()
900 888 # To minimize the chance of having to build a fulltext,
901 889 # pick first whichever parent is closest to us (max rev)
902 890 yield (parents[1],)
903 891 # then the other one (min rev) if the first did not fit
904 892 yield (parents[0],)
905 893 elif len(parents) > 0:
906 894 # Test all parents (1 or 2), and keep the best candidate
907 895 yield parents
908 896
909 897 if sparse and parents:
910 if snapshots is None:
898 if snapshot_cache is None:
911 899 # map: base-rev: [snapshot-revs]
912 snapshots = collections.defaultdict(list)
900 snapshot_cache = SnapshotCache()
913 901 # See if we can use an existing snapshot in the parent chains to use as
914 902 # a base for a new intermediate-snapshot
915 903 #
916 904 # search for snapshot in parents delta chain
917 905 # map: snapshot-level: snapshot-rev
918 906 parents_snaps = collections.defaultdict(set)
919 907 candidate_chains = [deltachain(p) for p in parents]
920 908 for chain in candidate_chains:
921 909 for idx, s in enumerate(chain):
922 910 if not revlog.issnapshot(s):
923 911 break
924 912 parents_snaps[idx].add(s)
925 913 snapfloor = min(parents_snaps[0]) + 1
926 _findsnapshots(revlog, snapshots, snapfloor)
914 snapshot_cache.update(revlog, snapfloor)
927 915 # search for the highest "unrelated" revision
928 916 #
929 917 # Adding snapshots used by "unrelated" revision increase the odd we
930 918 # reuse an independant, yet better snapshot chain.
931 919 #
932 920 # XXX instead of building a set of revisions, we could lazily enumerate
933 921 # over the chains. That would be more efficient, however we stick to
934 922 # simple code for now.
935 923 all_revs = set()
936 924 for chain in candidate_chains:
937 925 all_revs.update(chain)
938 926 other = None
939 927 for r in revlog.revs(prev, snapfloor):
940 928 if r not in all_revs:
941 929 other = r
942 930 break
943 931 if other is not None:
944 932 # To avoid unfair competition, we won't use unrelated intermediate
945 933 # snapshot that are deeper than the ones from the parent delta
946 934 # chain.
947 935 max_depth = max(parents_snaps.keys())
948 936 chain = deltachain(other)
949 937 for depth, s in enumerate(chain):
950 938 if s < snapfloor:
951 939 continue
952 940 if max_depth < depth:
953 941 break
954 942 if not revlog.issnapshot(s):
955 943 break
956 944 parents_snaps[depth].add(s)
957 945 # Test them as possible intermediate snapshot base
958 946 # We test them from highest to lowest level. High level one are more
959 947 # likely to result in small delta
960 948 floor = None
961 949 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
962 950 siblings = set()
963 951 for s in snaps:
964 siblings.update(snapshots[s])
952 siblings.update(snapshot_cache.snapshots[s])
965 953 # Before considering making a new intermediate snapshot, we check
966 954 # if an existing snapshot, children of base we consider, would be
967 955 # suitable.
968 956 #
969 957 # It give a change to reuse a delta chain "unrelated" to the
970 958 # current revision instead of starting our own. Without such
971 959 # re-use, topological branches would keep reopening new chains.
972 960 # Creating more and more snapshot as the repository grow.
973 961
974 962 if floor is not None:
975 963 # We only do this for siblings created after the one in our
976 964 # parent's delta chain. Those created before has less chances
977 965 # to be valid base since our ancestors had to create a new
978 966 # snapshot.
979 967 siblings = [r for r in siblings if floor < r]
980 968 yield tuple(sorted(siblings))
981 969 # then test the base from our parent's delta chain.
982 970 yield tuple(sorted(snaps))
983 971 floor = min(snaps)
984 972 # No suitable base found in the parent chain, search if any full
985 973 # snapshots emitted since parent's base would be a suitable base for an
986 974 # intermediate snapshot.
987 975 #
988 976 # It give a chance to reuse a delta chain unrelated to the current
989 977 # revisions instead of starting our own. Without such re-use,
990 978 # topological branches would keep reopening new full chains. Creating
991 979 # more and more snapshot as the repository grow.
992 yield tuple(snapshots[nullrev])
980 yield tuple(snapshot_cache.snapshots[nullrev])
993 981
994 982 if not sparse:
995 983 # other approach failed try against prev to hopefully save us a
996 984 # fulltext.
997 985 yield (prev,)
998 986
999 987
988 class SnapshotCache:
989 __slots__ = ('snapshots', '_start_rev', '_end_rev')
990
991 def __init__(self):
992 # XXX should probably be a set ?
993 self.snapshots = collections.defaultdict(list)
994 self._start_rev = None
995 self._end_rev = None
996
997 def update(self, revlog, start_rev=0):
998 """find snapshots from start_rev to tip"""
999 nb_revs = len(revlog)
1000 end_rev = nb_revs - 1
1001 if start_rev > end_rev:
1002 return # range is empty
1003
1004 if self._start_rev is None:
1005 assert self._end_rev is None
1006 self._update(revlog, start_rev, end_rev)
1007 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1008 if start_rev < self._start_rev:
1009 self._update(revlog, start_rev, self._start_rev - 1)
1010 if self._end_rev < end_rev:
1011 self._update(revlog, self._end_rev + 1, end_rev)
1012
1013 if self._start_rev is None:
1014 assert self._end_rev is None
1015 self._end_rev = end_rev
1016 self._start_rev = start_rev
1017 else:
1018 self._start_rev = min(self._start_rev, start_rev)
1019 self._end_rev = max(self._end_rev, end_rev)
1020 assert self._start_rev <= self._end_rev, (
1021 self._start_rev,
1022 self._end_rev,
1023 )
1024
1025 def _update(self, revlog, start_rev, end_rev):
1026 """internal method that actually do update content"""
1027 assert self._start_rev is None or (
1028 start_rev < self._start_rev or start_rev > self._end_rev
1029 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1030 assert self._start_rev is None or (
1031 end_rev < self._start_rev or end_rev > self._end_rev
1032 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1033 cache = self.snapshots
1034 if util.safehasattr(revlog.index, b'findsnapshots'):
1035 revlog.index.findsnapshots(cache, start_rev, end_rev)
1036 else:
1037 deltaparent = revlog.deltaparent
1038 issnapshot = revlog.issnapshot
1039 for rev in revlog.revs(start_rev, end_rev):
1040 if issnapshot(rev):
1041 cache[deltaparent(rev)].append(rev)
1042
1043
1000 1044 class deltacomputer:
1001 1045 def __init__(
1002 1046 self,
1003 1047 revlog,
1004 1048 write_debug=None,
1005 1049 debug_search=False,
1006 1050 debug_info=None,
1007 1051 ):
1008 1052 self.revlog = revlog
1009 1053 self._write_debug = write_debug
1010 1054 self._debug_search = debug_search
1011 1055 self._debug_info = debug_info
1012 1056
1013 1057 def buildtext(self, revinfo, fh):
1014 1058 """Builds a fulltext version of a revision
1015 1059
1016 1060 revinfo: revisioninfo instance that contains all needed info
1017 1061 fh: file handle to either the .i or the .d revlog file,
1018 1062 depending on whether it is inlined or not
1019 1063 """
1020 1064 btext = revinfo.btext
1021 1065 if btext[0] is not None:
1022 1066 return btext[0]
1023 1067
1024 1068 revlog = self.revlog
1025 1069 cachedelta = revinfo.cachedelta
1026 1070 baserev = cachedelta[0]
1027 1071 delta = cachedelta[1]
1028 1072
1029 1073 fulltext = btext[0] = _textfromdelta(
1030 1074 fh,
1031 1075 revlog,
1032 1076 baserev,
1033 1077 delta,
1034 1078 revinfo.p1,
1035 1079 revinfo.p2,
1036 1080 revinfo.flags,
1037 1081 revinfo.node,
1038 1082 )
1039 1083 return fulltext
1040 1084
1041 1085 def _builddeltadiff(self, base, revinfo, fh):
1042 1086 revlog = self.revlog
1043 1087 t = self.buildtext(revinfo, fh)
1044 1088 if revlog.iscensored(base):
1045 1089 # deltas based on a censored revision must replace the
1046 1090 # full content in one patch, so delta works everywhere
1047 1091 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1048 1092 delta = header + t
1049 1093 else:
1050 1094 ptext = revlog.rawdata(base, _df=fh)
1051 1095 delta = mdiff.textdiff(ptext, t)
1052 1096
1053 1097 return delta
1054 1098
1055 1099 def _builddeltainfo(self, revinfo, base, fh):
1056 1100 # can we use the cached delta?
1057 1101 revlog = self.revlog
1058 1102 debug_search = self._write_debug is not None and self._debug_search
1059 1103 chainbase = revlog.chainbase(base)
1060 1104 if revlog._generaldelta:
1061 1105 deltabase = base
1062 1106 else:
1063 1107 deltabase = chainbase
1064 1108 snapshotdepth = None
1065 1109 if revlog._sparserevlog and deltabase == nullrev:
1066 1110 snapshotdepth = 0
1067 1111 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1068 1112 # A delta chain should always be one full snapshot,
1069 1113 # zero or more semi-snapshots, and zero or more deltas
1070 1114 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1071 1115 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1072 1116 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1073 1117 delta = None
1074 1118 if revinfo.cachedelta:
1075 1119 cachebase = revinfo.cachedelta[0]
1076 1120 # check if the diff still apply
1077 1121 currentbase = cachebase
1078 1122 while (
1079 1123 currentbase != nullrev
1080 1124 and currentbase != base
1081 1125 and self.revlog.length(currentbase) == 0
1082 1126 ):
1083 1127 currentbase = self.revlog.deltaparent(currentbase)
1084 1128 if self.revlog._lazydelta and currentbase == base:
1085 1129 delta = revinfo.cachedelta[1]
1086 1130 if delta is None:
1087 1131 delta = self._builddeltadiff(base, revinfo, fh)
1088 1132 if debug_search:
1089 1133 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1090 1134 msg %= len(delta)
1091 1135 self._write_debug(msg)
1092 1136 # snapshotdept need to be neither None nor 0 level snapshot
1093 1137 if revlog.upperboundcomp is not None and snapshotdepth:
1094 1138 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1095 1139 snapshotlimit = revinfo.textlen >> snapshotdepth
1096 1140 if debug_search:
1097 1141 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1098 1142 msg %= lowestrealisticdeltalen
1099 1143 self._write_debug(msg)
1100 1144 if snapshotlimit < lowestrealisticdeltalen:
1101 1145 if debug_search:
1102 1146 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1103 1147 self._write_debug(msg)
1104 1148 return None
1105 1149 if revlog.length(base) < lowestrealisticdeltalen:
1106 1150 if debug_search:
1107 1151 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1108 1152 self._write_debug(msg)
1109 1153 return None
1110 1154 header, data = revlog.compress(delta)
1111 1155 deltalen = len(header) + len(data)
1112 1156 offset = revlog.end(len(revlog) - 1)
1113 1157 dist = deltalen + offset - revlog.start(chainbase)
1114 1158 chainlen, compresseddeltalen = revlog._chaininfo(base)
1115 1159 chainlen += 1
1116 1160 compresseddeltalen += deltalen
1117 1161
1118 1162 return _deltainfo(
1119 1163 dist,
1120 1164 deltalen,
1121 1165 (header, data),
1122 1166 deltabase,
1123 1167 chainbase,
1124 1168 chainlen,
1125 1169 compresseddeltalen,
1126 1170 snapshotdepth,
1127 1171 )
1128 1172
1129 1173 def _fullsnapshotinfo(self, fh, revinfo, curr):
1130 1174 rawtext = self.buildtext(revinfo, fh)
1131 1175 data = self.revlog.compress(rawtext)
1132 1176 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1133 1177 deltabase = chainbase = curr
1134 1178 snapshotdepth = 0
1135 1179 chainlen = 1
1136 1180
1137 1181 return _deltainfo(
1138 1182 dist,
1139 1183 deltalen,
1140 1184 data,
1141 1185 deltabase,
1142 1186 chainbase,
1143 1187 chainlen,
1144 1188 compresseddeltalen,
1145 1189 snapshotdepth,
1146 1190 )
1147 1191
1148 1192 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1149 1193 """Find an acceptable delta against a candidate revision
1150 1194
1151 1195 revinfo: information about the revision (instance of _revisioninfo)
1152 1196 fh: file handle to either the .i or the .d revlog file,
1153 1197 depending on whether it is inlined or not
1154 1198
1155 1199 Returns the first acceptable candidate revision, as ordered by
1156 1200 _candidategroups
1157 1201
1158 1202 If no suitable deltabase is found, we return delta info for a full
1159 1203 snapshot.
1160 1204
1161 1205 `excluded_bases` is an optional set of revision that cannot be used as
1162 1206 a delta base. Use this to recompute delta suitable in censor or strip
1163 1207 context.
1164 1208 """
1165 1209 if target_rev is None:
1166 1210 target_rev = len(self.revlog)
1167 1211
1168 1212 if not revinfo.textlen:
1169 1213 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1170 1214
1171 1215 if excluded_bases is None:
1172 1216 excluded_bases = set()
1173 1217
1174 1218 # no delta for flag processor revision (see "candelta" for why)
1175 1219 # not calling candelta since only one revision needs test, also to
1176 1220 # avoid overhead fetching flags again.
1177 1221 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1178 1222 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1179 1223
1180 1224 gather_debug = (
1181 1225 self._write_debug is not None or self._debug_info is not None
1182 1226 )
1183 1227 debug_search = self._write_debug is not None and self._debug_search
1184 1228
1185 1229 if gather_debug:
1186 1230 start = util.timer()
1187 1231
1188 1232 # count the number of different delta we tried (for debug purpose)
1189 1233 dbg_try_count = 0
1190 1234 # count the number of "search round" we did. (for debug purpose)
1191 1235 dbg_try_rounds = 0
1192 1236 dbg_type = b'unknown'
1193 1237
1194 1238 cachedelta = revinfo.cachedelta
1195 1239 p1 = revinfo.p1
1196 1240 p2 = revinfo.p2
1197 1241 revlog = self.revlog
1198 1242
1199 1243 deltainfo = None
1200 1244 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1201 1245
1202 1246 if gather_debug:
1203 1247 if p1r != nullrev:
1204 1248 p1_chain_len = revlog._chaininfo(p1r)[0]
1205 1249 else:
1206 1250 p1_chain_len = -1
1207 1251 if p2r != nullrev:
1208 1252 p2_chain_len = revlog._chaininfo(p2r)[0]
1209 1253 else:
1210 1254 p2_chain_len = -1
1211 1255 if debug_search:
1212 1256 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1213 1257 msg %= target_rev
1214 1258 self._write_debug(msg)
1215 1259
1216 1260 groups = _candidategroups(
1217 1261 self.revlog,
1218 1262 revinfo.textlen,
1219 1263 p1r,
1220 1264 p2r,
1221 1265 cachedelta,
1222 1266 excluded_bases,
1223 1267 target_rev,
1224 1268 )
1225 1269 candidaterevs = next(groups)
1226 1270 while candidaterevs is not None:
1227 1271 dbg_try_rounds += 1
1228 1272 if debug_search:
1229 1273 prev = None
1230 1274 if deltainfo is not None:
1231 1275 prev = deltainfo.base
1232 1276
1233 1277 if (
1234 1278 cachedelta is not None
1235 1279 and len(candidaterevs) == 1
1236 1280 and cachedelta[0] in candidaterevs
1237 1281 ):
1238 1282 round_type = b"cached-delta"
1239 1283 elif p1 in candidaterevs or p2 in candidaterevs:
1240 1284 round_type = b"parents"
1241 1285 elif prev is not None and all(c < prev for c in candidaterevs):
1242 1286 round_type = b"refine-down"
1243 1287 elif prev is not None and all(c > prev for c in candidaterevs):
1244 1288 round_type = b"refine-up"
1245 1289 else:
1246 1290 round_type = b"search-down"
1247 1291 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1248 1292 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1249 1293 self._write_debug(msg)
1250 1294 nominateddeltas = []
1251 1295 if deltainfo is not None:
1252 1296 if debug_search:
1253 1297 msg = (
1254 1298 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1255 1299 )
1256 1300 msg %= (deltainfo.base, deltainfo.deltalen)
1257 1301 self._write_debug(msg)
1258 1302 # if we already found a good delta,
1259 1303 # challenge it against refined candidates
1260 1304 nominateddeltas.append(deltainfo)
1261 1305 for candidaterev in candidaterevs:
1262 1306 if debug_search:
1263 1307 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1264 1308 msg %= candidaterev
1265 1309 self._write_debug(msg)
1266 1310 candidate_type = None
1267 1311 if candidaterev == p1:
1268 1312 candidate_type = b"p1"
1269 1313 elif candidaterev == p2:
1270 1314 candidate_type = b"p2"
1271 1315 elif self.revlog.issnapshot(candidaterev):
1272 1316 candidate_type = b"snapshot-%d"
1273 1317 candidate_type %= self.revlog.snapshotdepth(
1274 1318 candidaterev
1275 1319 )
1276 1320
1277 1321 if candidate_type is not None:
1278 1322 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1279 1323 msg %= candidate_type
1280 1324 self._write_debug(msg)
1281 1325 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1282 1326 msg %= self.revlog.length(candidaterev)
1283 1327 self._write_debug(msg)
1284 1328 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1285 1329 msg %= self.revlog.deltaparent(candidaterev)
1286 1330 self._write_debug(msg)
1287 1331
1288 1332 dbg_try_count += 1
1289 1333
1290 1334 if debug_search:
1291 1335 delta_start = util.timer()
1292 1336 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1293 1337 if debug_search:
1294 1338 delta_end = util.timer()
1295 1339 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1296 1340 msg %= delta_end - delta_start
1297 1341 self._write_debug(msg)
1298 1342 if candidatedelta is not None:
1299 1343 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1300 1344 if debug_search:
1301 1345 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1302 1346 msg %= candidatedelta.deltalen
1303 1347 self._write_debug(msg)
1304 1348 nominateddeltas.append(candidatedelta)
1305 1349 elif debug_search:
1306 1350 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1307 1351 msg %= candidatedelta.deltalen
1308 1352 self._write_debug(msg)
1309 1353 elif debug_search:
1310 1354 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1311 1355 self._write_debug(msg)
1312 1356 if nominateddeltas:
1313 1357 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1314 1358 if deltainfo is not None:
1315 1359 candidaterevs = groups.send(deltainfo.base)
1316 1360 else:
1317 1361 candidaterevs = next(groups)
1318 1362
1319 1363 if deltainfo is None:
1320 1364 dbg_type = b"full"
1321 1365 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1322 1366 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1323 1367 dbg_type = b"snapshot"
1324 1368 else:
1325 1369 dbg_type = b"delta"
1326 1370
1327 1371 if gather_debug:
1328 1372 end = util.timer()
1329 1373 used_cached = (
1330 1374 cachedelta is not None
1331 1375 and dbg_try_rounds == 1
1332 1376 and dbg_try_count == 1
1333 1377 and deltainfo.base == cachedelta[0]
1334 1378 )
1335 1379 dbg = {
1336 1380 'duration': end - start,
1337 1381 'revision': target_rev,
1338 1382 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1339 1383 'search_round_count': dbg_try_rounds,
1340 1384 'using-cached-base': used_cached,
1341 1385 'delta_try_count': dbg_try_count,
1342 1386 'type': dbg_type,
1343 1387 'p1-chain-len': p1_chain_len,
1344 1388 'p2-chain-len': p2_chain_len,
1345 1389 }
1346 1390 if (
1347 1391 deltainfo.snapshotdepth # pytype: disable=attribute-error
1348 1392 is not None
1349 1393 ):
1350 1394 dbg[
1351 1395 'snapshot-depth'
1352 1396 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1353 1397 else:
1354 1398 dbg['snapshot-depth'] = 0
1355 1399 target_revlog = b"UNKNOWN"
1356 1400 target_type = self.revlog.target[0]
1357 1401 target_key = self.revlog.target[1]
1358 1402 if target_type == KIND_CHANGELOG:
1359 1403 target_revlog = b'CHANGELOG:'
1360 1404 elif target_type == KIND_MANIFESTLOG:
1361 1405 target_revlog = b'MANIFESTLOG:'
1362 1406 if target_key:
1363 1407 target_revlog += b'%s:' % target_key
1364 1408 elif target_type == KIND_FILELOG:
1365 1409 target_revlog = b'FILELOG:'
1366 1410 if target_key:
1367 1411 target_revlog += b'%s:' % target_key
1368 1412 dbg['target-revlog'] = target_revlog
1369 1413
1370 1414 if self._debug_info is not None:
1371 1415 self._debug_info.append(dbg)
1372 1416
1373 1417 if self._write_debug is not None:
1374 1418 msg = (
1375 1419 b"DBG-DELTAS:"
1376 1420 b" %-12s"
1377 1421 b" rev=%d:"
1378 1422 b" delta-base=%d"
1379 1423 b" is-cached=%d"
1380 1424 b" - search-rounds=%d"
1381 1425 b" try-count=%d"
1382 1426 b" - delta-type=%-6s"
1383 1427 b" snap-depth=%d"
1384 1428 b" - p1-chain-length=%d"
1385 1429 b" p2-chain-length=%d"
1386 1430 b" - duration=%f"
1387 1431 b"\n"
1388 1432 )
1389 1433 msg %= (
1390 1434 dbg["target-revlog"],
1391 1435 dbg["revision"],
1392 1436 dbg["delta-base"],
1393 1437 dbg["using-cached-base"],
1394 1438 dbg["search_round_count"],
1395 1439 dbg["delta_try_count"],
1396 1440 dbg["type"],
1397 1441 dbg["snapshot-depth"],
1398 1442 dbg["p1-chain-len"],
1399 1443 dbg["p2-chain-len"],
1400 1444 dbg["duration"],
1401 1445 )
1402 1446 self._write_debug(msg)
1403 1447 return deltainfo
1404 1448
1405 1449
1406 1450 def delta_compression(default_compression_header, deltainfo):
1407 1451 """return (COMPRESSION_MODE, deltainfo)
1408 1452
1409 1453 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1410 1454 compression.
1411 1455 """
1412 1456 h, d = deltainfo.data
1413 1457 compression_mode = COMP_MODE_INLINE
1414 1458 if not h and not d:
1415 1459 # not data to store at all... declare them uncompressed
1416 1460 compression_mode = COMP_MODE_PLAIN
1417 1461 elif not h:
1418 1462 t = d[0:1]
1419 1463 if t == b'\0':
1420 1464 compression_mode = COMP_MODE_PLAIN
1421 1465 elif t == default_compression_header:
1422 1466 compression_mode = COMP_MODE_DEFAULT
1423 1467 elif h == b'u':
1424 1468 # we have a more efficient way to declare uncompressed
1425 1469 h = b''
1426 1470 compression_mode = COMP_MODE_PLAIN
1427 1471 deltainfo = drop_u_compression(deltainfo)
1428 1472 return compression_mode, deltainfo
@@ -1,524 +1,523 b''
1 1 # test revlog interaction about raw data (flagprocessor)
2 2
3 3
4 import collections
5 4 import hashlib
6 5 import sys
7 6
8 7 from mercurial import (
9 8 encoding,
10 9 revlog,
11 10 transaction,
12 11 vfs,
13 12 )
14 13
15 14 from mercurial.revlogutils import (
16 15 constants,
17 16 deltas,
18 17 flagutil,
19 18 )
20 19
21 20
22 21 class _NoTransaction:
23 22 """transaction like object to update the nodemap outside a transaction"""
24 23
25 24 def __init__(self):
26 25 self._postclose = {}
27 26
28 27 def addpostclose(self, callback_id, callback_func):
29 28 self._postclose[callback_id] = callback_func
30 29
31 30 def registertmp(self, *args, **kwargs):
32 31 pass
33 32
34 33 def addbackup(self, *args, **kwargs):
35 34 pass
36 35
37 36 def add(self, *args, **kwargs):
38 37 pass
39 38
40 39 def addabort(self, *args, **kwargs):
41 40 pass
42 41
43 42 def _report(self, *args):
44 43 pass
45 44
46 45
47 46 # TESTTMP is optional. This makes it convenient to run without run-tests.py
48 47 tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))
49 48
50 49 # Enable generaldelta otherwise revlog won't use delta as expected by the test
51 50 tvfs.options = {
52 51 b'generaldelta': True,
53 52 b'revlogv1': True,
54 53 b'sparse-revlog': True,
55 54 }
56 55
57 56
58 57 def abort(msg):
59 58 print('abort: %s' % msg)
60 59 # Return 0 so run-tests.py could compare the output.
61 60 sys.exit()
62 61
63 62
64 63 # Register a revlog processor for flag EXTSTORED.
65 64 #
66 65 # It simply prepends a fixed header, and replaces '1' to 'i'. So it has
67 66 # insertion and replacement, and may be interesting to test revlog's line-based
68 67 # deltas.
69 68 _extheader = b'E\n'
70 69
71 70
72 71 def readprocessor(self, rawtext):
73 72 # True: the returned text could be used to verify hash
74 73 text = rawtext[len(_extheader) :].replace(b'i', b'1')
75 74 return text, True
76 75
77 76
78 77 def writeprocessor(self, text):
79 78 # False: the returned rawtext shouldn't be used to verify hash
80 79 rawtext = _extheader + text.replace(b'1', b'i')
81 80 return rawtext, False
82 81
83 82
84 83 def rawprocessor(self, rawtext):
85 84 # False: do not verify hash. Only the content returned by "readprocessor"
86 85 # can be used to verify hash.
87 86 return False
88 87
89 88
90 89 flagutil.addflagprocessor(
91 90 revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
92 91 )
93 92
94 93 # Utilities about reading and appending revlog
95 94
96 95
97 96 def newtransaction():
98 97 # A transaction is required to write revlogs
99 98 report = lambda msg: None
100 99 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
101 100
102 101
103 102 def newrevlog(name=b'_testrevlog', recreate=False):
104 103 if recreate:
105 104 tvfs.tryunlink(name + b'.i')
106 105 target = (constants.KIND_OTHER, b'test')
107 106 rlog = revlog.revlog(tvfs, target=target, radix=name)
108 107 return rlog
109 108
110 109
111 110 def appendrev(rlog, text, tr, isext=False, isdelta=True):
112 111 """Append a revision. If isext is True, set the EXTSTORED flag so flag
113 112 processor will be used (and rawtext is different from text). If isdelta is
114 113 True, force the revision to be a delta, otherwise it's full text.
115 114 """
116 115 nextrev = len(rlog)
117 116 p1 = rlog.node(nextrev - 1)
118 117 p2 = rlog.nullid
119 118 if isext:
120 119 flags = revlog.REVIDX_EXTSTORED
121 120 else:
122 121 flags = revlog.REVIDX_DEFAULT_FLAGS
123 122 # Change storedeltachains temporarily, to override revlog's delta decision
124 123 rlog._storedeltachains = isdelta
125 124 try:
126 125 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
127 126 return nextrev
128 127 except Exception as ex:
129 128 abort('rev %d: failed to append: %s' % (nextrev, ex))
130 129 finally:
131 130 # Restore storedeltachains. It is always True, see revlog.__init__
132 131 rlog._storedeltachains = True
133 132
134 133
135 134 def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True):
136 135 """Copy revlog to destname using revlog.addgroup. Return the copied revlog.
137 136
138 137 This emulates push or pull. They use changegroup. Changegroup requires
139 138 repo to work. We don't have a repo, so a dummy changegroup is used.
140 139
141 140 If optimaldelta is True, use optimized delta parent, so the destination
142 141 revlog could probably reuse it. Otherwise it builds sub-optimal delta, and
143 142 the destination revlog needs more work to use it.
144 143
145 144 This exercises some revlog.addgroup (and revlog._addrevision(text=None))
146 145 code path, which is not covered by "appendrev" alone.
147 146 """
148 147
149 148 class dummychangegroup:
150 149 @staticmethod
151 150 def deltachunk(pnode):
152 151 pnode = pnode or rlog.nullid
153 152 parentrev = rlog.rev(pnode)
154 153 r = parentrev + 1
155 154 if r >= len(rlog):
156 155 return {}
157 156 if optimaldelta:
158 157 deltaparent = parentrev
159 158 else:
160 159 # suboptimal deltaparent
161 160 deltaparent = min(0, parentrev)
162 161 if not rlog.candelta(deltaparent, r):
163 162 deltaparent = -1
164 163 return {
165 164 b'node': rlog.node(r),
166 165 b'p1': pnode,
167 166 b'p2': rlog.nullid,
168 167 b'cs': rlog.node(rlog.linkrev(r)),
169 168 b'flags': rlog.flags(r),
170 169 b'deltabase': rlog.node(deltaparent),
171 170 b'delta': rlog.revdiff(deltaparent, r),
172 171 b'sidedata': rlog.sidedata(r),
173 172 }
174 173
175 174 def deltaiter(self):
176 175 chain = None
177 176 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
178 177 node = chunkdata[b'node']
179 178 p1 = chunkdata[b'p1']
180 179 p2 = chunkdata[b'p2']
181 180 cs = chunkdata[b'cs']
182 181 deltabase = chunkdata[b'deltabase']
183 182 delta = chunkdata[b'delta']
184 183 flags = chunkdata[b'flags']
185 184 sidedata = chunkdata[b'sidedata']
186 185
187 186 chain = node
188 187
189 188 yield (node, p1, p2, cs, deltabase, delta, flags, sidedata)
190 189
191 190 def linkmap(lnode):
192 191 return rlog.rev(lnode)
193 192
194 193 dlog = newrevlog(destname, recreate=True)
195 194 dummydeltas = dummychangegroup().deltaiter()
196 195 dlog.addgroup(dummydeltas, linkmap, tr)
197 196 return dlog
198 197
199 198
200 199 def lowlevelcopy(rlog, tr, destname=b'_destrevlog'):
201 200 """Like addgroupcopy, but use the low level revlog._addrevision directly.
202 201
203 202 It exercises some code paths that are hard to reach easily otherwise.
204 203 """
205 204 dlog = newrevlog(destname, recreate=True)
206 205 for r in rlog:
207 206 p1 = rlog.node(r - 1)
208 207 p2 = rlog.nullid
209 208 if r == 0 or (rlog.flags(r) & revlog.REVIDX_EXTSTORED):
210 209 text = rlog.rawdata(r)
211 210 cachedelta = None
212 211 else:
213 212 # deltaparent cannot have EXTSTORED flag.
214 213 deltaparent = max(
215 214 [-1]
216 215 + [
217 216 p
218 217 for p in range(r)
219 218 if rlog.flags(p) & revlog.REVIDX_EXTSTORED == 0
220 219 ]
221 220 )
222 221 text = None
223 222 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
224 223 flags = rlog.flags(r)
225 224 with dlog._writing(_NoTransaction()):
226 225 dlog._addrevision(
227 226 rlog.node(r),
228 227 text,
229 228 tr,
230 229 r,
231 230 p1,
232 231 p2,
233 232 flags,
234 233 cachedelta,
235 234 )
236 235 return dlog
237 236
238 237
239 238 # Utilities to generate revisions for testing
240 239
241 240
242 241 def genbits(n):
243 242 """Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n).
244 243 i.e. the generated numbers have a width of n bits.
245 244
246 245 The combination of two adjacent numbers will cover all possible cases.
247 246 That is to say, given any x, y where both x, and y are in range(2 ** n),
248 247 there is an x followed immediately by y in the generated sequence.
249 248 """
250 249 m = 2 ** n
251 250
252 251 # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
253 252 gray = lambda x: x ^ (x >> 1)
254 253 reversegray = {gray(i): i for i in range(m)}
255 254
256 255 # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
257 256 # the next unused gray code where higher n bits equal to X.
258 257
259 258 # For gray codes whose higher bits are X, a[X] of them have been used.
260 259 a = [0] * m
261 260
262 261 # Iterate from 0.
263 262 x = 0
264 263 yield x
265 264 for i in range(m * m):
266 265 x = reversegray[x]
267 266 y = gray(a[x] + x * m) & (m - 1)
268 267 assert a[x] < m
269 268 a[x] += 1
270 269 x = y
271 270 yield x
272 271
273 272
274 273 def gentext(rev):
275 274 '''Given a revision number, generate dummy text'''
276 275 return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
277 276
278 277
279 278 def writecases(rlog, tr):
280 279 """Write some revisions interested to the test.
281 280
282 281 The test is interested in 3 properties of a revision:
283 282
284 283 - Is it a delta or a full text? (isdelta)
285 284 This is to catch some delta application issues.
286 285 - Does it have a flag of EXTSTORED? (isext)
287 286 This is to catch some flag processor issues. Especially when
288 287 interacted with revlog deltas.
289 288 - Is its text empty? (isempty)
290 289 This is less important. It is intended to try to catch some careless
291 290 checks like "if text" instead of "if text is None". Note: if flag
292 291 processor is involved, raw text may be not empty.
293 292
294 293 Write 65 revisions. So that all combinations of the above flags for
295 294 adjacent revisions are covered. That is to say,
296 295
297 296 len(set(
298 297 (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
299 298 for r in range(len(rlog) - 1)
300 299 )) is 64.
301 300
302 301 Where "r.delta", "r.ext", and "r.empty" are booleans matching properties
303 302 mentioned above.
304 303
305 304 Return expected [(text, rawtext)].
306 305 """
307 306 result = []
308 307 for i, x in enumerate(genbits(3)):
309 308 isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
310 309 if isempty:
311 310 text = b''
312 311 else:
313 312 text = gentext(i)
314 313 rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)
315 314
316 315 # Verify text, rawtext, and rawsize
317 316 if isext:
318 317 rawtext = writeprocessor(None, text)[0]
319 318 else:
320 319 rawtext = text
321 320 if rlog.rawsize(rev) != len(rawtext):
322 321 abort('rev %d: wrong rawsize' % rev)
323 322 if rlog.revision(rev) != text:
324 323 abort('rev %d: wrong text' % rev)
325 324 if rlog.rawdata(rev) != rawtext:
326 325 abort('rev %d: wrong rawtext' % rev)
327 326 result.append((text, rawtext))
328 327
329 328 # Verify flags like isdelta, isext work as expected
330 329 # isdelta can be overridden to False if this or p1 has isext set
331 330 if bool(rlog.deltaparent(rev) > -1) and not isdelta:
332 331 abort('rev %d: isdelta is unexpected' % rev)
333 332 if bool(rlog.flags(rev)) != isext:
334 333 abort('rev %d: isext is ineffective' % rev)
335 334 return result
336 335
337 336
338 337 # Main test and checking
339 338
340 339
341 340 def checkrevlog(rlog, expected):
342 341 '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
343 342 # Test using different access orders. This could expose some issues
344 343 # depending on revlog caching (see revlog._cache).
345 344 for r0 in range(len(rlog) - 1):
346 345 r1 = r0 + 1
347 346 for revorder in [[r0, r1], [r1, r0]]:
348 347 for raworder in [[True], [False], [True, False], [False, True]]:
349 348 nlog = newrevlog()
350 349 for rev in revorder:
351 350 for raw in raworder:
352 351 if raw:
353 352 t = nlog.rawdata(rev)
354 353 else:
355 354 t = nlog.revision(rev)
356 355 if t != expected[rev][int(raw)]:
357 356 abort(
358 357 'rev %d: corrupted %stext'
359 358 % (rev, raw and 'raw' or '')
360 359 )
361 360
362 361
363 362 slicingdata = [
364 363 ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
365 364 ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
366 365 (
367 366 [-1, 0, 1, 2, 3, 55, 56, 58, 59, 60],
368 367 [[-1, 0, 1], [2], [58], [59, 60]],
369 368 10,
370 369 ),
371 370 ]
372 371
373 372
374 373 def slicingtest(rlog):
375 374 oldmin = rlog._srmingapsize
376 375 try:
377 376 # the test revlog is small, we remove the floor under which we
378 377 # slicing is diregarded.
379 378 rlog._srmingapsize = 0
380 379 for item in slicingdata:
381 380 chain, expected, target = item
382 381 result = deltas.slicechunk(rlog, chain, targetsize=target)
383 382 result = list(result)
384 383 if result != expected:
385 384 print('slicing differ:')
386 385 print(' chain: %s' % chain)
387 386 print(' target: %s' % target)
388 387 print(' expected: %s' % expected)
389 388 print(' result: %s' % result)
390 389 finally:
391 390 rlog._srmingapsize = oldmin
392 391
393 392
394 393 def md5sum(s):
395 394 return hashlib.md5(s).digest()
396 395
397 396
398 397 def _maketext(*coord):
399 398 """create piece of text according to range of integers
400 399
401 400 The test returned use a md5sum of the integer to make it less
402 401 compressible"""
403 402 pieces = []
404 403 for start, size in coord:
405 404 num = range(start, start + size)
406 405 p = [md5sum(b'%d' % r) for r in num]
407 406 pieces.append(b'\n'.join(p))
408 407 return b'\n'.join(pieces) + b'\n'
409 408
410 409
411 410 data = [
412 411 _maketext((0, 120), (456, 60)),
413 412 _maketext((0, 120), (345, 60)),
414 413 _maketext((0, 120), (734, 60)),
415 414 _maketext((0, 120), (734, 60), (923, 45)),
416 415 _maketext((0, 120), (734, 60), (234, 45)),
417 416 _maketext((0, 120), (734, 60), (564, 45)),
418 417 _maketext((0, 120), (734, 60), (361, 45)),
419 418 _maketext((0, 120), (734, 60), (489, 45)),
420 419 _maketext((0, 120), (123, 60)),
421 420 _maketext((0, 120), (145, 60)),
422 421 _maketext((0, 120), (104, 60)),
423 422 _maketext((0, 120), (430, 60)),
424 423 _maketext((0, 120), (430, 60), (923, 45)),
425 424 _maketext((0, 120), (430, 60), (234, 45)),
426 425 _maketext((0, 120), (430, 60), (564, 45)),
427 426 _maketext((0, 120), (430, 60), (361, 45)),
428 427 _maketext((0, 120), (430, 60), (489, 45)),
429 428 _maketext((0, 120), (249, 60)),
430 429 _maketext((0, 120), (832, 60)),
431 430 _maketext((0, 120), (891, 60)),
432 431 _maketext((0, 120), (543, 60)),
433 432 _maketext((0, 120), (120, 60)),
434 433 _maketext((0, 120), (60, 60), (768, 30)),
435 434 _maketext((0, 120), (60, 60), (260, 30)),
436 435 _maketext((0, 120), (60, 60), (450, 30)),
437 436 _maketext((0, 120), (60, 60), (361, 30)),
438 437 _maketext((0, 120), (60, 60), (886, 30)),
439 438 _maketext((0, 120), (60, 60), (116, 30)),
440 439 _maketext((0, 120), (60, 60), (567, 30), (629, 40)),
441 440 _maketext((0, 120), (60, 60), (569, 30), (745, 40)),
442 441 _maketext((0, 120), (60, 60), (777, 30), (700, 40)),
443 442 _maketext((0, 120), (60, 60), (618, 30), (398, 40), (158, 10)),
444 443 ]
445 444
446 445
447 446 def makesnapshot(tr):
448 447 rl = newrevlog(name=b'_snaprevlog3', recreate=True)
449 448 for i in data:
450 449 appendrev(rl, i, tr)
451 450 return rl
452 451
453 452
454 453 snapshots = [-1, 0, 6, 8, 11, 17, 19, 21, 25, 30]
455 454
456 455
457 456 def issnapshottest(rlog):
458 457 result = []
459 458 if rlog.issnapshot(-1):
460 459 result.append(-1)
461 460 for rev in rlog:
462 461 if rlog.issnapshot(rev):
463 462 result.append(rev)
464 463 if snapshots != result:
465 464 print('snapshot differ:')
466 465 print(' expected: %s' % snapshots)
467 466 print(' got: %s' % result)
468 467
469 468
470 469 snapshotmapall = {0: [6, 8, 11, 17, 19, 25], 8: [21], -1: [0, 30]}
471 470 snapshotmap15 = {0: [17, 19, 25], 8: [21], -1: [30]}
472 471
473 472
474 473 def findsnapshottest(rlog):
475 resultall = collections.defaultdict(list)
476 deltas._findsnapshots(rlog, resultall, 0)
477 resultall = dict(resultall.items())
474 cache = deltas.SnapshotCache()
475 cache.update(rlog)
476 resultall = dict(cache.snapshots)
478 477 if resultall != snapshotmapall:
479 478 print('snapshot map differ:')
480 479 print(' expected: %s' % snapshotmapall)
481 480 print(' got: %s' % resultall)
482 result15 = collections.defaultdict(list)
483 deltas._findsnapshots(rlog, result15, 15)
484 result15 = dict(result15.items())
481 cache15 = deltas.SnapshotCache()
482 cache15.update(rlog, 15)
483 result15 = dict(cache15.snapshots)
485 484 if result15 != snapshotmap15:
486 485 print('snapshot map differ:')
487 486 print(' expected: %s' % snapshotmap15)
488 487 print(' got: %s' % result15)
489 488
490 489
491 490 def maintest():
492 491 with newtransaction() as tr:
493 492 rl = newrevlog(recreate=True)
494 493 expected = writecases(rl, tr)
495 494 checkrevlog(rl, expected)
496 495 print('local test passed')
497 496 # Copy via revlog.addgroup
498 497 rl1 = addgroupcopy(rl, tr)
499 498 checkrevlog(rl1, expected)
500 499 rl2 = addgroupcopy(rl, tr, optimaldelta=False)
501 500 checkrevlog(rl2, expected)
502 501 print('addgroupcopy test passed')
503 502 # Copy via revlog.clone
504 503 rl3 = newrevlog(name=b'_destrevlog3', recreate=True)
505 504 rl.clone(tr, rl3)
506 505 checkrevlog(rl3, expected)
507 506 print('clone test passed')
508 507 # Copy via low-level revlog._addrevision
509 508 rl4 = lowlevelcopy(rl, tr)
510 509 checkrevlog(rl4, expected)
511 510 print('lowlevelcopy test passed')
512 511 slicingtest(rl)
513 512 print('slicing test passed')
514 513 rl5 = makesnapshot(tr)
515 514 issnapshottest(rl5)
516 515 print('issnapshot test passed')
517 516 findsnapshottest(rl5)
518 517 print('findsnapshot test passed')
519 518
520 519
521 520 try:
522 521 maintest()
523 522 except Exception as ex:
524 523 abort('crashed: %s' % ex)
General Comments 0
You need to be logged in to leave comments. Login now