revlog: add a C implementation of `headrevsdiff`...
Arseniy Alekseyev
r52289:3f37d80d default
@@ -1,3305 +1,3498 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 typedef struct indexObjectStruct indexObject;
27 27
28 28 typedef struct {
29 29 int children[16];
30 30 } nodetreenode;
31 31
32 32 typedef struct {
33 33 int abi_version;
34 34 Py_ssize_t (*index_length)(const indexObject *);
35 35 const char *(*index_node)(indexObject *, Py_ssize_t);
36 36 int (*fast_rank)(indexObject *, Py_ssize_t);
37 37 int (*index_parents)(PyObject *, int, int *);
38 38 } Revlog_CAPI;
39 39
40 40 /*
41 41 * A base-16 trie for fast node->rev mapping.
42 42 *
43 43 * Positive value is index of the next node in the trie
44 44 * Negative value is a leaf: -(rev + 2)
45 45 * Zero is empty
46 46 */
47 47 typedef struct {
48 48 indexObject *index;
49 49 nodetreenode *nodes;
50 50 Py_ssize_t nodelen;
51 51 size_t length; /* # nodes in use */
52 52 size_t capacity; /* # nodes allocated */
53 53 int depth; /* maximum depth of tree */
54 54 int splits; /* # splits performed */
55 55 } nodetree;
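/*
 * Editorial sketch (not part of this change): how the children[] values of
 * the base-16 trie are interpreted, per the comment above. The helper names
 * are hypothetical and exist only for illustration.
 */
static inline int nt_child_is_leaf(int v)
{
	return v < 0; /* 0 is empty, a positive value is an internal node index */
}

static inline int nt_child_to_rev(int v)
{
	/* a leaf stores -(rev + 2), so nullrev (-1) is encoded as -1 */
	return -v - 2;
}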
56 56
57 57 typedef struct {
58 58 PyObject_HEAD /* ; */
59 59 nodetree nt;
60 60 } nodetreeObject;
61 61
62 62 /*
63 63 * This class has two behaviors.
64 64 *
65 65 * When used in a list-like way (with integer keys), we decode an
66 66 * entry in a RevlogNG index file on demand. We have limited support for
67 67 * integer-keyed insert and delete, only at elements right before the
68 68 * end.
69 69 *
70 70 * With string keys, we lazily perform a reverse mapping from node to
71 71 * rev, using a base-16 trie.
72 72 */
73 73 struct indexObjectStruct {
74 74 PyObject_HEAD
75 75 /* Type-specific fields go here. */
76 76 PyObject *data; /* raw bytes of index */
77 77 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
78 78 PyObject *nullentry; /* fast path for references to null */
79 79 Py_buffer buf; /* buffer of data */
80 80 const char **offsets; /* populated on demand */
81 81 Py_ssize_t length; /* current on-disk number of elements */
82 82 unsigned new_length; /* number of added elements */
83 83 unsigned added_length; /* space reserved for added elements */
84 84 char *added; /* populated on demand */
85 85 PyObject *headrevs; /* cache, invalidated on changes */
86 86 PyObject *filteredrevs; /* filtered revs set */
87 87 nodetree nt; /* base-16 trie */
88 88 int ntinitialized; /* 0 or 1 */
89 89 int ntrev; /* last rev scanned */
90 90 int ntlookups; /* # lookups */
91 91 int ntmisses; /* # lookups that miss the cache */
92 92 int inlined;
93 93 long entry_size; /* size of an index entry. Differs in v1 vs. v2 format
94 94 */
95 95 long rust_ext_compat; /* compatibility with being used in rust
96 96 extensions */
97 97 long format_version; /* format version selector (format_*) */
98 98 };
99 99
100 100 static Py_ssize_t index_length(const indexObject *self)
101 101 {
102 102 return self->length + self->new_length;
103 103 }
104 104
105 105 static const char nullid[32] = {0};
106 106 static const Py_ssize_t nullrev = -1;
107 107
108 108 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
109 109
110 110 static int index_find_node(indexObject *self, const char *node);
111 111
112 112 #if LONG_MAX == 0x7fffffffL
113 113 static const char *const tuple_format = "Kiiiiiiy#KiBBi";
114 114 #else
115 115 static const char *const tuple_format = "kiiiiiiy#kiBBi";
116 116 #endif
117 117
118 118 /* A RevlogNG v1 index entry is 64 bytes long. */
119 119 static const long v1_entry_size = 64;
120 120
121 121 /* A Revlogv2 index entry is 96 bytes long. */
122 122 static const long v2_entry_size = 96;
123 123
124 124 /* A Changelogv2 index entry is 96 bytes long. */
125 125 static const long cl2_entry_size = 96;
126 126
127 127 /* Internal format version.
128 128 * Must match their counterparts in revlogutils/constants.py */
129 129 static const long format_v1 = 1; /* constants.py: REVLOGV1 */
130 130 static const long format_v2 = 0xDEAD; /* constants.py: REVLOGV2 */
131 131 static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */
132 132
133 133 static const long entry_v1_offset_high = 0;
134 134 static const long entry_v1_offset_offset_flags = 4;
135 135 static const long entry_v1_offset_comp_len = 8;
136 136 static const long entry_v1_offset_uncomp_len = 12;
137 137 static const long entry_v1_offset_base_rev = 16;
138 138 static const long entry_v1_offset_link_rev = 20;
139 139 static const long entry_v1_offset_parent_1 = 24;
140 140 static const long entry_v1_offset_parent_2 = 28;
141 141 static const long entry_v1_offset_node_id = 32;
142 142
143 143 static const long entry_v2_offset_high = 0;
144 144 static const long entry_v2_offset_offset_flags = 4;
145 145 static const long entry_v2_offset_comp_len = 8;
146 146 static const long entry_v2_offset_uncomp_len = 12;
147 147 static const long entry_v2_offset_base_rev = 16;
148 148 static const long entry_v2_offset_link_rev = 20;
149 149 static const long entry_v2_offset_parent_1 = 24;
150 150 static const long entry_v2_offset_parent_2 = 28;
151 151 static const long entry_v2_offset_node_id = 32;
152 152 static const long entry_v2_offset_sidedata_offset = 64;
153 153 static const long entry_v2_offset_sidedata_comp_len = 72;
154 154 static const long entry_v2_offset_all_comp_mode = 76;
155 155 /* next free offset: 77 */
156 156
157 157 static const long entry_cl2_offset_high = 0;
158 158 static const long entry_cl2_offset_offset_flags = 4;
159 159 static const long entry_cl2_offset_comp_len = 8;
160 160 static const long entry_cl2_offset_uncomp_len = 12;
161 161 static const long entry_cl2_offset_parent_1 = 16;
162 162 static const long entry_cl2_offset_parent_2 = 20;
163 163 static const long entry_cl2_offset_node_id = 24;
164 164 static const long entry_cl2_offset_sidedata_offset = 56;
165 165 static const long entry_cl2_offset_sidedata_comp_len = 64;
166 166 static const long entry_cl2_offset_all_comp_mode = 68;
167 167 static const long entry_cl2_offset_rank = 69;
168 168 /* next free offset: 73 */
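/*
 * Editorial sketch (not part of this change): sanity checks tying the field
 * offsets above to the fixed entry sizes. The function is hypothetical and
 * illustrative only; the constants are the ones defined above.
 */
static void check_entry_layout(void)
{
	/* v1: the 32-byte node id field ends exactly at the 64-byte entry size */
	assert(entry_v1_offset_node_id + 32 == v1_entry_size);
	/* v2: the comp-mode byte at offset 76 is the last used byte (next free: 77) */
	assert(entry_v2_offset_all_comp_mode + 1 <= v2_entry_size);
	/* changelog-v2: the 4-byte rank at offset 69 ends at 73 (next free: 73) */
	assert(entry_cl2_offset_rank + 4 <= cl2_entry_size);
}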
169 169
170 170 static const char comp_mode_inline = 2;
171 171 static const int rank_unknown = -1;
172 172
173 173 static void raise_revlog_error(void)
174 174 {
175 175 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
176 176
177 177 mod = PyImport_ImportModule("mercurial.error");
178 178 if (mod == NULL) {
179 179 goto cleanup;
180 180 }
181 181
182 182 dict = PyModule_GetDict(mod);
183 183 if (dict == NULL) {
184 184 goto cleanup;
185 185 }
186 186 Py_INCREF(dict);
187 187
188 188 errclass = PyDict_GetItemString(dict, "RevlogError");
189 189 if (errclass == NULL) {
190 190 PyErr_SetString(PyExc_SystemError,
191 191 "could not find RevlogError");
192 192 goto cleanup;
193 193 }
194 194
195 195 /* value of exception is ignored by callers */
196 196 PyErr_SetString(errclass, "RevlogError");
197 197
198 198 cleanup:
199 199 Py_XDECREF(dict);
200 200 Py_XDECREF(mod);
201 201 }
202 202
203 203 /*
204 204 * Return a pointer to the beginning of a RevlogNG record.
205 205 */
206 206 static const char *index_deref(indexObject *self, Py_ssize_t pos)
207 207 {
208 208 if (pos >= self->length)
209 209 return self->added + (pos - self->length) * self->entry_size;
210 210
211 211 if (self->inlined && pos > 0) {
212 212 if (self->offsets == NULL) {
213 213 Py_ssize_t ret;
214 214 self->offsets =
215 215 PyMem_Malloc(self->length * sizeof(*self->offsets));
216 216 if (self->offsets == NULL)
217 217 return (const char *)PyErr_NoMemory();
218 218 ret = inline_scan(self, self->offsets);
219 219 if (ret == -1) {
220 220 return NULL;
221 221 };
222 222 }
223 223 return self->offsets[pos];
224 224 }
225 225
226 226 return (const char *)(self->buf.buf) + pos * self->entry_size;
227 227 }
228 228
229 229 /*
230 230 * Get parents of the given rev.
231 231 *
232 232 * The specified rev must be valid and must not be nullrev. A returned
233 233 * parent revision may be nullrev, but is guaranteed to be in valid range.
234 234 */
235 235 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
236 236 int maxrev)
237 237 {
238 238 const char *data = index_deref(self, rev);
239 239
240 240 if (self->format_version == format_v1) {
241 241 ps[0] = getbe32(data + entry_v1_offset_parent_1);
242 242 ps[1] = getbe32(data + entry_v1_offset_parent_2);
243 243 } else if (self->format_version == format_v2) {
244 244 ps[0] = getbe32(data + entry_v2_offset_parent_1);
245 245 ps[1] = getbe32(data + entry_v2_offset_parent_2);
246 246 } else if (self->format_version == format_cl2) {
247 247 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
248 248 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
249 249 } else {
250 250 raise_revlog_error();
251 251 return -1;
252 252 }
253 253
254 254 /* If index file is corrupted, ps[] may point to invalid revisions. So
255 255 * trusting them unconditionally risks a buffer overflow. */
256 256 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
257 257 PyErr_SetString(PyExc_ValueError, "parent out of range");
258 258 return -1;
259 259 }
260 260 return 0;
261 261 }
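/*
 * Editorial sketch (not part of this change): a typical caller pattern for
 * index_get_parents(). count_real_parents() is hypothetical; it shows the
 * maxrev bound being derived from index_length() and nullrev parents being
 * skipped. `rev` must be a valid, non-null revision, as required above.
 */
static int count_real_parents(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	int n = 0, i;
	if (index_get_parents(self, rev, ps, (int)index_length(self) - 1) < 0)
		return -1; /* a Python exception has already been set */
	for (i = 0; i < 2; i++) {
		if (ps[i] != nullrev)
			n++;
	}
	return n;
}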
262 262
263 263 /*
264 264 * Get parents of the given rev.
265 265 *
266 266 * If the specified rev is out of range, IndexError will be raised. If the
267 267 * revlog entry is corrupted, ValueError may be raised.
268 268 *
269 269 * Returns 0 on success or -1 on failure.
270 270 */
271 271 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
272 272 {
273 273 int tiprev;
274 274 if (!op || !HgRevlogIndex_Check(op) || !ps) {
275 275 PyErr_BadInternalCall();
276 276 return -1;
277 277 }
278 278 tiprev = (int)index_length((indexObject *)op) - 1;
279 279 if (rev < -1 || rev > tiprev) {
280 280 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
281 281 return -1;
282 282 } else if (rev == -1) {
283 283 ps[0] = ps[1] = -1;
284 284 return 0;
285 285 } else {
286 286 return index_get_parents((indexObject *)op, rev, ps, tiprev);
287 287 }
288 288 }
289 289
290 290 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
291 291 {
292 292 const char *data;
293 293 uint64_t offset;
294 294
295 295 if (rev == nullrev)
296 296 return 0;
297 297
298 298 data = index_deref(self, rev);
299 299
300 300 if (self->format_version == format_v1) {
301 301 offset = getbe32(data + entry_v1_offset_offset_flags);
302 302 if (rev == 0) {
303 303 /* mask out version number for the first entry */
304 304 offset &= 0xFFFF;
305 305 } else {
306 306 uint32_t offset_high =
307 307 getbe32(data + entry_v1_offset_high);
308 308 offset |= ((uint64_t)offset_high) << 32;
309 309 }
310 310 } else if (self->format_version == format_v2) {
311 311 offset = getbe32(data + entry_v2_offset_offset_flags);
312 312 if (rev == 0) {
313 313 /* mask out version number for the first entry */
314 314 offset &= 0xFFFF;
315 315 } else {
316 316 uint32_t offset_high =
317 317 getbe32(data + entry_v2_offset_high);
318 318 offset |= ((uint64_t)offset_high) << 32;
319 319 }
320 320 } else if (self->format_version == format_cl2) {
321 321 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
322 322 offset = getbe32(data + entry_cl2_offset_offset_flags);
323 323 offset |= ((uint64_t)offset_high) << 32;
324 324 } else {
325 325 raise_revlog_error();
326 326 return -1;
327 327 }
328 328
329 329 return (int64_t)(offset >> 16);
330 330 }
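/*
 * Editorial sketch (not part of this change): the 64-bit offset_flags word
 * packs the data-file byte offset in its upper 48 bits and the 16 flag bits
 * at the bottom, which is why index_get_start() shifts right by 16. These
 * helpers are hypothetical, for illustration only.
 */
static inline int64_t offset_flags_to_offset(uint64_t offset_flags)
{
	return (int64_t)(offset_flags >> 16);
}

static inline int offset_flags_to_flags(uint64_t offset_flags)
{
	return (int)(offset_flags & 0xffff);
}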
331 331
332 332 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
333 333 {
334 334 const char *data;
335 335 int tmp;
336 336
337 337 if (rev == nullrev)
338 338 return 0;
339 339
340 340 data = index_deref(self, rev);
341 341
342 342 if (self->format_version == format_v1) {
343 343 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
344 344 } else if (self->format_version == format_v2) {
345 345 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
346 346 } else if (self->format_version == format_cl2) {
347 347 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
348 348 } else {
349 349 raise_revlog_error();
350 350 return -1;
351 351 }
352 352 if (tmp < 0) {
353 353 PyErr_Format(PyExc_OverflowError,
354 354 "revlog entry size out of bound (%d)", tmp);
355 355 return -1;
356 356 }
357 357 return tmp;
358 358 }
359 359
360 360 /*
361 361 * RevlogNG format (all in big endian, data may be inlined):
362 362 * 6 bytes: offset
363 363 * 2 bytes: flags
364 364 * 4 bytes: compressed length
365 365 * 4 bytes: uncompressed length
366 366 * 4 bytes: base revision
367 367 * 4 bytes: link revision
368 368 * 4 bytes: parent 1 revision
369 369 * 4 bytes: parent 2 revision
370 370 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
371 371 */
372 372 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
373 373 {
374 374 uint64_t offset_flags, sidedata_offset;
375 375 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
376 376 sidedata_comp_len, rank = rank_unknown;
377 377 char data_comp_mode, sidedata_comp_mode;
378 378 const char *c_node_id;
379 379 const char *data;
380 380 Py_ssize_t length = index_length(self);
381 381
382 382 if (pos == nullrev) {
383 383 Py_INCREF(self->nullentry);
384 384 return self->nullentry;
385 385 }
386 386
387 387 if (pos < 0 || pos >= length) {
388 388 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
389 389 return NULL;
390 390 }
391 391
392 392 data = index_deref(self, pos);
393 393 if (data == NULL)
394 394 return NULL;
395 395
396 396 if (self->format_version == format_v1) {
397 397 offset_flags = getbe32(data + entry_v1_offset_offset_flags);
398 398 /*
399 399 * The first entry on-disk needs the version number masked out,
400 400 * but this doesn't apply if entries are added to an empty
401 401 * index.
402 402 */
403 403 if (self->length && pos == 0)
404 404 offset_flags &= 0xFFFF;
405 405 else {
406 406 uint32_t offset_high =
407 407 getbe32(data + entry_v1_offset_high);
408 408 offset_flags |= ((uint64_t)offset_high) << 32;
409 409 }
410 410
411 411 comp_len = getbe32(data + entry_v1_offset_comp_len);
412 412 uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
413 413 base_rev = getbe32(data + entry_v1_offset_base_rev);
414 414 link_rev = getbe32(data + entry_v1_offset_link_rev);
415 415 parent_1 = getbe32(data + entry_v1_offset_parent_1);
416 416 parent_2 = getbe32(data + entry_v1_offset_parent_2);
417 417 c_node_id = data + entry_v1_offset_node_id;
418 418
419 419 sidedata_offset = 0;
420 420 sidedata_comp_len = 0;
421 421 data_comp_mode = comp_mode_inline;
422 422 sidedata_comp_mode = comp_mode_inline;
423 423 } else if (self->format_version == format_v2) {
424 424 offset_flags = getbe32(data + entry_v2_offset_offset_flags);
425 425 /*
426 426 * The first entry on-disk needs the version number masked out,
427 427 * but this doesn't apply if entries are added to an empty
428 428 * index.
429 429 */
430 430 if (self->length && pos == 0)
431 431 offset_flags &= 0xFFFF;
432 432 else {
433 433 uint32_t offset_high =
434 434 getbe32(data + entry_v2_offset_high);
435 435 offset_flags |= ((uint64_t)offset_high) << 32;
436 436 }
437 437
438 438 comp_len = getbe32(data + entry_v2_offset_comp_len);
439 439 uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
440 440 base_rev = getbe32(data + entry_v2_offset_base_rev);
441 441 link_rev = getbe32(data + entry_v2_offset_link_rev);
442 442 parent_1 = getbe32(data + entry_v2_offset_parent_1);
443 443 parent_2 = getbe32(data + entry_v2_offset_parent_2);
444 444 c_node_id = data + entry_v2_offset_node_id;
445 445
446 446 sidedata_offset =
447 447 getbe64(data + entry_v2_offset_sidedata_offset);
448 448 sidedata_comp_len =
449 449 getbe32(data + entry_v2_offset_sidedata_comp_len);
450 450 data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
451 451 sidedata_comp_mode =
452 452 ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
453 453 } else if (self->format_version == format_cl2) {
454 454 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
455 455 offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
456 456 offset_flags |= ((uint64_t)offset_high) << 32;
457 457 comp_len = getbe32(data + entry_cl2_offset_comp_len);
458 458 uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
459 459 /* base_rev and link_rev are not stored in changelogv2, but are
460 460 still used by some functions shared with the other revlogs.
461 461 They are supposed to contain links to other revisions,
462 462 but they always point to themselves in the case of a changelog.
463 463 */
464 464 base_rev = pos;
465 465 link_rev = pos;
466 466 parent_1 = getbe32(data + entry_cl2_offset_parent_1);
467 467 parent_2 = getbe32(data + entry_cl2_offset_parent_2);
468 468 c_node_id = data + entry_cl2_offset_node_id;
469 469 sidedata_offset =
470 470 getbe64(data + entry_cl2_offset_sidedata_offset);
471 471 sidedata_comp_len =
472 472 getbe32(data + entry_cl2_offset_sidedata_comp_len);
473 473 data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
474 474 sidedata_comp_mode =
475 475 ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
476 476 rank = getbe32(data + entry_cl2_offset_rank);
477 477 } else {
478 478 raise_revlog_error();
479 479 return NULL;
480 480 }
481 481
482 482 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
483 483 base_rev, link_rev, parent_1, parent_2, c_node_id,
484 484 self->nodelen, sidedata_offset, sidedata_comp_len,
485 485 data_comp_mode, sidedata_comp_mode, rank);
486 486 }
487 487 /*
488 488 * Pack header information in binary
489 489 */
490 490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
491 491 {
492 492 int header;
493 493 char out[4];
494 494 if (!PyArg_ParseTuple(args, "i", &header)) {
495 495 return NULL;
496 496 }
497 497 if (self->format_version != format_v1) {
498 498 PyErr_Format(PyExc_RuntimeError,
499 499 "version header should go in the docket, not the "
500 500 "index: %d",
501 501 header);
502 502 return NULL;
503 503 }
504 504 putbe32(header, out);
505 505 return PyBytes_FromStringAndSize(out, 4);
506 506 }
507 507 /*
508 508 * Return the raw binary string representing a revision
509 509 */
510 510 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
511 511 {
512 512 long rev;
513 513 const char *data;
514 514 Py_ssize_t length = index_length(self);
515 515
516 516 if (!pylong_to_long(value, &rev)) {
517 517 return NULL;
518 518 }
519 519 if (rev < 0 || rev >= length) {
520 520 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
521 521 rev);
522 522 return NULL;
523 523 };
524 524
525 525 data = index_deref(self, rev);
526 526 if (data == NULL)
527 527 return NULL;
528 528 if (rev == 0 && self->format_version == format_v1) {
529 529 /* the header is eating the start of the first entry */
530 530 return PyBytes_FromStringAndSize(data + 4,
531 531 self->entry_size - 4);
532 532 }
533 533 return PyBytes_FromStringAndSize(data, self->entry_size);
534 534 }
535 535
536 536 /*
537 537 * Return the hash of node corresponding to the given rev.
538 538 */
539 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 540 {
541 541 Py_ssize_t length = index_length(self);
542 542 const char *data;
543 543 const char *node_id;
544 544
545 545 if (pos == nullrev)
546 546 return nullid;
547 547
548 548 if (pos >= length)
549 549 return NULL;
550 550
551 551 data = index_deref(self, pos);
552 552
553 553 if (self->format_version == format_v1) {
554 554 node_id = data + entry_v1_offset_node_id;
555 555 } else if (self->format_version == format_v2) {
556 556 node_id = data + entry_v2_offset_node_id;
557 557 } else if (self->format_version == format_cl2) {
558 558 node_id = data + entry_cl2_offset_node_id;
559 559 } else {
560 560 raise_revlog_error();
561 561 return NULL;
562 562 }
563 563
564 564 return data ? node_id : NULL;
565 565 }
566 566
567 567 /*
568 568 * Return the stored rank of a given revision if known, or rank_unknown
569 569 * otherwise.
570 570 *
571 571 * The rank of a revision is the size of the sub-graph it defines as a head.
572 572 * Equivalently, the rank of a revision `r` is the size of the set
573 573 * `ancestors(r)`, `r` included.
574 574 *
575 575 * This method returns the rank retrieved from the revlog in constant time. It
576 576 * makes no attempt at computing unknown values for versions of the revlog
577 577 * which do not persist the rank.
578 578 */
579 579 static int index_fast_rank(indexObject *self, Py_ssize_t pos)
580 580 {
581 581 Py_ssize_t length = index_length(self);
582 582
583 583 if (self->format_version != format_cl2 || pos >= length) {
584 584 return rank_unknown;
585 585 }
586 586
587 587 if (pos == nullrev) {
588 588 return 0; /* convention */
589 589 }
590 590
591 591 return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
592 592 }
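/*
 * Editorial worked example (not part of this change): in a linear history
 * 0 <- 1 <- 2, rank(0) = 1, rank(1) = 2 and rank(2) = 3, since rank(r) is
 * |ancestors(r)| with r included. For a merge, the rank counts the union of
 * both parents' ancestor sets plus the merge itself, so it cannot be derived
 * from the parents' ranks alone; changelog-v2 therefore persists it on disk.
 */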
593 593
594 594 /*
595 595 * Return the hash of the node corresponding to the given rev. The
596 596 * rev is assumed to exist. If not, an exception is set.
597 597 */
598 598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
599 599 {
600 600 const char *node = index_node(self, pos);
601 601 if (node == NULL) {
602 602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
603 603 (int)pos);
604 604 }
605 605 return node;
606 606 }
607 607
608 608 static int nt_insert(nodetree *self, const char *node, int rev);
609 609
610 610 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
611 611 {
612 612 Py_ssize_t thisnodelen;
613 613 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
614 614 return -1;
615 615 if (nodelen == thisnodelen)
616 616 return 0;
617 617 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
618 618 thisnodelen, nodelen);
619 619 return -1;
620 620 }
621 621
622 622 static PyObject *index_append(indexObject *self, PyObject *obj)
623 623 {
624 624 uint64_t offset_flags, sidedata_offset;
625 625 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
626 626 sidedata_comp_len, rank;
627 627 char data_comp_mode, sidedata_comp_mode;
628 628 Py_ssize_t c_node_id_len;
629 629 const char *c_node_id;
630 630 char comp_field;
631 631 char *data;
632 632
633 633 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
634 634 &uncomp_len, &base_rev, &link_rev, &parent_1,
635 635 &parent_2, &c_node_id, &c_node_id_len,
636 636 &sidedata_offset, &sidedata_comp_len,
637 637 &data_comp_mode, &sidedata_comp_mode, &rank)) {
638 638 PyErr_SetString(PyExc_TypeError, "13-tuple required");
639 639 return NULL;
640 640 }
641 641
642 642 if (c_node_id_len != self->nodelen) {
643 643 PyErr_SetString(PyExc_TypeError, "invalid node");
644 644 return NULL;
645 645 }
646 646 if (self->format_version == format_v1) {
647 647
648 648 if (data_comp_mode != comp_mode_inline) {
649 649 PyErr_Format(PyExc_ValueError,
650 650 "invalid data compression mode: %i",
651 651 data_comp_mode);
652 652 return NULL;
653 653 }
654 654 if (sidedata_comp_mode != comp_mode_inline) {
655 655 PyErr_Format(PyExc_ValueError,
656 656 "invalid sidedata compression mode: %i",
657 657 sidedata_comp_mode);
658 658 return NULL;
659 659 }
660 660 }
661 661
662 662 if (self->new_length == self->added_length) {
663 663 size_t new_added_length =
664 664 self->added_length ? self->added_length * 2 : 4096;
665 665 void *new_added = PyMem_Realloc(
666 666 self->added, new_added_length * self->entry_size);
667 667 if (!new_added)
668 668 return PyErr_NoMemory();
669 669 self->added = new_added;
670 670 self->added_length = new_added_length;
671 671 }
672 672 rev = self->length + self->new_length;
673 673 data = self->added + self->entry_size * self->new_length++;
674 674
675 675 memset(data, 0, self->entry_size);
676 676
677 677 if (self->format_version == format_v1) {
678 678 putbe32(offset_flags >> 32, data + entry_v1_offset_high);
679 679 putbe32(offset_flags & 0xffffffffU,
680 680 data + entry_v1_offset_offset_flags);
681 681 putbe32(comp_len, data + entry_v1_offset_comp_len);
682 682 putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
683 683 putbe32(base_rev, data + entry_v1_offset_base_rev);
684 684 putbe32(link_rev, data + entry_v1_offset_link_rev);
685 685 putbe32(parent_1, data + entry_v1_offset_parent_1);
686 686 putbe32(parent_2, data + entry_v1_offset_parent_2);
687 687 memcpy(data + entry_v1_offset_node_id, c_node_id,
688 688 c_node_id_len);
689 689 } else if (self->format_version == format_v2) {
690 690 putbe32(offset_flags >> 32, data + entry_v2_offset_high);
691 691 putbe32(offset_flags & 0xffffffffU,
692 692 data + entry_v2_offset_offset_flags);
693 693 putbe32(comp_len, data + entry_v2_offset_comp_len);
694 694 putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
695 695 putbe32(base_rev, data + entry_v2_offset_base_rev);
696 696 putbe32(link_rev, data + entry_v2_offset_link_rev);
697 697 putbe32(parent_1, data + entry_v2_offset_parent_1);
698 698 putbe32(parent_2, data + entry_v2_offset_parent_2);
699 699 memcpy(data + entry_v2_offset_node_id, c_node_id,
700 700 c_node_id_len);
701 701 putbe64(sidedata_offset,
702 702 data + entry_v2_offset_sidedata_offset);
703 703 putbe32(sidedata_comp_len,
704 704 data + entry_v2_offset_sidedata_comp_len);
705 705 comp_field = data_comp_mode & 3;
706 706 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
707 707 data[entry_v2_offset_all_comp_mode] = comp_field;
708 708 } else if (self->format_version == format_cl2) {
709 709 putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
710 710 putbe32(offset_flags & 0xffffffffU,
711 711 data + entry_cl2_offset_offset_flags);
712 712 putbe32(comp_len, data + entry_cl2_offset_comp_len);
713 713 putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
714 714 putbe32(parent_1, data + entry_cl2_offset_parent_1);
715 715 putbe32(parent_2, data + entry_cl2_offset_parent_2);
716 716 memcpy(data + entry_cl2_offset_node_id, c_node_id,
717 717 c_node_id_len);
718 718 putbe64(sidedata_offset,
719 719 data + entry_cl2_offset_sidedata_offset);
720 720 putbe32(sidedata_comp_len,
721 721 data + entry_cl2_offset_sidedata_comp_len);
722 722 comp_field = data_comp_mode & 3;
723 723 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
724 724 data[entry_cl2_offset_all_comp_mode] = comp_field;
725 725 putbe32(rank, data + entry_cl2_offset_rank);
726 726 } else {
727 727 raise_revlog_error();
728 728 return NULL;
729 729 }
730 730
731 731 if (self->ntinitialized)
732 732 nt_insert(&self->nt, c_node_id, rev);
733 733
734 734 Py_CLEAR(self->headrevs);
735 735 Py_RETURN_NONE;
736 736 }
737 737
738 738 /* Replace an existing index entry's sidedata offset and length with new ones.
739 739 This cannot be used outside of the context of sidedata rewriting,
740 740 inside the transaction that creates the given revision. */
741 741 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
742 742 {
743 743 uint64_t offset_flags, sidedata_offset;
744 744 Py_ssize_t rev;
745 745 int sidedata_comp_len;
746 746 char comp_mode;
747 747 char *data;
748 748 #if LONG_MAX == 0x7fffffffL
749 749 const char *const sidedata_format = "nKiKB";
750 750 #else
751 751 const char *const sidedata_format = "nkikB";
752 752 #endif
753 753
754 754 if (self->entry_size == v1_entry_size || self->inlined) {
755 755 /*
756 756 There is a bug in the transaction handling when going from an
757 757 inline revlog to a separate index and data file. Turn it off until
758 758 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
759 759 See issue6485.
760 760 */
761 761 raise_revlog_error();
762 762 return NULL;
763 763 }
764 764
765 765 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
766 766 &sidedata_comp_len, &offset_flags, &comp_mode))
767 767 return NULL;
768 768
769 769 if (rev < 0 || rev >= index_length(self)) {
770 770 PyErr_SetString(PyExc_IndexError, "revision outside index");
771 771 return NULL;
772 772 }
773 773 if (rev < self->length) {
774 774 PyErr_SetString(
775 775 PyExc_IndexError,
776 776 "cannot rewrite entries outside of this transaction");
777 777 return NULL;
778 778 }
779 779
780 780 /* Find the newly added node, offset from the "already on-disk" length
781 781 */
782 782 data = self->added + self->entry_size * (rev - self->length);
783 783 if (self->format_version == format_v2) {
784 784 putbe64(offset_flags, data + entry_v2_offset_high);
785 785 putbe64(sidedata_offset,
786 786 data + entry_v2_offset_sidedata_offset);
787 787 putbe32(sidedata_comp_len,
788 788 data + entry_v2_offset_sidedata_comp_len);
789 789 data[entry_v2_offset_all_comp_mode] =
790 790 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
791 791 ((comp_mode & 3) << 2);
792 792 } else if (self->format_version == format_cl2) {
793 793 putbe64(offset_flags, data + entry_cl2_offset_high);
794 794 putbe64(sidedata_offset,
795 795 data + entry_cl2_offset_sidedata_offset);
796 796 putbe32(sidedata_comp_len,
797 797 data + entry_cl2_offset_sidedata_comp_len);
798 798 data[entry_cl2_offset_all_comp_mode] =
799 799 (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
800 800 ((comp_mode & 3) << 2);
801 801 } else {
802 802 raise_revlog_error();
803 803 return NULL;
804 804 }
805 805
806 806 Py_RETURN_NONE;
807 807 }
808 808
809 809 static PyObject *index_stats(indexObject *self)
810 810 {
811 811 PyObject *obj = PyDict_New();
812 812 PyObject *s = NULL;
813 813 PyObject *t = NULL;
814 814
815 815 if (obj == NULL)
816 816 return NULL;
817 817
818 818 #define istat(__n, __d) \
819 819 do { \
820 820 s = PyBytes_FromString(__d); \
821 821 t = PyLong_FromSsize_t(self->__n); \
822 822 if (!s || !t) \
823 823 goto bail; \
824 824 if (PyDict_SetItem(obj, s, t) == -1) \
825 825 goto bail; \
826 826 Py_CLEAR(s); \
827 827 Py_CLEAR(t); \
828 828 } while (0)
829 829
830 830 if (self->added_length)
831 831 istat(new_length, "index entries added");
832 832 istat(length, "revs in memory");
833 833 istat(ntlookups, "node trie lookups");
834 834 istat(ntmisses, "node trie misses");
835 835 istat(ntrev, "node trie last rev scanned");
836 836 if (self->ntinitialized) {
837 837 istat(nt.capacity, "node trie capacity");
838 838 istat(nt.depth, "node trie depth");
839 839 istat(nt.length, "node trie count");
840 840 istat(nt.splits, "node trie splits");
841 841 }
842 842
843 843 #undef istat
844 844
845 845 return obj;
846 846
847 847 bail:
848 848 Py_XDECREF(obj);
849 849 Py_XDECREF(s);
850 850 Py_XDECREF(t);
851 851 return NULL;
852 852 }
853 853
854 854 /*
855 855 * When we cache a list, we want to be sure the caller can't mutate
856 856 * the cached copy.
857 857 */
858 858 static PyObject *list_copy(PyObject *list)
859 859 {
860 860 Py_ssize_t len = PyList_GET_SIZE(list);
861 861 PyObject *newlist = PyList_New(len);
862 862 Py_ssize_t i;
863 863
864 864 if (newlist == NULL)
865 865 return NULL;
866 866
867 867 for (i = 0; i < len; i++) {
868 868 PyObject *obj = PyList_GET_ITEM(list, i);
869 869 Py_INCREF(obj);
870 870 PyList_SET_ITEM(newlist, i, obj);
871 871 }
872 872
873 873 return newlist;
874 874 }
875 875
876 876 static int check_filter(PyObject *filter, Py_ssize_t arg)
877 877 {
878 878 if (filter) {
879 879 PyObject *arglist, *result;
880 880 int isfiltered;
881 881
882 882 arglist = Py_BuildValue("(n)", arg);
883 883 if (!arglist) {
884 884 return -1;
885 885 }
886 886
887 887 result = PyObject_Call(filter, arglist, NULL);
888 888 Py_DECREF(arglist);
889 889 if (!result) {
890 890 return -1;
891 891 }
892 892
893 893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
894 894 * same as this function, so we can just return it directly.*/
895 895 isfiltered = PyObject_IsTrue(result);
896 896 Py_DECREF(result);
897 897 return isfiltered;
898 898 } else {
899 899 return 0;
900 900 }
901 901 }
902 902
903 903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 904 int parent_2, Py_ssize_t i)
905 905 {
906 906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 907 phases[i] = phases[parent_1];
908 908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 909 phases[i] = phases[parent_2];
910 910 }
911 911
912 912 /* Take ownership of a given Python value and add it to a Python list.
913 913 Return -1 on failure (including if [elem] is NULL). */
914 914 static int pylist_append_owned(PyObject *list, PyObject *elem)
915 915 {
916 916 int res;
917 917
918 918 if (elem == NULL)
919 919 return -1;
920 920 res = PyList_Append(list, elem);
921 921 Py_DECREF(elem);
922 922 return res;
923 923 }
924 924
925 925 static PyObject *reachableroots2(indexObject *self, PyObject *args)
926 926 {
927 927
928 928 /* Input */
929 929 long minroot;
930 930 PyObject *includepatharg = NULL;
931 931 int includepath = 0;
932 932 /* heads and roots are lists */
933 933 PyObject *heads = NULL;
934 934 PyObject *roots = NULL;
935 935 PyObject *reachable = NULL;
936 936
937 937 Py_ssize_t len = index_length(self);
938 938 long revnum;
939 939 Py_ssize_t k;
940 940 Py_ssize_t i;
941 941 Py_ssize_t l;
942 942 int r;
943 943 int parents[2];
944 944
945 945 /* Internal data structure:
946 946 * tovisit: array of length len+1 (all revs + nullrev), filled up to
947 947 * lentovisit
948 948 *
949 949 * revstates: array of length len+1 (all revs + nullrev) */
950 950 int *tovisit = NULL;
951 951 long lentovisit = 0;
952 952 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
953 953 char *revstates = NULL;
954 954
955 955 /* Get arguments */
956 956 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
957 957 &PyList_Type, &roots, &PyBool_Type,
958 958 &includepatharg))
959 959 goto bail;
960 960
961 961 if (includepatharg == Py_True)
962 962 includepath = 1;
963 963
964 964 /* Initialize return set */
965 965 reachable = PyList_New(0);
966 966 if (reachable == NULL)
967 967 goto bail;
968 968
969 969 /* Initialize internal datastructures */
970 970 tovisit = (int *)malloc((len + 1) * sizeof(int));
971 971 if (tovisit == NULL) {
972 972 PyErr_NoMemory();
973 973 goto bail;
974 974 }
975 975
976 976 revstates = (char *)calloc(len + 1, 1);
977 977 if (revstates == NULL) {
978 978 PyErr_NoMemory();
979 979 goto bail;
980 980 }
981 981
982 982 l = PyList_GET_SIZE(roots);
983 983 for (i = 0; i < l; i++) {
984 984 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
985 985 if (revnum == -1 && PyErr_Occurred())
986 986 goto bail;
987 987 /* If root is out of range, e.g. wdir(), it must be unreachable
988 988 * from heads. So we can just ignore it. */
989 989 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
990 990 continue;
991 991 revstates[revnum + 1] |= RS_ROOT;
992 992 }
993 993
994 994 /* Populate tovisit with all the heads */
995 995 l = PyList_GET_SIZE(heads);
996 996 for (i = 0; i < l; i++) {
997 997 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
998 998 if (revnum == -1 && PyErr_Occurred())
999 999 goto bail;
1000 1000 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
1001 1001 PyErr_SetString(PyExc_IndexError, "head out of range");
1002 1002 goto bail;
1003 1003 }
1004 1004 if (!(revstates[revnum + 1] & RS_SEEN)) {
1005 1005 tovisit[lentovisit++] = (int)revnum;
1006 1006 revstates[revnum + 1] |= RS_SEEN;
1007 1007 }
1008 1008 }
1009 1009
1010 1010 /* Visit the tovisit list and find the reachable roots */
1011 1011 k = 0;
1012 1012 while (k < lentovisit) {
1013 1013 /* Add the node to reachable if it is a root*/
1014 1014 revnum = tovisit[k++];
1015 1015 if (revstates[revnum + 1] & RS_ROOT) {
1016 1016 revstates[revnum + 1] |= RS_REACHABLE;
1017 1017 r = pylist_append_owned(reachable,
1018 1018 PyLong_FromLong(revnum));
1019 1019 if (r < 0)
1020 1020 goto bail;
1021 1021 if (includepath == 0)
1022 1022 continue;
1023 1023 }
1024 1024
1025 1025 /* Add its parents to the list of nodes to visit */
1026 1026 if (revnum == nullrev)
1027 1027 continue;
1028 1028 r = index_get_parents(self, revnum, parents, (int)len - 1);
1029 1029 if (r < 0)
1030 1030 goto bail;
1031 1031 for (i = 0; i < 2; i++) {
1032 1032 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
1033 1033 parents[i] >= minroot) {
1034 1034 tovisit[lentovisit++] = parents[i];
1035 1035 revstates[parents[i] + 1] |= RS_SEEN;
1036 1036 }
1037 1037 }
1038 1038 }
1039 1039
1040 1040 /* Find all the nodes in between the roots we found and the heads
1041 1041 * and add them to the reachable set */
1042 1042 if (includepath == 1) {
1043 1043 long minidx = minroot;
1044 1044 if (minidx < 0)
1045 1045 minidx = 0;
1046 1046 for (i = minidx; i < len; i++) {
1047 1047 if (!(revstates[i + 1] & RS_SEEN))
1048 1048 continue;
1049 1049 r = index_get_parents(self, i, parents, (int)len - 1);
1050 1050 /* Corrupted index file, error is set from
1051 1051 * index_get_parents */
1052 1052 if (r < 0)
1053 1053 goto bail;
1054 1054 if (((revstates[parents[0] + 1] |
1055 1055 revstates[parents[1] + 1]) &
1056 1056 RS_REACHABLE) &&
1057 1057 !(revstates[i + 1] & RS_REACHABLE)) {
1058 1058 revstates[i + 1] |= RS_REACHABLE;
1059 1059 r = pylist_append_owned(reachable,
1060 1060 PyLong_FromSsize_t(i));
1061 1061 if (r < 0)
1062 1062 goto bail;
1063 1063 }
1064 1064 }
1065 1065 }
1066 1066
1067 1067 free(revstates);
1068 1068 free(tovisit);
1069 1069 return reachable;
1070 1070 bail:
1071 1071 Py_XDECREF(reachable);
1072 1072 free(revstates);
1073 1073 free(tovisit);
1074 1074 return NULL;
1075 1075 }
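/*
 * Editorial walk-through (not part of this change): with minroot=0,
 * roots=[1], heads=[3] and includepath=True on the linear history
 * 0 <- 1 <- 2 <- 3, the first pass walks 3, 2, 1, 0 (marking RS_SEEN) and
 * flags 1 as RS_ROOT | RS_REACHABLE. The second pass then marks 2 and 3
 * RS_REACHABLE because each has a reachable parent, so the function
 * returns [1, 2, 3].
 */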
1076 1076
1077 1077 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1078 1078 char phase)
1079 1079 {
1080 1080 Py_ssize_t len = index_length(self);
1081 1081 PyObject *item;
1082 1082 PyObject *iterator;
1083 1083 int rev, minrev = -1;
1084 1084 char *node;
1085 1085
1086 1086 if (!PySet_Check(roots)) {
1087 1087 PyErr_SetString(PyExc_TypeError,
1088 1088 "roots must be a set of nodes");
1089 1089 return -2;
1090 1090 }
1091 1091 iterator = PyObject_GetIter(roots);
1092 1092 if (iterator == NULL)
1093 1093 return -2;
1094 1094 while ((item = PyIter_Next(iterator))) {
1095 1095 if (node_check(self->nodelen, item, &node) == -1)
1096 1096 goto failed;
1097 1097 rev = index_find_node(self, node);
1098 1098 /* null is implicitly public, so negative is invalid */
1099 1099 if (rev < 0 || rev >= len)
1100 1100 goto failed;
1101 1101 phases[rev] = phase;
1102 1102 if (minrev == -1 || minrev > rev)
1103 1103 minrev = rev;
1104 1104 Py_DECREF(item);
1105 1105 }
1106 1106 Py_DECREF(iterator);
1107 1107 return minrev;
1108 1108 failed:
1109 1109 Py_DECREF(iterator);
1110 1110 Py_DECREF(item);
1111 1111 return -2;
1112 1112 }
1113 1113
1114 1114 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1115 1115 {
1116 1116 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1117 1117 96: internal */
1118 1118 static const char trackedphases[] = {1, 2, 32, 96};
1119 1119 PyObject *roots = Py_None;
1120 1120 PyObject *phasesetsdict = NULL;
1121 1121 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1122 1122 Py_ssize_t len = index_length(self);
1123 1123 char *phases = NULL;
1124 1124 int minphaserev = -1, rev, i;
1125 1125 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1126 1126
1127 1127 if (!PyArg_ParseTuple(args, "O", &roots))
1128 1128 return NULL;
1129 1129 if (roots == NULL || !PyDict_Check(roots)) {
1130 1130 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1131 1131 return NULL;
1132 1132 }
1133 1133
1134 1134 phases = calloc(len, 1);
1135 1135 if (phases == NULL) {
1136 1136 PyErr_NoMemory();
1137 1137 return NULL;
1138 1138 }
1139 1139
1140 1140 for (i = 0; i < numphases; ++i) {
1141 1141 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1142 1142 PyObject *phaseroots = NULL;
1143 1143 if (pyphase == NULL)
1144 1144 goto release;
1145 1145 phaseroots = PyDict_GetItem(roots, pyphase);
1146 1146 Py_DECREF(pyphase);
1147 1147 if (phaseroots == NULL)
1148 1148 continue;
1149 1149 rev = add_roots_get_min(self, phaseroots, phases,
1150 1150 trackedphases[i]);
1151 1151 if (rev == -2)
1152 1152 goto release;
1153 1153 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1154 1154 minphaserev = rev;
1155 1155 }
1156 1156
1157 1157 for (i = 0; i < numphases; ++i) {
1158 1158 phasesets[i] = PySet_New(NULL);
1159 1159 if (phasesets[i] == NULL)
1160 1160 goto release;
1161 1161 }
1162 1162
1163 1163 if (minphaserev == -1)
1164 1164 minphaserev = len;
1165 1165 for (rev = minphaserev; rev < len; ++rev) {
1166 1166 PyObject *pyphase = NULL;
1167 1167 PyObject *pyrev = NULL;
1168 1168 int parents[2];
1169 1169 /*
1170 1170 * The parent lookup could be skipped for phaseroots, but
1171 1171 * phase --force would historically not recompute them
1172 1172 * correctly, leaving descendants with a lower phase around.
1173 1173 * As such, unconditionally recompute the phase.
1174 1174 */
1175 1175 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1176 1176 goto release;
1177 1177 set_phase_from_parents(phases, parents[0], parents[1], rev);
1178 1178 switch (phases[rev]) {
1179 1179 case 0:
1180 1180 continue;
1181 1181 case 1:
1182 1182 pyphase = phasesets[0];
1183 1183 break;
1184 1184 case 2:
1185 1185 pyphase = phasesets[1];
1186 1186 break;
1187 1187 case 32:
1188 1188 pyphase = phasesets[2];
1189 1189 break;
1190 1190 case 96:
1191 1191 pyphase = phasesets[3];
1192 1192 break;
1193 1193 default:
1194 1194 /* this should never happen since the phase number is
1195 1195 * specified by this function. */
1196 1196 PyErr_SetString(PyExc_SystemError,
1197 1197 "bad phase number in internal list");
1198 1198 goto release;
1199 1199 }
1200 1200 pyrev = PyLong_FromLong(rev);
1201 1201 if (pyrev == NULL)
1202 1202 goto release;
1203 1203 if (PySet_Add(pyphase, pyrev) == -1) {
1204 1204 Py_DECREF(pyrev);
1205 1205 goto release;
1206 1206 }
1207 1207 Py_DECREF(pyrev);
1208 1208 }
1209 1209
1210 1210 phasesetsdict = _dict_new_presized(numphases);
1211 1211 if (phasesetsdict == NULL)
1212 1212 goto release;
1213 1213 for (i = 0; i < numphases; ++i) {
1214 1214 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1215 1215 if (pyphase == NULL)
1216 1216 goto release;
1217 1217 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1218 1218 -1) {
1219 1219 Py_DECREF(pyphase);
1220 1220 goto release;
1221 1221 }
1222 1222 Py_DECREF(phasesets[i]);
1223 1223 phasesets[i] = NULL;
1224 1224 }
1225 1225
1226 1226 free(phases);
1227 1227 return Py_BuildValue("nN", len, phasesetsdict);
1228 1228
1229 1229 release:
1230 1230 for (i = 0; i < numphases; ++i)
1231 1231 Py_XDECREF(phasesets[i]);
1232 1232 Py_XDECREF(phasesetsdict);
1233 1233
1234 1234 free(phases);
1235 1235 return NULL;
1236 1236 }
1237 1237
1238 1238 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1239 1239 {
1240 1240 Py_ssize_t i, j, len;
1241 1241 char *nothead = NULL;
1242 1242 PyObject *heads = NULL;
1243 1243 PyObject *filter = NULL;
1244 1244 PyObject *filteredrevs = Py_None;
1245 1245
1246 1246 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1247 1247 return NULL;
1248 1248 }
1249 1249
1250 1250 if (self->headrevs && filteredrevs == self->filteredrevs)
1251 1251 return list_copy(self->headrevs);
1252 1252
1253 1253 Py_DECREF(self->filteredrevs);
1254 1254 self->filteredrevs = filteredrevs;
1255 1255 Py_INCREF(filteredrevs);
1256 1256
1257 1257 if (filteredrevs != Py_None) {
1258 1258 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1259 1259 if (!filter) {
1260 1260 PyErr_SetString(
1261 1261 PyExc_TypeError,
1262 1262 "filteredrevs has no attribute __contains__");
1263 1263 goto bail;
1264 1264 }
1265 1265 }
1266 1266
1267 1267 len = index_length(self);
1268 1268 heads = PyList_New(0);
1269 1269 if (heads == NULL)
1270 1270 goto bail;
1271 1271 if (len == 0) {
1272 1272 if (pylist_append_owned(heads, PyLong_FromLong(-1)) == -1) {
1273 1273 Py_XDECREF(nullid);
1274 1274 goto bail;
1275 1275 }
1276 1276 goto done;
1277 1277 }
1278 1278
1279 1279 nothead = calloc(len, 1);
1280 1280 if (nothead == NULL) {
1281 1281 PyErr_NoMemory();
1282 1282 goto bail;
1283 1283 }
1284 1284
1285 1285 for (i = len - 1; i >= 0; i--) {
1286 1286 int isfiltered;
1287 1287 int parents[2];
1288 1288
1289 1289 /* If nothead[i] == 1, it means we've seen an unfiltered child
1290 1290 * of this node already, and therefore this node is not
1291 1291 * filtered. So we can skip the expensive check_filter step.
1292 1292 */
1293 1293 if (nothead[i] != 1) {
1294 1294 isfiltered = check_filter(filter, i);
1295 1295 if (isfiltered == -1) {
1296 1296 PyErr_SetString(PyExc_TypeError,
1297 1297 "unable to check filter");
1298 1298 goto bail;
1299 1299 }
1300 1300
1301 1301 if (isfiltered) {
1302 1302 nothead[i] = 1;
1303 1303 continue;
1304 1304 }
1305 1305 }
1306 1306
1307 1307 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1308 1308 goto bail;
1309 1309 for (j = 0; j < 2; j++) {
1310 1310 if (parents[j] >= 0)
1311 1311 nothead[parents[j]] = 1;
1312 1312 }
1313 1313 }
1314 1314
1315 1315 for (i = 0; i < len; i++) {
1316 1316 if (nothead[i])
1317 1317 continue;
1318 1318 if (pylist_append_owned(heads, PyLong_FromSsize_t(i)) == -1) {
1319 1319 goto bail;
1320 1320 }
1321 1321 }
1322 1322
1323 1323 done:
1324 1324 self->headrevs = heads;
1325 1325 Py_XDECREF(filter);
1326 1326 free(nothead);
1327 1327 return list_copy(self->headrevs);
1328 1328 bail:
1329 1329 Py_XDECREF(filter);
1330 1330 Py_XDECREF(heads);
1331 1331 free(nothead);
1332 1332 return NULL;
1333 1333 }
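/*
 * Editorial worked example (not part of this change): with no filter and
 * the history 0 <- 1 <- 2 plus a second child 1 <- 3, the backwards pass
 * sets nothead[1] (parent of 2 and 3) and nothead[0] (parent of 1), so the
 * returned heads are [2, 3]. A filtered rev with no unfiltered child is
 * marked nothead and skipped, so its parents are not ruled out through it.
 */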
1334 1334
1335 /* "rgs" stands for "reverse growable set".
1336 It is a representation of a set of integers that can't exceed `max`,
1337 but tend to be close to it.
1338
1339 `body` is a growable bit array covering the range `max-len+1..max`,
1340 in reverse order.
1341 `sum` keeps track of the cardinality of the set.
1342 */
1343 typedef struct rgs {
1344 int max;
1345 int len;
1346 char *body;
1347 int sum;
1348 } rgs;
1349
1350 static int rgs_offset(rgs *rgs, int i)
1351 {
1352 return rgs->max - i;
1353 }
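/*
 * Editorial worked example (not part of this change): with max = 10 and
 * len = 4, `body` covers revisions 7..10 in reverse order, so
 * rgs_offset(10) == 0 and rgs_offset(7) == 3. Revisions further from `max`
 * need a larger `body`, which rgs_set() reallocates lazily.
 */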
1354
1355 /* returns 1 on success, 0 on failure */
1356 static int rgs_alloc(rgs *rgs, int max)
1357 {
1358 int new_len = 64;
1359 char *new_body = calloc(new_len, 1);
1360 if (new_body == NULL)
1361 return 0;
1362 rgs->len = new_len;
1363 rgs->body = new_body;
1364 rgs->max = max;
1365 rgs->sum = 0;
1366 return 1;
1367 }
1368
1369 static bool rgs_get(rgs *rgs, int i)
1370 {
1371 int offset = rgs_offset(rgs, i);
1372 if (offset >= rgs->len) {
1373 return 0;
1374 }
1375 if (offset < 0) {
1376 abort();
1377 }
1378 return rgs->body[offset];
1379 }
1380
1381 /* Realloc `body` to length `new_len`.
1382 Returns -1 when out of memory. */
1383 static int rgs_realloc(rgs *rgs, int new_len)
1384 {
1385 int old_len = rgs->len;
1386 char *old_body = rgs->body;
1387 char *new_body = calloc(new_len, 1);
1388 assert(new_len >= old_len);
1389 if (new_body == NULL)
1390 return -1;
1391 memcpy(new_body, old_body, old_len);
1392 free(old_body);
1393 rgs->body = new_body;
1394 rgs->len = new_len;
1395 return 0;
1396 }
1397
1398 /* Realloc the rgs `body` to include the `offset` */
1399 static int rgs_realloc_amortized(rgs *rgs, int offset)
1400 {
1401 int old_len = rgs->len;
1402 int new_len = old_len * 4;
1403 if (offset >= new_len)
1404 new_len = offset + 1;
1405 return rgs_realloc(rgs, new_len);
1406 }
1407
1408 /* returns 0 on success, -1 on error */
1409 static int rgs_set(rgs *rgs, int i, bool v)
1410 {
1411 int offset = rgs_offset(rgs, i);
1412 if (offset >= rgs->len) {
1413 if (v == 0) {
1414 /* no-op change: no need to resize */
1415 return 0;
1416 }
1417 if (rgs_realloc_amortized(rgs, offset) == -1)
1418 return -1;
1419 }
1420 if (offset < 0) {
1421 abort();
1422 }
1423 rgs->sum -= rgs->body[offset];
1424 rgs->sum += v;
1425 rgs->body[offset] = v;
1426 return 0;
1427 }
1428
1429 /* Add a size_t value to a Python list. Return -1 on failure. */
1430 static inline int pylist_append_size_t(PyObject *list, size_t v)
1431 {
1432 return pylist_append_owned(list, PyLong_FromSsize_t(v));
1433 }
1434
1435 static PyObject *index_headrevsdiff(indexObject *self, PyObject *args)
1436 {
1437 int begin, end;
1438 Py_ssize_t i, j;
1439 PyObject *heads_added = NULL;
1440 PyObject *heads_removed = NULL;
1441 PyObject *res = NULL;
1442 rgs rgs;
1443 rgs.body = NULL;
1444
1445 if (!PyArg_ParseTuple(args, "ii", &begin, &end))
1446 goto bail;
1447
1448 if (!rgs_alloc(&rgs, end))
1449 goto bail;
1450
1451 heads_added = PyList_New(0);
1452 if (heads_added == NULL)
1453 goto bail;
1454 heads_removed = PyList_New(0);
1455 if (heads_removed == NULL)
1456 goto bail;
1457
1458 for (i = end - 1; i >= begin; i--) {
1459 int parents[2];
1460
1461 if (rgs_get(&rgs, i)) {
1462 if (rgs_set(&rgs, i, false) == -1) {
1463 goto bail;
1464 };
1465 } else {
1466 if (pylist_append_size_t(heads_added, i) == -1) {
1467 goto bail;
1468 }
1469 }
1470
1471 if (index_get_parents(self, i, parents, i) < 0)
1472 goto bail;
1473 for (j = 0; j < 2; j++) {
1474 if (parents[j] >= 0)
1475 if (rgs_set(&rgs, parents[j], true) == -1) {
1476 goto bail;
1477 }
1478 }
1479 }
1480
1481 while (rgs.sum) {
1482 int parents[2];
1483
1484 if (rgs_get(&rgs, i)) {
1485 if (rgs_set(&rgs, i, false) == -1) {
1486 goto bail;
1487 }
1488 if (pylist_append_size_t(heads_removed, i) == -1) {
1489 goto bail;
1490 }
1491 }
1492
1493 if (index_get_parents(self, i, parents, i) < 0)
1494 goto bail;
1495 for (j = 0; j < 2; j++) {
1496 if (parents[j] >= 0)
1497 if (rgs_set(&rgs, parents[j], false) == -1) {
1498 /* can't actually fail */
1499 goto bail;
1500 }
1501 }
1502 i--;
1503 }
1504
1505 if (begin == 0 && end > 0) {
1506 if (pylist_append_size_t(heads_removed, -1) == -1) {
1507 goto bail;
1508 }
1509 }
1510
1511 if (!(res = PyTuple_Pack(2, heads_removed, heads_added))) {
1512 goto bail;
1513 }
1514
1515 Py_XDECREF(heads_removed);
1516 Py_XDECREF(heads_added);
1517 free(rgs.body);
1518 return res;
1519 bail:
1520 Py_XDECREF(heads_added);
1521 Py_XDECREF(heads_removed);
1522 free(rgs.body);
1523 return NULL;
1524 }
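/*
 * Editorial walk-through (not part of this change): for the linear history
 * 0 <- 1 <- 2 <- 3 with begin=2, end=4, the backwards pass over [2, 4) finds
 * 3 unmarked (so heads_added = [3]) and marks its parent 2; when it reaches
 * 2, the mark is cleared and 2's parent 1 is marked. The second loop then
 * scans downward from begin - 1 while the set is non-empty: 1 is still
 * marked, so heads_removed = [1]. The result ([1], [3]) says that adding
 * revisions 2 and 3 turned head 1 into a non-head and introduced head 3.
 * The begin == 0 special case reports nullrev (-1) as removed, since an
 * empty repository has only the null head.
 */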
1525
1335 1526 /**
1336 1527 * Obtain the base revision index entry.
1337 1528 *
1338 1529 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1339 1530 */
1340 1531 static inline int index_baserev(indexObject *self, int rev)
1341 1532 {
1342 1533 const char *data;
1343 1534 int result;
1344 1535
1345 1536 data = index_deref(self, rev);
1346 1537 if (data == NULL)
1347 1538 return -2;
1348 1539
1349 1540 if (self->format_version == format_v1) {
1350 1541 result = getbe32(data + entry_v1_offset_base_rev);
1351 1542 } else if (self->format_version == format_v2) {
1352 1543 result = getbe32(data + entry_v2_offset_base_rev);
1353 1544 } else if (self->format_version == format_cl2) {
1354 1545 return rev;
1355 1546 } else {
1356 1547 raise_revlog_error();
1357 1548 return -1;
1358 1549 }
1359 1550
1360 1551 if (result > rev) {
1361 1552 PyErr_Format(
1362 1553 PyExc_ValueError,
1363 1554 "corrupted revlog, revision base above revision: %d, %d",
1364 1555 rev, result);
1365 1556 return -2;
1366 1557 }
1367 1558 if (result < -1) {
1368 1559 PyErr_Format(
1369 1560 PyExc_ValueError,
1370 1561 "corrupted revlog, revision base out of range: %d, %d", rev,
1371 1562 result);
1372 1563 return -2;
1373 1564 }
1374 1565 return result;
1375 1566 }
1376 1567
1377 1568 /**
1378 1569 * Find if a revision is a snapshot or not
1379 1570 *
1380 1571 * Only relevant for sparse-revlog case.
1381 1572 * Callers must ensure that rev is in a valid range.
1382 1573 */
1383 1574 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1384 1575 {
1385 1576 int ps[2];
1386 1577 int b;
1387 1578 Py_ssize_t base;
1388 1579 while (rev >= 0) {
1389 1580 base = (Py_ssize_t)index_baserev(self, rev);
1390 1581 if (base == rev) {
1391 1582 base = -1;
1392 1583 }
1393 1584 if (base == -2) {
1394 1585 assert(PyErr_Occurred());
1395 1586 return -1;
1396 1587 }
1397 1588 if (base == -1) {
1398 1589 return 1;
1399 1590 }
1400 1591 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1401 1592 assert(PyErr_Occurred());
1402 1593 return -1;
1403 1594 };
1404 1595 while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
1405 1596 b = index_baserev(self, ps[0]);
1406 1597 if (b == ps[0]) {
1407 1598 break;
1408 1599 }
1409 1600 ps[0] = b;
1410 1601 }
1411 1602 while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
1412 1603 b = index_baserev(self, ps[1]);
1413 1604 if (b == ps[1]) {
1414 1605 break;
1415 1606 }
1416 1607 ps[1] = b;
1417 1608 }
1418 1609 if (base == ps[0] || base == ps[1]) {
1419 1610 return 0;
1420 1611 }
1421 1612 rev = base;
1422 1613 }
1423 1614 return rev == -1;
1424 1615 }
1425 1616
1426 1617 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1427 1618 {
1428 1619 long rev;
1429 1620 int issnap;
1430 1621 Py_ssize_t length = index_length(self);
1431 1622
1432 1623 if (!pylong_to_long(value, &rev)) {
1433 1624 return NULL;
1434 1625 }
1435 1626 if (rev < -1 || rev >= length) {
1436 1627 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1437 1628 rev);
1438 1629 return NULL;
1439 1630 };
1440 1631 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1441 1632 if (issnap < 0) {
1442 1633 return NULL;
1443 1634 };
1444 1635 return PyBool_FromLong((long)issnap);
1445 1636 }
1446 1637
1447 1638 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1448 1639 {
1449 1640 Py_ssize_t start_rev;
1450 1641 Py_ssize_t end_rev;
1451 1642 PyObject *cache;
1452 1643 Py_ssize_t base;
1453 1644 Py_ssize_t rev;
1454 1645 PyObject *key = NULL;
1455 1646 PyObject *value = NULL;
1456 1647 const Py_ssize_t length = index_length(self);
1457 1648 if (!PyArg_ParseTuple(args, "O!nn", &PyDict_Type, &cache, &start_rev,
1458 1649 &end_rev)) {
1459 1650 return NULL;
1460 1651 }
1461 1652 end_rev += 1;
1462 1653 if (end_rev > length) {
1463 1654 end_rev = length;
1464 1655 }
1465 1656 if (start_rev < 0) {
1466 1657 start_rev = 0;
1467 1658 }
1468 1659 for (rev = start_rev; rev < end_rev; rev++) {
1469 1660 int issnap;
1470 1661 PyObject *allvalues = NULL;
1471 1662 issnap = index_issnapshotrev(self, rev);
1472 1663 if (issnap < 0) {
1473 1664 goto bail;
1474 1665 }
1475 1666 if (issnap == 0) {
1476 1667 continue;
1477 1668 }
1478 1669 base = (Py_ssize_t)index_baserev(self, rev);
1479 1670 if (base == rev) {
1480 1671 base = -1;
1481 1672 }
1482 1673 if (base == -2) {
1483 1674 assert(PyErr_Occurred());
1484 1675 goto bail;
1485 1676 }
1486 1677 key = PyLong_FromSsize_t(base);
1487 1678 allvalues = PyDict_GetItem(cache, key);
1488 1679 if (allvalues == NULL && PyErr_Occurred()) {
1489 1680 goto bail;
1490 1681 }
1491 1682 if (allvalues == NULL) {
1492 1683 int r;
1493 1684 allvalues = PySet_New(0);
1494 1685 if (!allvalues) {
1495 1686 goto bail;
1496 1687 }
1497 1688 r = PyDict_SetItem(cache, key, allvalues);
1498 1689 Py_DECREF(allvalues);
1499 1690 if (r < 0) {
1500 1691 goto bail;
1501 1692 }
1502 1693 }
1503 1694 value = PyLong_FromSsize_t(rev);
1504 1695 if (PySet_Add(allvalues, value)) {
1505 1696 goto bail;
1506 1697 }
1507 1698 Py_CLEAR(key);
1508 1699 Py_CLEAR(value);
1509 1700 }
1510 1701 Py_RETURN_NONE;
1511 1702 bail:
1512 1703 Py_XDECREF(key);
1513 1704 Py_XDECREF(value);
1514 1705 return NULL;
1515 1706 }
1516 1707
1517 1708 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1518 1709 {
1519 1710 int rev, generaldelta;
1520 1711 PyObject *stoparg;
1521 1712 int stoprev, iterrev, baserev = -1;
1522 1713 int stopped;
1523 1714 PyObject *chain = NULL, *result = NULL;
1524 1715 const Py_ssize_t length = index_length(self);
1525 1716
1526 1717 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1527 1718 return NULL;
1528 1719 }
1529 1720
1530 1721 if (PyLong_Check(stoparg)) {
1531 1722 stoprev = (int)PyLong_AsLong(stoparg);
1532 1723 if (stoprev == -1 && PyErr_Occurred()) {
1533 1724 return NULL;
1534 1725 }
1535 1726 } else if (stoparg == Py_None) {
1536 1727 stoprev = -2;
1537 1728 } else {
1538 1729 PyErr_SetString(PyExc_ValueError,
1539 1730 "stoprev must be integer or None");
1540 1731 return NULL;
1541 1732 }
1542 1733
1543 1734 if (rev < 0 || rev >= length) {
1544 1735 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1545 1736 return NULL;
1546 1737 }
1547 1738
1548 1739 chain = PyList_New(0);
1549 1740 if (chain == NULL) {
1550 1741 return NULL;
1551 1742 }
1552 1743
1553 1744 baserev = index_baserev(self, rev);
1554 1745
1555 1746 /* This should never happen. */
1556 1747 if (baserev <= -2) {
1557 1748 /* Error should be set by index_deref() */
1558 1749 assert(PyErr_Occurred());
1559 1750 goto bail;
1560 1751 }
1561 1752
1562 1753 iterrev = rev;
1563 1754
1564 1755 while (iterrev != baserev && iterrev != stoprev) {
1565 1756 if (pylist_append_owned(chain, PyLong_FromLong(iterrev))) {
1566 1757 goto bail;
1567 1758 }
1568 1759
1569 1760 if (generaldelta) {
1570 1761 iterrev = baserev;
1571 1762 } else {
1572 1763 iterrev--;
1573 1764 }
1574 1765
1575 1766 if (iterrev < 0) {
1576 1767 break;
1577 1768 }
1578 1769
1579 1770 if (iterrev >= length) {
1580 1771 PyErr_SetString(PyExc_IndexError,
1581 1772 "revision outside index");
1582 1773 goto bail;
1583 1774 }
1584 1775
1585 1776 baserev = index_baserev(self, iterrev);
1586 1777
1587 1778 /* This should never happen. */
1588 1779 if (baserev <= -2) {
1589 1780 /* Error should be set by index_deref() */
1590 1781 assert(PyErr_Occurred());
1591 1782 goto bail;
1592 1783 }
1593 1784 }
1594 1785
1595 1786 if (iterrev == stoprev) {
1596 1787 stopped = 1;
1597 1788 } else {
1598 1789 if (pylist_append_owned(chain, PyLong_FromLong(iterrev))) {
1599 1790 goto bail;
1600 1791 }
1601 1792
1602 1793 stopped = 0;
1603 1794 }
1604 1795
1605 1796 if (PyList_Reverse(chain)) {
1606 1797 goto bail;
1607 1798 }
1608 1799
1609 1800 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1610 1801 Py_DECREF(chain);
1611 1802 return result;
1612 1803
1613 1804 bail:
1614 1805 Py_DECREF(chain);
1615 1806 return NULL;
1616 1807 }
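/*
 * Worked example (hypothetical bases): deltachain(7, None, 1) with
 * base(7) == 5 and base(5) == 5 appends 7, jumps to its base, stops
 * because rev 5 equals its own base, and returns ([5, 7], False).
 * Without generaldelta the walk simply steps through 7, 6, 5, ...
 * until the base or the stop revision is reached.
 */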
1617 1808
1618 1809 static inline int64_t
1619 1810 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1620 1811 {
1621 1812 int64_t start_offset;
1622 1813 int64_t end_offset;
1623 1814 int end_size;
1624 1815 start_offset = index_get_start(self, start_rev);
1625 1816 if (start_offset < 0) {
1626 1817 return -1;
1627 1818 }
1628 1819 end_offset = index_get_start(self, end_rev);
1629 1820 if (end_offset < 0) {
1630 1821 return -1;
1631 1822 }
1632 1823 end_size = index_get_length(self, end_rev);
1633 1824 if (end_size < 0) {
1634 1825 return -1;
1635 1826 }
1636 1827 if (end_offset < start_offset) {
1637 1828 PyErr_Format(PyExc_ValueError,
1638 1829 "corrupted revlog index: inconsistent offset "
1639 1830 "between revisions (%zd) and (%zd)",
1640 1831 start_rev, end_rev);
1641 1832 return -1;
1642 1833 }
1643 1834 return (end_offset - start_offset) + (int64_t)end_size;
1644 1835 }
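/*
 * Example (hypothetical offsets): if rev 4 starts at byte 1000 and
 * rev 9 starts at byte 4000 with a stored length of 500 bytes, the
 * segment spans (4000 - 1000) + 500 = 3500 bytes, i.e. everything
 * that has to be read to cover revisions 4 through 9.
 */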
1645 1836
1646 1837 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1647 1838 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1648 1839 Py_ssize_t startidx, Py_ssize_t endidx)
1649 1840 {
1650 1841 int length;
1651 1842 while (endidx > 1 && endidx > startidx) {
1652 1843 length = index_get_length(self, revs[endidx - 1]);
1653 1844 if (length < 0) {
1654 1845 return -1;
1655 1846 }
1656 1847 if (length != 0) {
1657 1848 break;
1658 1849 }
1659 1850 endidx -= 1;
1660 1851 }
1661 1852 return endidx;
1662 1853 }
1663 1854
1664 1855 struct Gap {
1665 1856 int64_t size;
1666 1857 Py_ssize_t idx;
1667 1858 };
1668 1859
1669 1860 static int gap_compare(const void *left, const void *right)
1670 1861 {
1671 1862 const struct Gap *l_left = ((const struct Gap *)left);
1672 1863 const struct Gap *l_right = ((const struct Gap *)right);
1673 1864 if (l_left->size < l_right->size) {
1674 1865 return -1;
1675 1866 } else if (l_left->size > l_right->size) {
1676 1867 return 1;
1677 1868 }
1678 1869 return 0;
1679 1870 }
1680 1871 static int Py_ssize_t_compare(const void *left, const void *right)
1681 1872 {
1682 1873 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1683 1874 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1684 1875 if (l_left < l_right) {
1685 1876 return -1;
1686 1877 } else if (l_left > l_right) {
1687 1878 return 1;
1688 1879 }
1689 1880 return 0;
1690 1881 }
1691 1882
1692 1883 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1693 1884 {
1694 1885 /* method arguments */
1695 1886 PyObject *list_revs = NULL; /* revisions in the chain */
1696 1887 double targetdensity = 0; /* min density to achieve */
1697 1888 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1698 1889
1699 1890 /* other core variables */
1700 1891 Py_ssize_t idxlen = index_length(self);
1701 1892 Py_ssize_t i; /* used for various iterations */
1702 1893 PyObject *result = NULL; /* the final return of the function */
1703 1894
1704 1895 /* generic information about the delta chain being sliced */
1705 1896 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1706 1897 Py_ssize_t *revs = NULL; /* native array of revisions in the chain */
1707 1898 int64_t chainpayload = 0; /* sum of all deltas in the chain */
1708 1899 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1709 1900
1710 1901 /* variables used for slicing the delta chain */
1711 1902 int64_t readdata = 0; /* amount of data currently planned to be read */
1712 1903 double density = 0; /* ratio of payload data to data read */
1713 1904 int64_t previous_end;
1714 1905 struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1715 1906 Py_ssize_t num_gaps =
1716 1907 0; /* total number of notable gaps recorded so far */
1717 1908 Py_ssize_t *selected_indices = NULL; /* indices of gaps skipped over */
1718 1909 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1719 1910 PyObject *allchunks = NULL; /* all slices */
1720 1911 Py_ssize_t previdx;
1721 1912
1722 1913 /* parsing argument */
1723 1914 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1724 1915 &targetdensity, &mingapsize)) {
1725 1916 goto bail;
1726 1917 }
1727 1918
1728 1919 /* If the delta chain contains a single element, we do not need slicing
1729 1920 */
1730 1921 num_revs = PyList_GET_SIZE(list_revs);
1731 1922 if (num_revs <= 1) {
1732 1923 result = PyTuple_Pack(1, list_revs);
1733 1924 goto done;
1734 1925 }
1735 1926
1736 1927 /* Turn the python list into a native integer array (for efficiency) */
1737 1928 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1738 1929 if (revs == NULL) {
1739 1930 PyErr_NoMemory();
1740 1931 goto bail;
1741 1932 }
1742 1933 for (i = 0; i < num_revs; i++) {
1743 1934 Py_ssize_t revnum =
1744 1935 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1745 1936 if (revnum == -1 && PyErr_Occurred()) {
1746 1937 goto bail;
1747 1938 }
1748 1939 if (revnum < nullrev || revnum >= idxlen) {
1749 1940 PyErr_Format(PyExc_IndexError,
1750 1941 "index out of range: %zd", revnum);
1751 1942 goto bail;
1752 1943 }
1753 1944 revs[i] = revnum;
1754 1945 }
1755 1946
1756 1947 /* Compute and check various properties of the unsliced delta chain */
1757 1948 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1758 1949 if (deltachainspan < 0) {
1759 1950 goto bail;
1760 1951 }
1761 1952
1762 1953 if (deltachainspan <= mingapsize) {
1763 1954 result = PyTuple_Pack(1, list_revs);
1764 1955 goto done;
1765 1956 }
1766 1957 chainpayload = 0;
1767 1958 for (i = 0; i < num_revs; i++) {
1768 1959 int tmp = index_get_length(self, revs[i]);
1769 1960 if (tmp < 0) {
1770 1961 goto bail;
1771 1962 }
1772 1963 chainpayload += tmp;
1773 1964 }
1774 1965
1775 1966 readdata = deltachainspan;
1776 1967 density = 1.0;
1777 1968
1778 1969 if (0 < deltachainspan) {
1779 1970 density = (double)chainpayload / (double)deltachainspan;
1780 1971 }
1781 1972
1782 1973 if (density >= targetdensity) {
1783 1974 result = PyTuple_Pack(1, list_revs);
1784 1975 goto done;
1785 1976 }
1786 1977
1787 1978 /* if chain is too sparse, look for relevant gaps */
1788 1979 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1789 1980 if (gaps == NULL) {
1790 1981 PyErr_NoMemory();
1791 1982 goto bail;
1792 1983 }
1793 1984
1794 1985 previous_end = -1;
1795 1986 for (i = 0; i < num_revs; i++) {
1796 1987 int64_t revstart;
1797 1988 int revsize;
1798 1989 revstart = index_get_start(self, revs[i]);
1799 1990 if (revstart < 0) {
1800 1991 goto bail;
1801 1992 }
1802 1993 revsize = index_get_length(self, revs[i]);
1803 1994 if (revsize < 0) {
1804 1995 goto bail;
1805 1996 }
1806 1997 if (revsize == 0) {
1807 1998 continue;
1808 1999 }
1809 2000 if (previous_end >= 0) {
1810 2001 int64_t gapsize = revstart - previous_end;
1811 2002 if (gapsize > mingapsize) {
1812 2003 gaps[num_gaps].size = gapsize;
1813 2004 gaps[num_gaps].idx = i;
1814 2005 num_gaps += 1;
1815 2006 }
1816 2007 }
1817 2008 previous_end = revstart + revsize;
1818 2009 }
1819 2010 if (num_gaps == 0) {
1820 2011 result = PyTuple_Pack(1, list_revs);
1821 2012 goto done;
1822 2013 }
1823 2014 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1824 2015
1825 2016 /* Slice the largest gaps first, as they improve the density the most */
1826 2017 selected_indices =
1827 2018 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1828 2019 if (selected_indices == NULL) {
1829 2020 PyErr_NoMemory();
1830 2021 goto bail;
1831 2022 }
1832 2023
1833 2024 for (i = num_gaps - 1; i >= 0; i--) {
1834 2025 selected_indices[num_selected] = gaps[i].idx;
1835 2026 readdata -= gaps[i].size;
1836 2027 num_selected += 1;
1837 2028 if (readdata <= 0) {
1838 2029 density = 1.0;
1839 2030 } else {
1840 2031 density = (double)chainpayload / (double)readdata;
1841 2032 }
1842 2033 if (density >= targetdensity) {
1843 2034 break;
1844 2035 }
1845 2036 }
1846 2037 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1847 2038 &Py_ssize_t_compare);
1848 2039
1849 2040 /* create the resulting slice */
1850 2041 allchunks = PyList_New(0);
1851 2042 if (allchunks == NULL) {
1852 2043 goto bail;
1853 2044 }
1854 2045 previdx = 0;
1855 2046 selected_indices[num_selected] = num_revs;
1856 2047 for (i = 0; i <= num_selected; i++) {
1857 2048 Py_ssize_t idx = selected_indices[i];
1858 2049 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1859 2050 if (endidx < 0) {
1860 2051 goto bail;
1861 2052 }
1862 2053 if (previdx < endidx) {
1863 2054 PyObject *chunk =
1864 2055 PyList_GetSlice(list_revs, previdx, endidx);
1865 2056 if (pylist_append_owned(allchunks, chunk) == -1) {
1866 2057 goto bail;
1867 2058 }
1868 2059 }
1869 2060 previdx = idx;
1870 2061 }
1871 2062 result = allchunks;
1872 2063 goto done;
1873 2064
1874 2065 bail:
1875 2066 Py_XDECREF(allchunks);
1876 2067 done:
1877 2068 free(revs);
1878 2069 free(gaps);
1879 2070 free(selected_indices);
1880 2071 return result;
1881 2072 }
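/*
 * Numeric sketch of the density computation above (hypothetical
 * sizes): a chain whose deltas add up to 600 bytes but whose revisions
 * span 2000 bytes on disk has a density of 0.3; skipping a single
 * 1200-byte gap leaves readdata == 800 and a density of 0.75, so with
 * a 0.5 target the chain is cut once, at that gap's index.
 */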
1882 2073
1883 2074 static inline int nt_level(const char *node, Py_ssize_t level)
1884 2075 {
1885 2076 int v = node[level >> 1];
1886 2077 if (!(level & 1))
1887 2078 v >>= 4;
1888 2079 return v & 0xf;
1889 2080 }
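/*
 * Example: for a binary node whose first byte is 0xa4, nt_level(node, 0)
 * yields 0xa (the high nybble) and nt_level(node, 1) yields 0x4 (the
 * low nybble), so each trie level consumes one hex digit of the hash.
 */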
1890 2081
1891 2082 /*
1892 2083 * Return values:
1893 2084 *
1894 2085 * -4: match is ambiguous (multiple candidates)
1895 2086 * -2: not found
1896 2087 * rest: valid rev
1897 2088 */
1898 2089 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1899 2090 int hex)
1900 2091 {
1901 2092 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1902 2093 int level, maxlevel, off;
1903 2094
1904 2095 /* If the input is binary, do a fast check for the nullid first. */
1905 2096 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1906 2097 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1907 2098 return -1;
1908 2099
1909 2100 if (hex)
1910 2101 maxlevel = nodelen;
1911 2102 else
1912 2103 maxlevel = 2 * nodelen;
1913 2104 if (maxlevel > 2 * self->nodelen)
1914 2105 maxlevel = 2 * self->nodelen;
1915 2106
1916 2107 for (level = off = 0; level < maxlevel; level++) {
1917 2108 int k = getnybble(node, level);
1918 2109 nodetreenode *n = &self->nodes[off];
1919 2110 int v = n->children[k];
1920 2111
1921 2112 if (v < 0) {
1922 2113 const char *n;
1923 2114 Py_ssize_t i;
1924 2115
1925 2116 v = -(v + 2);
1926 2117 n = index_node(self->index, v);
1927 2118 if (n == NULL)
1928 2119 return -2;
1929 2120 for (i = level; i < maxlevel; i++)
1930 2121 if (getnybble(node, i) != nt_level(n, i))
1931 2122 return -2;
1932 2123 return v;
1933 2124 }
1934 2125 if (v == 0)
1935 2126 return -2;
1936 2127 off = v;
1937 2128 }
1938 2129 /* multiple matches against an ambiguous prefix */
1939 2130 return -4;
1940 2131 }
1941 2132
1942 2133 static int nt_new(nodetree *self)
1943 2134 {
1944 2135 if (self->length == self->capacity) {
1945 2136 size_t newcapacity;
1946 2137 nodetreenode *newnodes;
1947 2138 newcapacity = self->capacity * 2;
1948 2139 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1949 2140 PyErr_SetString(PyExc_MemoryError,
1950 2141 "overflow in nt_new");
1951 2142 return -1;
1952 2143 }
1953 2144 newnodes =
1954 2145 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1955 2146 if (newnodes == NULL) {
1956 2147 PyErr_SetString(PyExc_MemoryError, "out of memory");
1957 2148 return -1;
1958 2149 }
1959 2150 self->capacity = newcapacity;
1960 2151 self->nodes = newnodes;
1961 2152 memset(&self->nodes[self->length], 0,
1962 2153 sizeof(nodetreenode) * (self->capacity - self->length));
1963 2154 }
1964 2155 return self->length++;
1965 2156 }
1966 2157
1967 2158 static int nt_insert(nodetree *self, const char *node, int rev)
1968 2159 {
1969 2160 int level = 0;
1970 2161 int off = 0;
1971 2162
1972 2163 while (level < 2 * self->nodelen) {
1973 2164 int k = nt_level(node, level);
1974 2165 nodetreenode *n;
1975 2166 int v;
1976 2167
1977 2168 n = &self->nodes[off];
1978 2169 v = n->children[k];
1979 2170
1980 2171 if (v == 0) {
1981 2172 n->children[k] = -rev - 2;
1982 2173 return 0;
1983 2174 }
1984 2175 if (v < 0) {
1985 2176 const char *oldnode =
1986 2177 index_node_existing(self->index, -(v + 2));
1987 2178 int noff;
1988 2179
1989 2180 if (oldnode == NULL)
1990 2181 return -1;
1991 2182 if (!memcmp(oldnode, node, self->nodelen)) {
1992 2183 n->children[k] = -rev - 2;
1993 2184 return 0;
1994 2185 }
1995 2186 noff = nt_new(self);
1996 2187 if (noff == -1)
1997 2188 return -1;
1998 2189 /* self->nodes may have been changed by realloc */
1999 2190 self->nodes[off].children[k] = noff;
2000 2191 off = noff;
2001 2192 n = &self->nodes[off];
2002 2193 n->children[nt_level(oldnode, ++level)] = v;
2003 2194 if (level > self->depth)
2004 2195 self->depth = level;
2005 2196 self->splits += 1;
2006 2197 } else {
2007 2198 level += 1;
2008 2199 off = v;
2009 2200 }
2010 2201 }
2011 2202
2012 2203 return -1;
2013 2204 }
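/*
 * Sketch of a split (hypothetical nodes): if the trie holds a leaf for
 * a node starting with hex "ab" and a node starting with "ac" is
 * inserted, the shared slot for 'a' is replaced by a fresh interior
 * node from nt_new(), the old leaf is pushed down under 'b', and the
 * loop continues one level deeper to place the new revision under 'c'.
 */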
2014 2205
2015 2206 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2016 2207 {
2017 2208 Py_ssize_t rev;
2018 2209 const char *node;
2019 2210 Py_ssize_t length;
2020 2211 if (!PyArg_ParseTuple(args, "n", &rev))
2021 2212 return NULL;
2022 2213 length = index_length(self->nt.index);
2023 2214 if (rev < 0 || rev >= length) {
2024 2215 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2025 2216 return NULL;
2026 2217 }
2027 2218 node = index_node_existing(self->nt.index, rev);
2028 2219 if (nt_insert(&self->nt, node, (int)rev) == -1)
2029 2220 return NULL;
2030 2221 Py_RETURN_NONE;
2031 2222 }
2032 2223
2033 2224 static int nt_delete_node(nodetree *self, const char *node)
2034 2225 {
2035 2226 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2036 2227 */
2037 2228 return nt_insert(self, node, -2);
2038 2229 }
2039 2230
2040 2231 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2041 2232 {
2042 2233 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2043 2234 self->nodes = NULL;
2044 2235
2045 2236 self->index = index;
2046 2237 /* The input capacity is in terms of revisions, while the field is in
2047 2238 * terms of nodetree nodes. */
2048 2239 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2049 2240 self->nodelen = index->nodelen;
2050 2241 self->depth = 0;
2051 2242 self->splits = 0;
2052 2243 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2053 2244 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2054 2245 return -1;
2055 2246 }
2056 2247 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2057 2248 if (self->nodes == NULL) {
2058 2249 PyErr_NoMemory();
2059 2250 return -1;
2060 2251 }
2061 2252 self->length = 1;
2062 2253 return 0;
2063 2254 }
2064 2255
2065 2256 static int ntobj_init(nodetreeObject *self, PyObject *args)
2066 2257 {
2067 2258 PyObject *index;
2068 2259 unsigned capacity;
2069 2260 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2070 2261 &capacity))
2071 2262 return -1;
2072 2263 Py_INCREF(index);
2073 2264 return nt_init(&self->nt, (indexObject *)index, capacity);
2074 2265 }
2075 2266
2076 2267 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2077 2268 {
2078 2269 return nt_find(self, node, nodelen, 1);
2079 2270 }
2080 2271
2081 2272 /*
2082 2273 * Find the length of the shortest unique prefix of node.
2083 2274 *
2084 2275 * Return values:
2085 2276 *
2086 2277 * -3: error (exception set)
2087 2278 * -2: not found (no exception set)
2088 2279 * rest: length of shortest prefix
2089 2280 */
2090 2281 static int nt_shortest(nodetree *self, const char *node)
2091 2282 {
2092 2283 int level, off;
2093 2284
2094 2285 for (level = off = 0; level < 2 * self->nodelen; level++) {
2095 2286 int k, v;
2096 2287 nodetreenode *n = &self->nodes[off];
2097 2288 k = nt_level(node, level);
2098 2289 v = n->children[k];
2099 2290 if (v < 0) {
2100 2291 const char *n;
2101 2292 v = -(v + 2);
2102 2293 n = index_node_existing(self->index, v);
2103 2294 if (n == NULL)
2104 2295 return -3;
2105 2296 if (memcmp(node, n, self->nodelen) != 0)
2106 2297 /*
2107 2298 * Found a unique prefix, but it wasn't for the
2108 2299 * requested node (i.e. the requested node does
2109 2300 * not exist).
2110 2301 */
2111 2302 return -2;
2112 2303 return level + 1;
2113 2304 }
2114 2305 if (v == 0)
2115 2306 return -2;
2116 2307 off = v;
2117 2308 }
2118 2309 /*
2119 2310 * The node was still not unique after 40 hex digits, so this won't
2120 2311 * happen. Also, if we get here, then there's a programming error in
2121 2312 * this file that made us insert a node longer than 40 hex digits.
2122 2313 */
2123 2314 PyErr_SetString(PyExc_Exception, "broken node tree");
2124 2315 return -3;
2125 2316 }
2126 2317
2127 2318 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2128 2319 {
2129 2320 PyObject *val;
2130 2321 char *node;
2131 2322 int length;
2132 2323
2133 2324 if (!PyArg_ParseTuple(args, "O", &val))
2134 2325 return NULL;
2135 2326 if (node_check(self->nt.nodelen, val, &node) == -1)
2136 2327 return NULL;
2137 2328
2138 2329 length = nt_shortest(&self->nt, node);
2139 2330 if (length == -3)
2140 2331 return NULL;
2141 2332 if (length == -2) {
2142 2333 raise_revlog_error();
2143 2334 return NULL;
2144 2335 }
2145 2336 return PyLong_FromLong(length);
2146 2337 }
2147 2338
2148 2339 static void nt_dealloc(nodetree *self)
2149 2340 {
2150 2341 free(self->nodes);
2151 2342 self->nodes = NULL;
2152 2343 }
2153 2344
2154 2345 static void ntobj_dealloc(nodetreeObject *self)
2155 2346 {
2156 2347 Py_XDECREF(self->nt.index);
2157 2348 nt_dealloc(&self->nt);
2158 2349 PyObject_Del(self);
2159 2350 }
2160 2351
2161 2352 static PyMethodDef ntobj_methods[] = {
2162 2353 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2163 2354 "insert an index entry"},
2164 2355 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2165 2356 "find length of shortest hex nodeid of a binary ID"},
2166 2357 {NULL} /* Sentinel */
2167 2358 };
2168 2359
2169 2360 static PyTypeObject nodetreeType = {
2170 2361 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2171 2362 "parsers.nodetree", /* tp_name */
2172 2363 sizeof(nodetreeObject), /* tp_basicsize */
2173 2364 0, /* tp_itemsize */
2174 2365 (destructor)ntobj_dealloc, /* tp_dealloc */
2175 2366 0, /* tp_print */
2176 2367 0, /* tp_getattr */
2177 2368 0, /* tp_setattr */
2178 2369 0, /* tp_compare */
2179 2370 0, /* tp_repr */
2180 2371 0, /* tp_as_number */
2181 2372 0, /* tp_as_sequence */
2182 2373 0, /* tp_as_mapping */
2183 2374 0, /* tp_hash */
2184 2375 0, /* tp_call */
2185 2376 0, /* tp_str */
2186 2377 0, /* tp_getattro */
2187 2378 0, /* tp_setattro */
2188 2379 0, /* tp_as_buffer */
2189 2380 Py_TPFLAGS_DEFAULT, /* tp_flags */
2190 2381 "nodetree", /* tp_doc */
2191 2382 0, /* tp_traverse */
2192 2383 0, /* tp_clear */
2193 2384 0, /* tp_richcompare */
2194 2385 0, /* tp_weaklistoffset */
2195 2386 0, /* tp_iter */
2196 2387 0, /* tp_iternext */
2197 2388 ntobj_methods, /* tp_methods */
2198 2389 0, /* tp_members */
2199 2390 0, /* tp_getset */
2200 2391 0, /* tp_base */
2201 2392 0, /* tp_dict */
2202 2393 0, /* tp_descr_get */
2203 2394 0, /* tp_descr_set */
2204 2395 0, /* tp_dictoffset */
2205 2396 (initproc)ntobj_init, /* tp_init */
2206 2397 0, /* tp_alloc */
2207 2398 };
2208 2399
2209 2400 static int index_init_nt(indexObject *self)
2210 2401 {
2211 2402 if (!self->ntinitialized) {
2212 2403 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2213 2404 nt_dealloc(&self->nt);
2214 2405 return -1;
2215 2406 }
2216 2407 if (nt_insert(&self->nt, nullid, -1) == -1) {
2217 2408 nt_dealloc(&self->nt);
2218 2409 return -1;
2219 2410 }
2220 2411 self->ntinitialized = 1;
2221 2412 self->ntrev = (int)index_length(self);
2222 2413 self->ntlookups = 1;
2223 2414 self->ntmisses = 0;
2224 2415 }
2225 2416 return 0;
2226 2417 }
2227 2418
2228 2419 /*
2229 2420 * Return values:
2230 2421 *
2231 2422 * -3: error (exception set)
2232 2423 * -2: not found (no exception set)
2233 2424 * rest: valid rev
2234 2425 */
2235 2426 static int index_find_node(indexObject *self, const char *node)
2236 2427 {
2237 2428 int rev;
2238 2429
2239 2430 if (index_init_nt(self) == -1)
2240 2431 return -3;
2241 2432
2242 2433 self->ntlookups++;
2243 2434 rev = nt_find(&self->nt, node, self->nodelen, 0);
2244 2435 if (rev >= -1)
2245 2436 return rev;
2246 2437
2247 2438 /*
2248 2439 * For the first handful of lookups, we scan the entire index,
2249 2440 * and cache only the matching nodes. This optimizes for cases
2250 2441 * like "hg tip", where only a few nodes are accessed.
2251 2442 *
2252 2443 * After that, we cache every node we visit, using a single
2253 2444 * scan amortized over multiple lookups. This gives the best
2254 2445 * bulk performance, e.g. for "hg log".
2255 2446 */
2256 2447 if (self->ntmisses++ < 4) {
2257 2448 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2258 2449 const char *n = index_node_existing(self, rev);
2259 2450 if (n == NULL)
2260 2451 return -3;
2261 2452 if (memcmp(node, n, self->nodelen) == 0) {
2262 2453 if (nt_insert(&self->nt, n, rev) == -1)
2263 2454 return -3;
2264 2455 break;
2265 2456 }
2266 2457 }
2267 2458 } else {
2268 2459 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2269 2460 const char *n = index_node_existing(self, rev);
2270 2461 if (n == NULL)
2271 2462 return -3;
2272 2463 if (nt_insert(&self->nt, n, rev) == -1) {
2273 2464 self->ntrev = rev + 1;
2274 2465 return -3;
2275 2466 }
2276 2467 if (memcmp(node, n, self->nodelen) == 0) {
2277 2468 break;
2278 2469 }
2279 2470 }
2280 2471 self->ntrev = rev;
2281 2472 }
2282 2473
2283 2474 if (rev >= 0)
2284 2475 return rev;
2285 2476 return -2;
2286 2477 }
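/*
 * Illustrative scenario for the caching above (hypothetical revs):
 * with self->ntrev == 10 and the wanted node stored at rev 3, the bulk
 * branch inserts the nodes of revs 9..3 into the trie and lowers ntrev
 * to 3, so a later lookup for a node at rev 7 is answered from the
 * trie without rescanning the index.
 */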
2287 2478
2288 2479 static PyObject *index_getitem(indexObject *self, PyObject *value)
2289 2480 {
2290 2481 char *node;
2291 2482 int rev;
2292 2483
2293 2484 if (PyLong_Check(value)) {
2294 2485 long idx;
2295 2486 if (!pylong_to_long(value, &idx)) {
2296 2487 return NULL;
2297 2488 }
2298 2489 return index_get(self, idx);
2299 2490 }
2300 2491
2301 2492 if (node_check(self->nodelen, value, &node) == -1)
2302 2493 return NULL;
2303 2494 rev = index_find_node(self, node);
2304 2495 if (rev >= -1)
2305 2496 return PyLong_FromLong(rev);
2306 2497 if (rev == -2)
2307 2498 raise_revlog_error();
2308 2499 return NULL;
2309 2500 }
2310 2501
2311 2502 /*
2312 2503 * Fully populate the radix tree.
2313 2504 */
2314 2505 static int index_populate_nt(indexObject *self)
2315 2506 {
2316 2507 int rev;
2317 2508 if (self->ntrev > 0) {
2318 2509 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2319 2510 const char *n = index_node_existing(self, rev);
2320 2511 if (n == NULL)
2321 2512 return -1;
2322 2513 if (nt_insert(&self->nt, n, rev) == -1)
2323 2514 return -1;
2324 2515 }
2325 2516 self->ntrev = -1;
2326 2517 }
2327 2518 return 0;
2328 2519 }
2329 2520
2330 2521 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2331 2522 {
2332 2523 const char *fullnode;
2333 2524 Py_ssize_t nodelen;
2334 2525 char *node;
2335 2526 int rev, i;
2336 2527
2337 2528 if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
2338 2529 return NULL;
2339 2530
2340 2531 if (nodelen < 1) {
2341 2532 PyErr_SetString(PyExc_ValueError, "key too short");
2342 2533 return NULL;
2343 2534 }
2344 2535
2345 2536 if (nodelen > 2 * self->nodelen) {
2346 2537 PyErr_SetString(PyExc_ValueError, "key too long");
2347 2538 return NULL;
2348 2539 }
2349 2540
2350 2541 for (i = 0; i < nodelen; i++)
2351 2542 hexdigit(node, i);
2352 2543 if (PyErr_Occurred()) {
2353 2544 /* input contains non-hex characters */
2354 2545 PyErr_Clear();
2355 2546 Py_RETURN_NONE;
2356 2547 }
2357 2548
2358 2549 if (index_init_nt(self) == -1)
2359 2550 return NULL;
2360 2551 if (index_populate_nt(self) == -1)
2361 2552 return NULL;
2362 2553 rev = nt_partialmatch(&self->nt, node, nodelen);
2363 2554
2364 2555 switch (rev) {
2365 2556 case -4:
2366 2557 raise_revlog_error();
2367 2558 return NULL;
2368 2559 case -2:
2369 2560 Py_RETURN_NONE;
2370 2561 case -1:
2371 2562 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2372 2563 }
2373 2564
2374 2565 fullnode = index_node_existing(self, rev);
2375 2566 if (fullnode == NULL) {
2376 2567 return NULL;
2377 2568 }
2378 2569 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2379 2570 }
2380 2571
2381 2572 static PyObject *index_shortest(indexObject *self, PyObject *args)
2382 2573 {
2383 2574 PyObject *val;
2384 2575 char *node;
2385 2576 int length;
2386 2577
2387 2578 if (!PyArg_ParseTuple(args, "O", &val))
2388 2579 return NULL;
2389 2580 if (node_check(self->nodelen, val, &node) == -1)
2390 2581 return NULL;
2391 2582
2392 2583 self->ntlookups++;
2393 2584 if (index_init_nt(self) == -1)
2394 2585 return NULL;
2395 2586 if (index_populate_nt(self) == -1)
2396 2587 return NULL;
2397 2588 length = nt_shortest(&self->nt, node);
2398 2589 if (length == -3)
2399 2590 return NULL;
2400 2591 if (length == -2) {
2401 2592 raise_revlog_error();
2402 2593 return NULL;
2403 2594 }
2404 2595 return PyLong_FromLong(length);
2405 2596 }
2406 2597
2407 2598 static PyObject *index_m_get(indexObject *self, PyObject *args)
2408 2599 {
2409 2600 PyObject *val;
2410 2601 char *node;
2411 2602 int rev;
2412 2603
2413 2604 if (!PyArg_ParseTuple(args, "O", &val))
2414 2605 return NULL;
2415 2606 if (node_check(self->nodelen, val, &node) == -1)
2416 2607 return NULL;
2417 2608 rev = index_find_node(self, node);
2418 2609 if (rev == -3)
2419 2610 return NULL;
2420 2611 if (rev == -2)
2421 2612 Py_RETURN_NONE;
2422 2613 return PyLong_FromLong(rev);
2423 2614 }
2424 2615
2425 2616 static int index_contains(indexObject *self, PyObject *value)
2426 2617 {
2427 2618 char *node;
2428 2619
2429 2620 if (PyLong_Check(value)) {
2430 2621 long rev;
2431 2622 if (!pylong_to_long(value, &rev)) {
2432 2623 return -1;
2433 2624 }
2434 2625 return rev >= -1 && rev < index_length(self);
2435 2626 }
2436 2627
2437 2628 if (node_check(self->nodelen, value, &node) == -1)
2438 2629 return -1;
2439 2630
2440 2631 switch (index_find_node(self, node)) {
2441 2632 case -3:
2442 2633 return -1;
2443 2634 case -2:
2444 2635 return 0;
2445 2636 default:
2446 2637 return 1;
2447 2638 }
2448 2639 }
2449 2640
2450 2641 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2451 2642 {
2452 2643 int ret = index_contains(self, args);
2453 2644 if (ret < 0)
2454 2645 return NULL;
2455 2646 return PyBool_FromLong((long)ret);
2456 2647 }
2457 2648
2458 2649 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2459 2650 {
2460 2651 char *node;
2461 2652 int rev;
2462 2653
2463 2654 if (node_check(self->nodelen, val, &node) == -1)
2464 2655 return NULL;
2465 2656 rev = index_find_node(self, node);
2466 2657 if (rev >= -1)
2467 2658 return PyLong_FromLong(rev);
2468 2659 if (rev == -2)
2469 2660 raise_revlog_error();
2470 2661 return NULL;
2471 2662 }
2472 2663
2473 2664 typedef uint64_t bitmask;
2474 2665
2475 2666 /*
2476 2667 * Given a disjoint set of revs, return all candidates for the
2477 2668 * greatest common ancestor. In revset notation, this is the set
2478 2669 * "heads(::a and ::b and ...)"
2479 2670 */
2480 2671 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2481 2672 int revcount)
2482 2673 {
2483 2674 const bitmask allseen = (1ull << revcount) - 1;
2484 2675 const bitmask poison = 1ull << revcount;
2485 2676 PyObject *gca = PyList_New(0);
2486 2677 int i, v, interesting;
2487 2678 int maxrev = -1;
2488 2679 bitmask sp;
2489 2680 bitmask *seen;
2490 2681
2491 2682 if (gca == NULL)
2492 2683 return PyErr_NoMemory();
2493 2684
2494 2685 for (i = 0; i < revcount; i++) {
2495 2686 if (revs[i] > maxrev)
2496 2687 maxrev = revs[i];
2497 2688 }
2498 2689
2499 2690 seen = calloc(sizeof(*seen), maxrev + 1);
2500 2691 if (seen == NULL) {
2501 2692 Py_DECREF(gca);
2502 2693 return PyErr_NoMemory();
2503 2694 }
2504 2695
2505 2696 for (i = 0; i < revcount; i++)
2506 2697 seen[revs[i]] = 1ull << i;
2507 2698
2508 2699 interesting = revcount;
2509 2700
2510 2701 for (v = maxrev; v >= 0 && interesting; v--) {
2511 2702 bitmask sv = seen[v];
2512 2703 int parents[2];
2513 2704
2514 2705 if (!sv)
2515 2706 continue;
2516 2707
2517 2708 if (sv < poison) {
2518 2709 interesting -= 1;
2519 2710 if (sv == allseen) {
2520 2711 if (pylist_append_owned(
2521 2712 gca, PyLong_FromLong(v)) == -1) {
2522 2713 goto bail;
2523 2714 }
2524 2715 sv |= poison;
2525 2716 for (i = 0; i < revcount; i++) {
2526 2717 if (revs[i] == v)
2527 2718 goto done;
2528 2719 }
2529 2720 }
2530 2721 }
2531 2722 if (index_get_parents(self, v, parents, maxrev) < 0)
2532 2723 goto bail;
2533 2724
2534 2725 for (i = 0; i < 2; i++) {
2535 2726 int p = parents[i];
2536 2727 if (p == -1)
2537 2728 continue;
2538 2729 sp = seen[p];
2539 2730 if (sv < poison) {
2540 2731 if (sp == 0) {
2541 2732 seen[p] = sv;
2542 2733 interesting++;
2543 2734 } else if (sp != sv)
2544 2735 seen[p] |= sv;
2545 2736 } else {
2546 2737 if (sp && sp < poison)
2547 2738 interesting--;
2548 2739 seen[p] = sv;
2549 2740 }
2550 2741 }
2551 2742 }
2552 2743
2553 2744 done:
2554 2745 free(seen);
2555 2746 return gca;
2556 2747 bail:
2557 2748 free(seen);
2558 2749 Py_XDECREF(gca);
2559 2750 return NULL;
2560 2751 }
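/*
 * Tiny worked example (hypothetical graph): for revs (2, 3) the bits
 * are allseen == 0x3 and poison == 0x4. Walking v from the maximum rev
 * downwards, each revision's bit set is propagated to its parents; the
 * first revision whose set equals allseen (reachable from both inputs)
 * is appended to gca and poisoned so that its own ancestors are not
 * reported as additional candidates.
 */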
2561 2752
2562 2753 /*
2563 2754 * Given a disjoint set of revs, return the subset with the longest
2564 2755 * path to the root.
2565 2756 */
2566 2757 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2567 2758 {
2568 2759 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2569 2760 static const Py_ssize_t capacity = 24;
2570 2761 int *depth, *interesting = NULL;
2571 2762 int i, j, v, ninteresting;
2572 2763 PyObject *dict = NULL, *keys = NULL;
2573 2764 long *seen = NULL;
2574 2765 int maxrev = -1;
2575 2766 long final;
2576 2767
2577 2768 if (revcount > capacity) {
2578 2769 PyErr_Format(PyExc_OverflowError,
2579 2770 "bitset size (%ld) > capacity (%ld)",
2580 2771 (long)revcount, (long)capacity);
2581 2772 return NULL;
2582 2773 }
2583 2774
2584 2775 for (i = 0; i < revcount; i++) {
2585 2776 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2586 2777 if (n > maxrev)
2587 2778 maxrev = n;
2588 2779 }
2589 2780
2590 2781 depth = calloc(sizeof(*depth), maxrev + 1);
2591 2782 if (depth == NULL)
2592 2783 return PyErr_NoMemory();
2593 2784
2594 2785 seen = calloc(sizeof(*seen), maxrev + 1);
2595 2786 if (seen == NULL) {
2596 2787 PyErr_NoMemory();
2597 2788 goto bail;
2598 2789 }
2599 2790
2600 2791 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2601 2792 if (interesting == NULL) {
2602 2793 PyErr_NoMemory();
2603 2794 goto bail;
2604 2795 }
2605 2796
2606 2797 if (PyList_Sort(revs) == -1)
2607 2798 goto bail;
2608 2799
2609 2800 for (i = 0; i < revcount; i++) {
2610 2801 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2611 2802 long b = 1l << i;
2612 2803 depth[n] = 1;
2613 2804 seen[n] = b;
2614 2805 interesting[b] = 1;
2615 2806 }
2616 2807
2617 2808 /* invariant: ninteresting is the number of non-zero entries in
2618 2809 * interesting. */
2619 2810 ninteresting = (int)revcount;
2620 2811
2621 2812 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2622 2813 int dv = depth[v];
2623 2814 int parents[2];
2624 2815 long sv;
2625 2816
2626 2817 if (dv == 0)
2627 2818 continue;
2628 2819
2629 2820 sv = seen[v];
2630 2821 if (index_get_parents(self, v, parents, maxrev) < 0)
2631 2822 goto bail;
2632 2823
2633 2824 for (i = 0; i < 2; i++) {
2634 2825 int p = parents[i];
2635 2826 long sp;
2636 2827 int dp;
2637 2828
2638 2829 if (p == -1)
2639 2830 continue;
2640 2831
2641 2832 dp = depth[p];
2642 2833 sp = seen[p];
2643 2834 if (dp <= dv) {
2644 2835 depth[p] = dv + 1;
2645 2836 if (sp != sv) {
2646 2837 interesting[sv] += 1;
2647 2838 seen[p] = sv;
2648 2839 if (sp) {
2649 2840 interesting[sp] -= 1;
2650 2841 if (interesting[sp] == 0)
2651 2842 ninteresting -= 1;
2652 2843 }
2653 2844 }
2654 2845 } else if (dv == dp - 1) {
2655 2846 long nsp = sp | sv;
2656 2847 if (nsp == sp)
2657 2848 continue;
2658 2849 seen[p] = nsp;
2659 2850 interesting[sp] -= 1;
2660 2851 if (interesting[sp] == 0)
2661 2852 ninteresting -= 1;
2662 2853 if (interesting[nsp] == 0)
2663 2854 ninteresting += 1;
2664 2855 interesting[nsp] += 1;
2665 2856 }
2666 2857 }
2667 2858 interesting[sv] -= 1;
2668 2859 if (interesting[sv] == 0)
2669 2860 ninteresting -= 1;
2670 2861 }
2671 2862
2672 2863 final = 0;
2673 2864 j = ninteresting;
2674 2865 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2675 2866 if (interesting[i] == 0)
2676 2867 continue;
2677 2868 final |= i;
2678 2869 j -= 1;
2679 2870 }
2680 2871 if (final == 0) {
2681 2872 keys = PyList_New(0);
2682 2873 goto bail;
2683 2874 }
2684 2875
2685 2876 dict = PyDict_New();
2686 2877 if (dict == NULL)
2687 2878 goto bail;
2688 2879
2689 2880 for (i = 0; i < revcount; i++) {
2690 2881 PyObject *key;
2691 2882
2692 2883 if ((final & (1 << i)) == 0)
2693 2884 continue;
2694 2885
2695 2886 key = PyList_GET_ITEM(revs, i);
2696 2887 Py_INCREF(key);
2697 2888 Py_INCREF(Py_None);
2698 2889 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2699 2890 Py_DECREF(key);
2700 2891 Py_DECREF(Py_None);
2701 2892 goto bail;
2702 2893 }
2703 2894 }
2704 2895
2705 2896 keys = PyDict_Keys(dict);
2706 2897
2707 2898 bail:
2708 2899 free(depth);
2709 2900 free(seen);
2710 2901 free(interesting);
2711 2902 Py_XDECREF(dict);
2712 2903
2713 2904 return keys;
2714 2905 }
2715 2906
2716 2907 /*
2717 2908 * Given a (possibly overlapping) set of revs, return all the
2718 2909 * common ancestors heads: heads(::args[0] and ::args[1] and ...)
2719 2910 */
2720 2911 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2721 2912 {
2722 2913 PyObject *ret = NULL;
2723 2914 Py_ssize_t argcount, i, len;
2724 2915 bitmask repeat = 0;
2725 2916 int revcount = 0;
2726 2917 int *revs;
2727 2918
2728 2919 argcount = PySequence_Length(args);
2729 2920 revs = PyMem_Malloc(argcount * sizeof(*revs));
2730 2921 if (argcount > 0 && revs == NULL)
2731 2922 return PyErr_NoMemory();
2732 2923 len = index_length(self);
2733 2924
2734 2925 for (i = 0; i < argcount; i++) {
2735 2926 static const int capacity = 24;
2736 2927 PyObject *obj = PySequence_GetItem(args, i);
2737 2928 bitmask x;
2738 2929 long val;
2739 2930
2740 2931 if (!PyLong_Check(obj)) {
2741 2932 PyErr_SetString(PyExc_TypeError,
2742 2933 "arguments must all be ints");
2743 2934 Py_DECREF(obj);
2744 2935 goto bail;
2745 2936 }
2746 2937 val = PyLong_AsLong(obj);
2747 2938 Py_DECREF(obj);
2748 2939 if (val == -1) {
2749 2940 ret = PyList_New(0);
2750 2941 goto done;
2751 2942 }
2752 2943 if (val < 0 || val >= len) {
2753 2944 PyErr_SetString(PyExc_IndexError, "index out of range");
2754 2945 goto bail;
2755 2946 }
2756 2947 /* this cheesy bloom filter lets us avoid some more
2757 2948 * expensive duplicate checks in the common set-is-disjoint
2758 2949 * case */
2759 2950 x = 1ull << (val & 0x3f);
2760 2951 if (repeat & x) {
2761 2952 int k;
2762 2953 for (k = 0; k < revcount; k++) {
2763 2954 if (val == revs[k])
2764 2955 goto duplicate;
2765 2956 }
2766 2957 } else
2767 2958 repeat |= x;
2768 2959 if (revcount >= capacity) {
2769 2960 PyErr_Format(PyExc_OverflowError,
2770 2961 "bitset size (%d) > capacity (%d)",
2771 2962 revcount, capacity);
2772 2963 goto bail;
2773 2964 }
2774 2965 revs[revcount++] = (int)val;
2775 2966 duplicate:;
2776 2967 }
2777 2968
2778 2969 if (revcount == 0) {
2779 2970 ret = PyList_New(0);
2780 2971 goto done;
2781 2972 }
2782 2973 if (revcount == 1) {
2783 2974 PyObject *obj;
2784 2975 ret = PyList_New(1);
2785 2976 if (ret == NULL)
2786 2977 goto bail;
2787 2978 obj = PyLong_FromLong(revs[0]);
2788 2979 if (obj == NULL)
2789 2980 goto bail;
2790 2981 PyList_SET_ITEM(ret, 0, obj);
2791 2982 goto done;
2792 2983 }
2793 2984
2794 2985 ret = find_gca_candidates(self, revs, revcount);
2795 2986 if (ret == NULL)
2796 2987 goto bail;
2797 2988
2798 2989 done:
2799 2990 PyMem_Free(revs);
2800 2991 return ret;
2801 2992
2802 2993 bail:
2803 2994 PyMem_Free(revs);
2804 2995 Py_XDECREF(ret);
2805 2996 return NULL;
2806 2997 }
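/*
 * Note on the "cheesy bloom filter" above (hypothetical arguments):
 * revs 5 and 69 both map to bucket 5 because 69 & 0x3f == 5, so the
 * second one triggers the explicit duplicate scan even though the revs
 * differ; arguments landing in distinct buckets skip that scan
 * entirely.
 */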
2807 2998
2808 2999 /*
2809 3000 * Given a (possibly overlapping) set of revs, return the greatest
2810 3001 * common ancestors: those with the longest path to the root.
2811 3002 */
2812 3003 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2813 3004 {
2814 3005 PyObject *ret;
2815 3006 PyObject *gca = index_commonancestorsheads(self, args);
2816 3007 if (gca == NULL)
2817 3008 return NULL;
2818 3009
2819 3010 if (PyList_GET_SIZE(gca) <= 1) {
2820 3011 return gca;
2821 3012 }
2822 3013
2823 3014 ret = find_deepest(self, gca);
2824 3015 Py_DECREF(gca);
2825 3016 return ret;
2826 3017 }
2827 3018
2828 3019 /*
2829 3020 * Invalidate any trie entries introduced by added revs.
2830 3021 */
2831 3022 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2832 3023 {
2833 3024 Py_ssize_t i, len;
2834 3025
2835 3026 len = self->length + self->new_length;
2836 3027 i = start - self->length;
2837 3028 if (i < 0)
2838 3029 return;
2839 3030
2840 3031 for (i = start; i < len; i++) {
2841 3032 const char *node = index_node(self, i);
2842 3033 nt_delete_node(&self->nt, node);
2843 3034 }
2844 3035
2845 3036 self->new_length = start - self->length;
2846 3037 }
2847 3038
2848 3039 /*
2849 3040 * Delete a numeric range of revs, which must be at the end of the
2850 3041 * range.
2851 3042 */
2852 3043 static int index_slice_del(indexObject *self, PyObject *item)
2853 3044 {
2854 3045 Py_ssize_t start, stop, step, slicelength;
2855 3046 Py_ssize_t length = index_length(self) + 1;
2856 3047 int ret = 0;
2857 3048
2858 3049 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2859 3050 &slicelength) < 0)
2860 3051 return -1;
2861 3052
2862 3053 if (slicelength <= 0)
2863 3054 return 0;
2864 3055
2865 3056 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2866 3057 stop = start;
2867 3058
2868 3059 if (step < 0) {
2869 3060 stop = start + 1;
2870 3061 start = stop + step * (slicelength - 1) - 1;
2871 3062 step = -step;
2872 3063 }
2873 3064
2874 3065 if (step != 1) {
2875 3066 PyErr_SetString(PyExc_ValueError,
2876 3067 "revlog index delete requires step size of 1");
2877 3068 return -1;
2878 3069 }
2879 3070
2880 3071 if (stop != length - 1) {
2881 3072 PyErr_SetString(PyExc_IndexError,
2882 3073 "revlog index deletion indices are invalid");
2883 3074 return -1;
2884 3075 }
2885 3076
2886 3077 if (start < self->length) {
2887 3078 if (self->ntinitialized) {
2888 3079 Py_ssize_t i;
2889 3080
2890 3081 for (i = start; i < self->length; i++) {
2891 3082 const char *node = index_node_existing(self, i);
2892 3083 if (node == NULL)
2893 3084 return -1;
2894 3085
2895 3086 nt_delete_node(&self->nt, node);
2896 3087 }
2897 3088 if (self->new_length)
2898 3089 index_invalidate_added(self, self->length);
2899 3090 if (self->ntrev > start)
2900 3091 self->ntrev = (int)start;
2901 3092 } else if (self->new_length) {
2902 3093 self->new_length = 0;
2903 3094 }
2904 3095
2905 3096 self->length = start;
2906 3097 goto done;
2907 3098 }
2908 3099
2909 3100 if (self->ntinitialized) {
2910 3101 index_invalidate_added(self, start);
2911 3102 if (self->ntrev > start)
2912 3103 self->ntrev = (int)start;
2913 3104 } else {
2914 3105 self->new_length = start - self->length;
2915 3106 }
2916 3107 done:
2917 3108 Py_CLEAR(self->headrevs);
2918 3109 return ret;
2919 3110 }
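/*
 * Python-level sketch of the only supported deletion shape: del
 * index[5:] drops every revision from 5 through the current tip; a
 * slice whose step is not 1 or that stops short of the tip is rejected
 * with the errors raised above.
 */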
2920 3111
2921 3112 /*
2922 3113 * Supported ops:
2923 3114 *
2924 3115 * slice deletion
2925 3116 * string assignment (extend node->rev mapping)
2926 3117 * string deletion (shrink node->rev mapping)
2927 3118 */
2928 3119 static int index_assign_subscript(indexObject *self, PyObject *item,
2929 3120 PyObject *value)
2930 3121 {
2931 3122 char *node;
2932 3123 long rev;
2933 3124
2934 3125 if (PySlice_Check(item) && value == NULL)
2935 3126 return index_slice_del(self, item);
2936 3127
2937 3128 if (node_check(self->nodelen, item, &node) == -1)
2938 3129 return -1;
2939 3130
2940 3131 if (value == NULL)
2941 3132 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2942 3133 : 0;
2943 3134 rev = PyLong_AsLong(value);
2944 3135 if (rev > INT_MAX || rev < 0) {
2945 3136 if (!PyErr_Occurred())
2946 3137 PyErr_SetString(PyExc_ValueError, "rev out of range");
2947 3138 return -1;
2948 3139 }
2949 3140
2950 3141 if (index_init_nt(self) == -1)
2951 3142 return -1;
2952 3143 return nt_insert(&self->nt, node, (int)rev);
2953 3144 }
2954 3145
2955 3146 /*
2956 3147 * Find all RevlogNG entries in an index that has inline data. Update
2957 3148 * the optional "offsets" table with those entries.
2958 3149 */
2959 3150 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2960 3151 {
2961 3152 const char *data = (const char *)self->buf.buf;
2962 3153 Py_ssize_t pos = 0;
2963 3154 Py_ssize_t end = self->buf.len;
2964 3155 long incr = self->entry_size;
2965 3156 Py_ssize_t len = 0;
2966 3157
2967 3158 while (pos + self->entry_size <= end && pos >= 0) {
2968 3159 uint32_t comp_len, sidedata_comp_len = 0;
2969 3160 /* 3rd element of header is length of compressed inline data */
2970 3161 if (self->format_version == format_v1) {
2971 3162 comp_len =
2972 3163 getbe32(data + pos + entry_v1_offset_comp_len);
2973 3164 sidedata_comp_len = 0;
2974 3165 } else if (self->format_version == format_v2) {
2975 3166 comp_len =
2976 3167 getbe32(data + pos + entry_v2_offset_comp_len);
2977 3168 sidedata_comp_len = getbe32(
2978 3169 data + pos + entry_v2_offset_sidedata_comp_len);
2979 3170 } else {
2980 3171 raise_revlog_error();
2981 3172 return -1;
2982 3173 }
2983 3174 incr = self->entry_size + comp_len + sidedata_comp_len;
2984 3175 if (offsets)
2985 3176 offsets[len] = data + pos;
2986 3177 len++;
2987 3178 pos += incr;
2988 3179 }
2989 3180
2990 3181 if (pos != end) {
2991 3182 if (!PyErr_Occurred())
2992 3183 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2993 3184 return -1;
2994 3185 }
2995 3186
2996 3187 return len;
2997 3188 }
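/*
 * Layout sketch for the inline case handled above: each record is one
 * fixed-size header (entry_size bytes, 64 for v1) immediately followed
 * by its compressed delta, so pos advances by entry_size + comp_len
 * (+ sidedata_comp_len for v2); if the walk does not land exactly on
 * the end of the buffer, the index is reported as corrupt.
 */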
2998 3189
2999 3190 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
3000 3191 {
3001 3192 PyObject *data_obj, *inlined_obj;
3002 3193 Py_ssize_t size;
3003 3194
3004 3195 static char *kwlist[] = {"data", "inlined", "format", NULL};
3005 3196
3006 3197 /* Initialize before argument-checking to avoid index_dealloc() crash.
3007 3198 */
3008 3199 self->added = NULL;
3009 3200 self->new_length = 0;
3010 3201 self->added_length = 0;
3011 3202 self->data = NULL;
3012 3203 memset(&self->buf, 0, sizeof(self->buf));
3013 3204 self->headrevs = NULL;
3014 3205 self->filteredrevs = Py_None;
3015 3206 Py_INCREF(Py_None);
3016 3207 self->ntinitialized = 0;
3017 3208 self->offsets = NULL;
3018 3209 self->nodelen = 20;
3019 3210 self->nullentry = NULL;
3020 3211 self->rust_ext_compat = 0;
3021 3212 self->format_version = format_v1;
3022 3213
3023 3214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3024 3215 &data_obj, &inlined_obj,
3025 3216 &(self->format_version)))
3026 3217 return -1;
3027 3218 if (!PyObject_CheckBuffer(data_obj)) {
3028 3219 PyErr_SetString(PyExc_TypeError,
3029 3220 "data does not support buffer interface");
3030 3221 return -1;
3031 3222 }
3032 3223 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3033 3224 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3034 3225 return -1;
3035 3226 }
3036 3227
3037 3228 if (self->format_version == format_v1) {
3038 3229 self->rust_ext_compat = 1;
3039 3230 self->entry_size = v1_entry_size;
3040 3231 } else if (self->format_version == format_v2) {
3041 3232 self->entry_size = v2_entry_size;
3042 3233 } else if (self->format_version == format_cl2) {
3043 3234 self->entry_size = cl2_entry_size;
3044 3235 }
3045 3236
3046 3237 self->nullentry = Py_BuildValue(
3047 3238 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3048 3239 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3049 3240
3050 3241 if (!self->nullentry)
3051 3242 return -1;
3052 3243 PyObject_GC_UnTrack(self->nullentry);
3053 3244
3054 3245 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3055 3246 return -1;
3056 3247 size = self->buf.len;
3057 3248
3058 3249 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3059 3250 self->data = data_obj;
3060 3251
3061 3252 self->ntlookups = self->ntmisses = 0;
3062 3253 self->ntrev = -1;
3063 3254 Py_INCREF(self->data);
3064 3255
3065 3256 if (self->inlined) {
3066 3257 Py_ssize_t len = inline_scan(self, NULL);
3067 3258 if (len == -1)
3068 3259 goto bail;
3069 3260 self->length = len;
3070 3261 } else {
3071 3262 if (size % self->entry_size) {
3072 3263 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3073 3264 goto bail;
3074 3265 }
3075 3266 self->length = size / self->entry_size;
3076 3267 }
3077 3268
3078 3269 return 0;
3079 3270 bail:
3080 3271 return -1;
3081 3272 }
3082 3273
3083 3274 static PyObject *index_nodemap(indexObject *self)
3084 3275 {
3085 3276 Py_INCREF(self);
3086 3277 return (PyObject *)self;
3087 3278 }
3088 3279
3089 3280 static void _index_clearcaches(indexObject *self)
3090 3281 {
3091 3282 if (self->offsets) {
3092 3283 PyMem_Free((void *)self->offsets);
3093 3284 self->offsets = NULL;
3094 3285 }
3095 3286 if (self->ntinitialized) {
3096 3287 nt_dealloc(&self->nt);
3097 3288 }
3098 3289 self->ntinitialized = 0;
3099 3290 Py_CLEAR(self->headrevs);
3100 3291 }
3101 3292
3102 3293 static PyObject *index_clearcaches(indexObject *self)
3103 3294 {
3104 3295 _index_clearcaches(self);
3105 3296 self->ntrev = -1;
3106 3297 self->ntlookups = self->ntmisses = 0;
3107 3298 Py_RETURN_NONE;
3108 3299 }
3109 3300
3110 3301 static void index_dealloc(indexObject *self)
3111 3302 {
3112 3303 _index_clearcaches(self);
3113 3304 Py_XDECREF(self->filteredrevs);
3114 3305 if (self->buf.buf) {
3115 3306 PyBuffer_Release(&self->buf);
3116 3307 memset(&self->buf, 0, sizeof(self->buf));
3117 3308 }
3118 3309 Py_XDECREF(self->data);
3119 3310 PyMem_Free(self->added);
3120 3311 Py_XDECREF(self->nullentry);
3121 3312 PyObject_Del(self);
3122 3313 }
3123 3314
3124 3315 static PySequenceMethods index_sequence_methods = {
3125 3316 (lenfunc)index_length, /* sq_length */
3126 3317 0, /* sq_concat */
3127 3318 0, /* sq_repeat */
3128 3319 (ssizeargfunc)index_get, /* sq_item */
3129 3320 0, /* sq_slice */
3130 3321 0, /* sq_ass_item */
3131 3322 0, /* sq_ass_slice */
3132 3323 (objobjproc)index_contains, /* sq_contains */
3133 3324 };
3134 3325
3135 3326 static PyMappingMethods index_mapping_methods = {
3136 3327 (lenfunc)index_length, /* mp_length */
3137 3328 (binaryfunc)index_getitem, /* mp_subscript */
3138 3329 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
3139 3330 };
3140 3331
3141 3332 static PyMethodDef index_methods[] = {
3142 3333 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3143 3334 "return the gca set of the given revs"},
3335 {"headrevsdiff", (PyCFunction)index_headrevsdiff, METH_VARARGS,
3336 "return the set of heads removed/added by a range of commits"},
3144 3337 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3145 3338 METH_VARARGS,
3146 3339 "return the heads of the common ancestors of the given revs"},
3147 3340 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3148 3341 "clear the index caches"},
3149 3342 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3150 3343 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3151 3344 "return `rev` associated with a node or None"},
3152 3345 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3153 3346 "return True if the node exist in the index"},
3154 3347 {"rev", (PyCFunction)index_m_rev, METH_O,
3155 3348 "return `rev` associated with a node or raise RevlogError"},
3156 3349 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3157 3350 "compute phases"},
3158 3351 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3159 3352 "reachableroots"},
3160 3353 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3161 3354 METH_VARARGS, "replace an existing index entry with a new value"},
3162 3355 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3163 3356 "get head revisions"}, /* Can do filtering since 3.2 */
3164 3357 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3165 3358 "get filtered head revisions"}, /* Can always do filtering */
3166 3359 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3167 3360 "True if the object is a snapshot"},
3168 3361 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3169 3362 "Gather snapshot data in a cache dict"},
3170 3363 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3171 3364 "determine revisions with deltas to reconstruct fulltext"},
3172 3365 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3173 3366 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
3174 3367 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3175 3368 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3176 3369 "match a potentially ambiguous node ID"},
3177 3370 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3178 3371 "find length of shortest hex nodeid of a binary ID"},
3179 3372 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3180 3373 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3181 3374 "return an entry in binary form"},
3182 3375 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3183 3376 "pack the revlog header information into binary"},
3184 3377 {NULL} /* Sentinel */
3185 3378 };
3186 3379
3187 3380 static PyGetSetDef index_getset[] = {
3188 3381 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
3189 3382 {NULL} /* Sentinel */
3190 3383 };
3191 3384
3192 3385 static PyMemberDef index_members[] = {
3193 3386 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3194 3387 "size of an index entry"},
3195 3388 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3196 3389 "size of an index entry"},
3197 3390 {NULL} /* Sentinel */
3198 3391 };
3199 3392
3200 3393 PyTypeObject HgRevlogIndex_Type = {
3201 3394 PyVarObject_HEAD_INIT(NULL, 0) /* header */
3202 3395 "parsers.index", /* tp_name */
3203 3396 sizeof(indexObject), /* tp_basicsize */
3204 3397 0, /* tp_itemsize */
3205 3398 (destructor)index_dealloc, /* tp_dealloc */
3206 3399 0, /* tp_print */
3207 3400 0, /* tp_getattr */
3208 3401 0, /* tp_setattr */
3209 3402 0, /* tp_compare */
3210 3403 0, /* tp_repr */
3211 3404 0, /* tp_as_number */
3212 3405 &index_sequence_methods, /* tp_as_sequence */
3213 3406 &index_mapping_methods, /* tp_as_mapping */
3214 3407 0, /* tp_hash */
3215 3408 0, /* tp_call */
3216 3409 0, /* tp_str */
3217 3410 0, /* tp_getattro */
3218 3411 0, /* tp_setattro */
3219 3412 0, /* tp_as_buffer */
3220 3413 Py_TPFLAGS_DEFAULT, /* tp_flags */
3221 3414 "revlog index", /* tp_doc */
3222 3415 0, /* tp_traverse */
3223 3416 0, /* tp_clear */
3224 3417 0, /* tp_richcompare */
3225 3418 0, /* tp_weaklistoffset */
3226 3419 0, /* tp_iter */
3227 3420 0, /* tp_iternext */
3228 3421 index_methods, /* tp_methods */
3229 3422 index_members, /* tp_members */
3230 3423 index_getset, /* tp_getset */
3231 3424 0, /* tp_base */
3232 3425 0, /* tp_dict */
3233 3426 0, /* tp_descr_get */
3234 3427 0, /* tp_descr_set */
3235 3428 0, /* tp_dictoffset */
3236 3429 (initproc)index_init, /* tp_init */
3237 3430 0, /* tp_alloc */
3238 3431 };
3239 3432
3240 3433 /*
3241 3434 * returns a tuple of the form (index, cache) with elements as
3242 3435 * follows:
3243 3436 *
3244 3437 * index: an index object that lazily parses Revlog (v1 or v2) records
3245 3438 * cache: if data is inlined, a tuple (0, index_file_content), else None
3246 3439 * index_file_content could be a string, or a buffer
3247 3440 *
3248 3441 * added complications are for backwards compatibility
3249 3442 */
3250 3443 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3251 3444 {
3252 3445 PyObject *cache = NULL;
3253 3446 indexObject *idx;
3254 3447 int ret;
3255 3448
3256 3449 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3257 3450 if (idx == NULL)
3258 3451 goto bail;
3259 3452
3260 3453 ret = index_init(idx, args, kwargs);
3261 3454 if (ret == -1)
3262 3455 goto bail;
3263 3456
3264 3457 if (idx->inlined) {
3265 3458 cache = Py_BuildValue("iO", 0, idx->data);
3266 3459 if (cache == NULL)
3267 3460 goto bail;
3268 3461 } else {
3269 3462 cache = Py_None;
3270 3463 Py_INCREF(cache);
3271 3464 }
3272 3465
3273 3466 return Py_BuildValue("NN", idx, cache);
3274 3467
3275 3468 bail:
3276 3469 Py_XDECREF(idx);
3277 3470 Py_XDECREF(cache);
3278 3471 return NULL;
3279 3472 }
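/*
 * Illustrative usage from Python (a sketch, not part of this module):
 *
 *   index, cache = parsers.parse_index2(data, inlined)
 *   # cache is (0, data) when the revlog is inline, None otherwise
 *
 * The "N" format units in Py_BuildValue above steal the references to
 * idx and cache, so no extra decref is needed on the success path.
 */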
3280 3473
3281 3474 static Revlog_CAPI CAPI = {
3282 3475 /* increment the abi_version field upon each change in the Revlog_CAPI
3283 3476 struct or in the ABI of the listed functions */
3284 3477 3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
3285 3478 };
3286 3479
3287 3480 void revlog_module_init(PyObject *mod)
3288 3481 {
3289 3482 PyObject *caps = NULL;
3290 3483 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3291 3484 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3292 3485 return;
3293 3486 Py_INCREF(&HgRevlogIndex_Type);
3294 3487 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3295 3488
3296 3489 nodetreeType.tp_new = PyType_GenericNew;
3297 3490 if (PyType_Ready(&nodetreeType) < 0)
3298 3491 return;
3299 3492 Py_INCREF(&nodetreeType);
3300 3493 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3301 3494
3302 3495 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3303 3496 if (caps != NULL)
3304 3497 PyModule_AddObject(mod, "revlog_CAPI", caps);
3305 3498 }