##// END OF EJS Templates
revlog: use appropriate format char for int ("i" instead of "I")...
av6 -
r50107:d3d3495a default
parent child Browse files
Show More
@@ -1,3300 +1,3300 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
typedef struct indexObjectStruct indexObject;

/* One node of the base-16 trie: a child slot per nibble value.
 * See the value encoding documented above the nodetree struct below. */
typedef struct {
	int children[16];
} nodetreenode;

/* Table of C entry points for out-of-module consumers.
 * NOTE(review): presumably exported via a capsule for e.g. the rust
 * extensions — confirm against the module init code (not visible here).
 * abi_version must change whenever this layout changes. */
typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*fast_rank)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;
39 39
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index;  /* index this trie maps into */
	nodetreenode *nodes; /* growable array of trie nodes */
	Py_ssize_t nodelen;  /* digest size of the hashes inserted */
	size_t length;       /* # nodes in use */
	size_t capacity;     /* # nodes allocated */
	int depth;           /* maximum depth of tree */
	int splits;          /* # splits performed */
} nodetree;

/* Python object wrapper around a standalone nodetree. */
typedef struct {
	PyObject_HEAD /* ; */
	nodetree nt;
} nodetreeObject;
61 61
/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * end.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;         /* raw bytes of index */
	Py_ssize_t nodelen;     /* digest size of the hash, 20 for SHA-1 */
	PyObject *nullentry;    /* fast path for references to null */
	Py_buffer buf;          /* buffer of data */
	const char **offsets;   /* populated on demand */
	Py_ssize_t length;      /* current on-disk number of elements */
	unsigned new_length;    /* number of added elements */
	unsigned added_length;  /* space reserved for added elements */
	char *added;            /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;            /* 1 when data is interleaved with entries */
	long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
	                  */
	long rust_ext_compat; /* compatibility with being used in rust
	                         extensions */
	long format_version; /* format version selector (format_*) */
};
99 99
100 100 static Py_ssize_t index_length(const indexObject *self)
101 101 {
102 102 return self->length + self->new_length;
103 103 }
104 104
/* All-zero node id, shared as the hash of nullrev. */
static const char nullid[32] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node);

/* Format string for building/parsing index entry tuples.  The 64-bit
 * fields use "K" or "k" depending on whether "unsigned long" is itself
 * 64 bits wide on this platform. */
#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = "Kiiiiiiy#KiBBi";
#else
static const char *const tuple_format = "kiiiiiiy#kiBBi";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_entry_size = 64;

/* A Revlogv2 index entry is 96 bytes long. */
static const long v2_entry_size = 96;

/* A Changelogv2 index entry is 96 bytes long. */
static const long cl2_entry_size = 96;

/* Internal format version.
 * Must match their counterparts in revlogutils/constants.py */
static const long format_v1 = 1;       /* constants.py: REVLOGV1 */
static const long format_v2 = 0xDEAD;  /* constants.py: REVLOGV2 */
static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */

/* Byte offsets of the fields within a v1 index entry. */
static const long entry_v1_offset_high = 0;
static const long entry_v1_offset_offset_flags = 4;
static const long entry_v1_offset_comp_len = 8;
static const long entry_v1_offset_uncomp_len = 12;
static const long entry_v1_offset_base_rev = 16;
static const long entry_v1_offset_link_rev = 20;
static const long entry_v1_offset_parent_1 = 24;
static const long entry_v1_offset_parent_2 = 28;
static const long entry_v1_offset_node_id = 32;

/* Byte offsets of the fields within a v2 index entry. */
static const long entry_v2_offset_high = 0;
static const long entry_v2_offset_offset_flags = 4;
static const long entry_v2_offset_comp_len = 8;
static const long entry_v2_offset_uncomp_len = 12;
static const long entry_v2_offset_base_rev = 16;
static const long entry_v2_offset_link_rev = 20;
static const long entry_v2_offset_parent_1 = 24;
static const long entry_v2_offset_parent_2 = 28;
static const long entry_v2_offset_node_id = 32;
static const long entry_v2_offset_sidedata_offset = 64;
static const long entry_v2_offset_sidedata_comp_len = 72;
static const long entry_v2_offset_all_comp_mode = 76;
/* next free offset: 77 */

/* Byte offsets of the fields within a changelogv2 index entry. */
static const long entry_cl2_offset_high = 0;
static const long entry_cl2_offset_offset_flags = 4;
static const long entry_cl2_offset_comp_len = 8;
static const long entry_cl2_offset_uncomp_len = 12;
static const long entry_cl2_offset_parent_1 = 16;
static const long entry_cl2_offset_parent_2 = 20;
static const long entry_cl2_offset_node_id = 24;
static const long entry_cl2_offset_sidedata_offset = 56;
static const long entry_cl2_offset_sidedata_comp_len = 64;
static const long entry_cl2_offset_all_comp_mode = 68;
static const long entry_cl2_offset_rank = 69;
/* next free offset: 73 */

static const char comp_mode_inline = 2;
static const int rank_unknown = -1;
172 172
173 173 static void raise_revlog_error(void)
174 174 {
175 175 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
176 176
177 177 mod = PyImport_ImportModule("mercurial.error");
178 178 if (mod == NULL) {
179 179 goto cleanup;
180 180 }
181 181
182 182 dict = PyModule_GetDict(mod);
183 183 if (dict == NULL) {
184 184 goto cleanup;
185 185 }
186 186 Py_INCREF(dict);
187 187
188 188 errclass = PyDict_GetItemString(dict, "RevlogError");
189 189 if (errclass == NULL) {
190 190 PyErr_SetString(PyExc_SystemError,
191 191 "could not find RevlogError");
192 192 goto cleanup;
193 193 }
194 194
195 195 /* value of exception is ignored by callers */
196 196 PyErr_SetString(errclass, "RevlogError");
197 197
198 198 cleanup:
199 199 Py_XDECREF(dict);
200 200 Py_XDECREF(mod);
201 201 }
202 202
/*
 * Return a pointer to the beginning of a RevlogNG record, or NULL with
 * a Python exception set on failure.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	/* Entries appended since the last write live in self->added. */
	if (pos >= self->length)
		return self->added + (pos - self->length) * self->entry_size;

	/* Inline revlogs interleave chunk data with index entries, so
	 * record starts are irregular; lazily build an offset table on
	 * first access.  Entry 0 always starts at the buffer head, so it
	 * falls through to the arithmetic below. */
	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			Py_ssize_t ret;
			self->offsets =
			    PyMem_Malloc(self->length * sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			ret = inline_scan(self, self->offsets);
			if (ret == -1) {
				return NULL;
			};
		}
		return self->offsets[pos];
	}

	/* Non-inline on-disk entries are fixed size: simple arithmetic. */
	return (const char *)(self->buf.buf) + pos * self->entry_size;
}
228 228
/*
 * Get parents of the given rev.
 *
 * The specified rev must be valid and must not be nullrev. A returned
 * parent revision may be nullrev, but is guaranteed to be in valid range.
 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
	/* NOTE(review): index_deref() can return NULL on allocation
	 * failure and the result is not checked before dereference —
	 * confirm the caller contract guarantees a readable rev. */
	const char *data = index_deref(self, rev);

	/* The parent fields sit at format-specific offsets. */
	if (self->format_version == format_v1) {
		ps[0] = getbe32(data + entry_v1_offset_parent_1);
		ps[1] = getbe32(data + entry_v1_offset_parent_2);
	} else if (self->format_version == format_v2) {
		ps[0] = getbe32(data + entry_v2_offset_parent_1);
		ps[1] = getbe32(data + entry_v2_offset_parent_2);
	} else if (self->format_version == format_cl2) {
		ps[0] = getbe32(data + entry_cl2_offset_parent_1);
		ps[1] = getbe32(data + entry_cl2_offset_parent_2);
	} else {
		raise_revlog_error();
		return -1;
	}

	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}
262 262
/*
 * Get parents of the given rev.
 *
 * If the specified rev is out of range, IndexError will be raised. If the
 * revlog entry is corrupted, ValueError may be raised.
 *
 * Returns 0 on success or -1 on failure.
 */
static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
{
	int tiprev;
	/* Part of the shared C API (Revlog_CAPI), so validate arguments
	 * defensively instead of assuming a well-behaved caller. */
	if (!op || !HgRevlogIndex_Check(op) || !ps) {
		PyErr_BadInternalCall();
		return -1;
	}
	tiprev = (int)index_length((indexObject *)op) - 1;
	if (rev < -1 || rev > tiprev) {
		PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
		return -1;
	} else if (rev == -1) {
		/* nullrev: both parents are nullrev by convention */
		ps[0] = ps[1] = -1;
		return 0;
	} else {
		return index_get_parents((indexObject *)op, rev, ps, tiprev);
	}
}
289 289
/*
 * Return the byte position at which the given rev's chunk starts in the
 * data file.  On disk the value is packed as (start << 16 | flags); the
 * upper 32 bits live in a separate "high" field.
 */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	uint64_t offset;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		offset = getbe32(data + entry_v1_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_v2) {
		offset = getbe32(data + entry_v2_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_cl2) {
		/* no version-number masking of the first entry here */
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset = getbe32(data + entry_cl2_offset_offset_flags);
		offset |= ((uint64_t)offset_high) << 32;
	} else {
		raise_revlog_error();
		return -1;
	}

	/* drop the low 16 flag bits, keeping only the start offset */
	return (int64_t)(offset >> 16);
}
331 331
/*
 * Return the compressed chunk length of the given rev, or -1 with a
 * Python exception set on error.
 */
static inline int index_get_length(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	int tmp;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		tmp = (int)getbe32(data + entry_v1_offset_comp_len);
	} else if (self->format_version == format_v2) {
		tmp = (int)getbe32(data + entry_v2_offset_comp_len);
	} else if (self->format_version == format_cl2) {
		tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
	} else {
		raise_revlog_error();
		return -1;
	}
	/* The on-disk field is an unsigned 32-bit value; a negative value
	 * after the cast means it does not fit in an int. */
	if (tmp < 0) {
		PyErr_Format(PyExc_OverflowError,
		             "revlog entry size out of bound (%d)", tmp);
		return -1;
	}
	return tmp;
}
359 359
/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 *
 * Decode the entry at `pos` into a Python tuple laid out per
 * tuple_format.  Returns a new reference, or NULL with an exception set.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank = rank_unknown;
	char data_comp_mode, sidedata_comp_mode;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	/* nullrev is served from a preallocated shared tuple. */
	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	if (self->format_version == format_v1) {
		offset_flags = getbe32(data + entry_v1_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v1_offset_comp_len);
		uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
		base_rev = getbe32(data + entry_v1_offset_base_rev);
		link_rev = getbe32(data + entry_v1_offset_link_rev);
		parent_1 = getbe32(data + entry_v1_offset_parent_1);
		parent_2 = getbe32(data + entry_v1_offset_parent_2);
		c_node_id = data + entry_v1_offset_node_id;

		/* v1 has no sidedata: fill the tuple with neutral values */
		sidedata_offset = 0;
		sidedata_comp_len = 0;
		data_comp_mode = comp_mode_inline;
		sidedata_comp_mode = comp_mode_inline;
	} else if (self->format_version == format_v2) {
		offset_flags = getbe32(data + entry_v2_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v2_offset_comp_len);
		uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
		base_rev = getbe32(data + entry_v2_offset_base_rev);
		link_rev = getbe32(data + entry_v2_offset_link_rev);
		parent_1 = getbe32(data + entry_v2_offset_parent_1);
		parent_2 = getbe32(data + entry_v2_offset_parent_2);
		c_node_id = data + entry_v2_offset_node_id;

		sidedata_offset =
		    getbe64(data + entry_v2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_v2_offset_sidedata_comp_len);
		/* data mode in bits 0-1, sidedata mode in bits 2-3 */
		data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
	} else if (self->format_version == format_cl2) {
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
		offset_flags |= ((uint64_t)offset_high) << 32;
		comp_len = getbe32(data + entry_cl2_offset_comp_len);
		uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
		/* base_rev and link_rev are not stored in changelogv2, but are
		   still used by some functions shared with the other revlogs.
		   They are supposed to contain links to other revisions,
		   but they always point to themselves in the case of a changelog.
		*/
		base_rev = pos;
		link_rev = pos;
		parent_1 = getbe32(data + entry_cl2_offset_parent_1);
		parent_2 = getbe32(data + entry_cl2_offset_parent_2);
		c_node_id = data + entry_cl2_offset_node_id;
		sidedata_offset =
		    getbe64(data + entry_cl2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_cl2_offset_sidedata_comp_len);
		data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
		rank = getbe32(data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
	                     self->nodelen, sidedata_offset, sidedata_comp_len,
	                     data_comp_mode, sidedata_comp_mode, rank);
}
487 487 /*
488 488 * Pack header information in binary
489 489 */
490 490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
491 491 {
492 492 int header;
493 493 char out[4];
494 if (!PyArg_ParseTuple(args, "I", &header)) {
494 if (!PyArg_ParseTuple(args, "i", &header)) {
495 495 return NULL;
496 496 }
497 497 if (self->format_version != format_v1) {
498 498 PyErr_Format(PyExc_RuntimeError,
499 499 "version header should go in the docket, not the "
500 500 "index: %d",
501 501 header);
502 502 return NULL;
503 503 }
504 504 putbe32(header, out);
505 505 return PyBytes_FromStringAndSize(out, 4);
506 506 }
/*
 * Return the raw binary string representing a revision, or NULL with an
 * exception set (ValueError for an out-of-range rev).
 */
static PyObject *index_entry_binary(indexObject *self, PyObject *value)
{
	long rev;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (!pylong_to_long(value, &rev)) {
		return NULL;
	}
	if (rev < 0 || rev >= length) {
		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
		             rev);
		return NULL;
	};

	data = index_deref(self, rev);
	if (data == NULL)
		return NULL;
	if (rev == 0 && self->format_version == format_v1) {
		/* the header is eating the start of the first entry */
		return PyBytes_FromStringAndSize(data + 4,
		                                 self->entry_size - 4);
	}
	return PyBytes_FromStringAndSize(data, self->entry_size);
}
535 535
/*
 * Return the hash of node corresponding to the given rev, or NULL if
 * the rev is out of range or its entry cannot be read.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;
	const char *node_id;

	if (pos == nullrev)
		return nullid;

	if (pos >= length)
		return NULL;

	data = index_deref(self, pos);

	if (self->format_version == format_v1) {
		node_id = data + entry_v1_offset_node_id;
	} else if (self->format_version == format_v2) {
		node_id = data + entry_v2_offset_node_id;
	} else if (self->format_version == format_cl2) {
		node_id = data + entry_cl2_offset_node_id;
	} else {
		raise_revlog_error();
		return NULL;
	}

	/* index_deref() may have failed (data == NULL); only then is the
	 * computed node pointer invalid. */
	return data ? node_id : NULL;
}
566 566
/*
 * Return the stored rank of a given revision if known, or rank_unknown
 * otherwise.
 *
 * The rank of a revision is the size of the sub-graph it defines as a head.
 * Equivalently, the rank of a revision `r` is the size of the set
 * `ancestors(r)`, `r` included.
 *
 * This method returns the rank retrieved from the revlog in constant time. It
 * makes no attempt at computing unknown values for versions of the revlog
 * which do not persist the rank.
 */
static int index_fast_rank(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);

	/* only changelogv2 persists a rank field */
	if (self->format_version != format_cl2 || pos >= length) {
		return rank_unknown;
	}

	if (pos == nullrev) {
		return 0; /* convention */
	}

	/* NOTE(review): index_deref() may return NULL on allocation
	 * failure; its result is not checked here — confirm callers only
	 * pass readable revs. */
	return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
}
593 593
594 594 /*
595 595 * Return the hash of the node corresponding to the given rev. The
596 596 * rev is assumed to be existing. If not, an exception is set.
597 597 */
598 598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
599 599 {
600 600 const char *node = index_node(self, pos);
601 601 if (node == NULL) {
602 602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
603 603 (int)pos);
604 604 }
605 605 return node;
606 606 }
607 607
static int nt_insert(nodetree *self, const char *node, int rev);

/*
 * Extract the raw bytes of a node from a Python bytes object into
 * *node, checking the length against the expected digest size.
 * Returns 0 on success, -1 with an exception set otherwise.
 */
static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
{
	Py_ssize_t thisnodelen;
	if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
		return -1;
	if (nodelen == thisnodelen)
		return 0;
	PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
	             thisnodelen, nodelen);
	return -1;
}
621 621
/*
 * Append one revision entry (a tuple matching tuple_format) to the
 * in-memory tail of the index.  Returns None, or NULL with an
 * exception set on error.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	uint64_t offset_flags, sidedata_offset;
	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank;
	char data_comp_mode, sidedata_comp_mode;
	Py_ssize_t c_node_id_len;
	const char *c_node_id;
	char comp_field;
	char *data;

	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
	                      &parent_2, &c_node_id, &c_node_id_len,
	                      &sidedata_offset, &sidedata_comp_len,
	                      &data_comp_mode, &sidedata_comp_mode, &rank)) {
		/* NOTE(review): tuple_format describes 13 tuple elements, so
		 * this message looks stale — confirm before relying on it. */
		PyErr_SetString(PyExc_TypeError, "12-tuple required");
		return NULL;
	}

	if (c_node_id_len != self->nodelen) {
		PyErr_SetString(PyExc_TypeError, "invalid node");
		return NULL;
	}
	/* v1 only supports inline compression modes */
	if (self->format_version == format_v1) {

		if (data_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid data compression mode: %i",
			             data_comp_mode);
			return NULL;
		}
		if (sidedata_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid sidedata compression mode: %i",
			             sidedata_comp_mode);
			return NULL;
		}
	}

	/* grow the in-memory entry buffer geometrically when full */
	if (self->new_length == self->added_length) {
		size_t new_added_length =
		    self->added_length ? self->added_length * 2 : 4096;
		void *new_added = PyMem_Realloc(
		    self->added, new_added_length * self->entry_size);
		if (!new_added)
			return PyErr_NoMemory();
		self->added = new_added;
		self->added_length = new_added_length;
	}
	rev = self->length + self->new_length;
	data = self->added + self->entry_size * self->new_length++;

	memset(data, 0, self->entry_size);

	/* serialize each field at its format-specific byte offset */
	if (self->format_version == format_v1) {
		putbe32(offset_flags >> 32, data + entry_v1_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_v1_offset_offset_flags);
		putbe32(comp_len, data + entry_v1_offset_comp_len);
		putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
		putbe32(base_rev, data + entry_v1_offset_base_rev);
		putbe32(link_rev, data + entry_v1_offset_link_rev);
		putbe32(parent_1, data + entry_v1_offset_parent_1);
		putbe32(parent_2, data + entry_v1_offset_parent_2);
		memcpy(data + entry_v1_offset_node_id, c_node_id,
		       c_node_id_len);
	} else if (self->format_version == format_v2) {
		putbe32(offset_flags >> 32, data + entry_v2_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_v2_offset_offset_flags);
		putbe32(comp_len, data + entry_v2_offset_comp_len);
		putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
		putbe32(base_rev, data + entry_v2_offset_base_rev);
		putbe32(link_rev, data + entry_v2_offset_link_rev);
		putbe32(parent_1, data + entry_v2_offset_parent_1);
		putbe32(parent_2, data + entry_v2_offset_parent_2);
		memcpy(data + entry_v2_offset_node_id, c_node_id,
		       c_node_id_len);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		/* data mode in bits 0-1, sidedata mode in bits 2-3 */
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[entry_v2_offset_all_comp_mode] = comp_field;
	} else if (self->format_version == format_cl2) {
		putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_cl2_offset_offset_flags);
		putbe32(comp_len, data + entry_cl2_offset_comp_len);
		putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
		putbe32(parent_1, data + entry_cl2_offset_parent_1);
		putbe32(parent_2, data + entry_cl2_offset_parent_2);
		memcpy(data + entry_cl2_offset_node_id, c_node_id,
		       c_node_id_len);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[entry_cl2_offset_all_comp_mode] = comp_field;
		putbe32(rank, data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	/* keep the node->rev trie in sync if it has been built */
	if (self->ntinitialized)
		nt_insert(&self->nt, c_node_id, rev);

	/* any mutation invalidates the cached headrevs */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
737 737
/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
	uint64_t offset_flags, sidedata_offset;
	Py_ssize_t rev;
	int sidedata_comp_len;
	char comp_mode;
	char *data;
/* "n" = Py_ssize_t; the 64-bit fields use "K"/"k" depending on the
 * width of unsigned long (mirrors tuple_format). */
#if LONG_MAX == 0x7fffffffL
	const char *const sidedata_format = "nKiKB";
#else
	const char *const sidedata_format = "nkikB";
#endif

	if (self->entry_size == v1_entry_size || self->inlined) {
		/*
		 There is a bug in the transaction handling when going from an
		 inline revlog to a separate index and data file. Turn it off until
		 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
		 See issue6485.
		*/
		raise_revlog_error();
		return NULL;
	}

	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
	                      &sidedata_comp_len, &offset_flags, &comp_mode))
		return NULL;

	if (rev < 0 || rev >= index_length(self)) {
		PyErr_SetString(PyExc_IndexError, "revision outside index");
		return NULL;
	}
	/* entries already flushed to disk cannot be patched in memory */
	if (rev < self->length) {
		PyErr_SetString(
		    PyExc_IndexError,
		    "cannot rewrite entries outside of this transaction");
		return NULL;
	}

	/* Find the newly added node, offset from the "already on-disk" length
	 */
	data = self->added + self->entry_size * (rev - self->length);
	if (self->format_version == format_v2) {
		/* the 64-bit store spans the high and offset_flags fields */
		putbe64(offset_flags, data + entry_v2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		/* replace only bits 2-3 (sidedata compression mode) */
		data[entry_v2_offset_all_comp_mode] =
		    (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else if (self->format_version == format_cl2) {
		putbe64(offset_flags, data + entry_cl2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		data[entry_cl2_offset_all_comp_mode] =
		    (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else {
		raise_revlog_error();
		return NULL;
	}

	Py_RETURN_NONE;
}
808 808
/*
 * Return a dict of bytes->int statistics about the index and its node
 * trie, for debugging and introspection.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* Store obj[__d] = self->__n, jumping to bail on any failure. */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyLong_FromSsize_t(self->__n);                             \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added_length)
		istat(new_length, "index entries added");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
853 853
854 854 /*
855 855 * When we cache a list, we want to be sure the caller can't mutate
856 856 * the cached copy.
857 857 */
858 858 static PyObject *list_copy(PyObject *list)
859 859 {
860 860 Py_ssize_t len = PyList_GET_SIZE(list);
861 861 PyObject *newlist = PyList_New(len);
862 862 Py_ssize_t i;
863 863
864 864 if (newlist == NULL)
865 865 return NULL;
866 866
867 867 for (i = 0; i < len; i++) {
868 868 PyObject *obj = PyList_GET_ITEM(list, i);
869 869 Py_INCREF(obj);
870 870 PyList_SET_ITEM(newlist, i, obj);
871 871 }
872 872
873 873 return newlist;
874 874 }
875 875
/*
 * Call the Python callable `filter` with revision `arg` and coerce the
 * result to a boolean.  Returns 1 if filtered, 0 if not (or when no
 * filter is set), -1 with an exception set on error.
 */
static int check_filter(PyObject *filter, Py_ssize_t arg)
{
	if (filter) {
		PyObject *arglist, *result;
		int isfiltered;

		arglist = Py_BuildValue("(n)", arg);
		if (!arglist) {
			return -1;
		}

		result = PyObject_Call(filter, arglist, NULL);
		Py_DECREF(arglist);
		if (!result) {
			return -1;
		}

		/* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
		 * same as this function, so we can just return it directly.*/
		isfiltered = PyObject_IsTrue(result);
		Py_DECREF(result);
		return isfiltered;
	} else {
		return 0;
	}
}
902 902
903 903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 904 int parent_2, Py_ssize_t i)
905 905 {
906 906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 907 phases[i] = phases[parent_1];
908 908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 909 phases[i] = phases[parent_2];
910 910 }
911 911
/*
 * reachableroots2(minroot, heads, roots, includepath) -> list of revs
 *
 * Starting from `heads`, walk ancestors (never descending below rev
 * `minroot`) and collect the members of `roots` that are reached.
 * When `includepath` is true, every revision lying on a path between
 * a reachable root and a head is returned as well.  Returns a new
 * list, or NULL with an exception set on failure.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev), indexed
	 * by rev+1 so that nullrev (-1) maps to slot 0 */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark the requested roots */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyLong_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* unless full paths are requested, the walk can
			 * stop at a root */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* scan oldest->newest: a seen rev is on a path as soon as
		 * either of its parents is reachable */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyLong_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
1070 1070
1071 1071 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1072 1072 char phase)
1073 1073 {
1074 1074 Py_ssize_t len = index_length(self);
1075 1075 PyObject *item;
1076 1076 PyObject *iterator;
1077 1077 int rev, minrev = -1;
1078 1078 char *node;
1079 1079
1080 1080 if (!PySet_Check(roots)) {
1081 1081 PyErr_SetString(PyExc_TypeError,
1082 1082 "roots must be a set of nodes");
1083 1083 return -2;
1084 1084 }
1085 1085 iterator = PyObject_GetIter(roots);
1086 1086 if (iterator == NULL)
1087 1087 return -2;
1088 1088 while ((item = PyIter_Next(iterator))) {
1089 1089 if (node_check(self->nodelen, item, &node) == -1)
1090 1090 goto failed;
1091 1091 rev = index_find_node(self, node);
1092 1092 /* null is implicitly public, so negative is invalid */
1093 1093 if (rev < 0 || rev >= len)
1094 1094 goto failed;
1095 1095 phases[rev] = phase;
1096 1096 if (minrev == -1 || minrev > rev)
1097 1097 minrev = rev;
1098 1098 Py_DECREF(item);
1099 1099 }
1100 1100 Py_DECREF(iterator);
1101 1101 return minrev;
1102 1102 failed:
1103 1103 Py_DECREF(iterator);
1104 1104 Py_DECREF(item);
1105 1105 return -2;
1106 1106 }
1107 1107
1108 1108 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1109 1109 {
1110 1110 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1111 1111 96: internal */
1112 1112 static const char trackedphases[] = {1, 2, 32, 96};
1113 1113 PyObject *roots = Py_None;
1114 1114 PyObject *phasesetsdict = NULL;
1115 1115 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1116 1116 Py_ssize_t len = index_length(self);
1117 1117 char *phases = NULL;
1118 1118 int minphaserev = -1, rev, i;
1119 1119 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1120 1120
1121 1121 if (!PyArg_ParseTuple(args, "O", &roots))
1122 1122 return NULL;
1123 1123 if (roots == NULL || !PyDict_Check(roots)) {
1124 1124 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1125 1125 return NULL;
1126 1126 }
1127 1127
1128 1128 phases = calloc(len, 1);
1129 1129 if (phases == NULL) {
1130 1130 PyErr_NoMemory();
1131 1131 return NULL;
1132 1132 }
1133 1133
1134 1134 for (i = 0; i < numphases; ++i) {
1135 1135 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1136 1136 PyObject *phaseroots = NULL;
1137 1137 if (pyphase == NULL)
1138 1138 goto release;
1139 1139 phaseroots = PyDict_GetItem(roots, pyphase);
1140 1140 Py_DECREF(pyphase);
1141 1141 if (phaseroots == NULL)
1142 1142 continue;
1143 1143 rev = add_roots_get_min(self, phaseroots, phases,
1144 1144 trackedphases[i]);
1145 1145 if (rev == -2)
1146 1146 goto release;
1147 1147 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1148 1148 minphaserev = rev;
1149 1149 }
1150 1150
1151 1151 for (i = 0; i < numphases; ++i) {
1152 1152 phasesets[i] = PySet_New(NULL);
1153 1153 if (phasesets[i] == NULL)
1154 1154 goto release;
1155 1155 }
1156 1156
1157 1157 if (minphaserev == -1)
1158 1158 minphaserev = len;
1159 1159 for (rev = minphaserev; rev < len; ++rev) {
1160 1160 PyObject *pyphase = NULL;
1161 1161 PyObject *pyrev = NULL;
1162 1162 int parents[2];
1163 1163 /*
1164 1164 * The parent lookup could be skipped for phaseroots, but
1165 1165 * phase --force would historically not recompute them
1166 1166 * correctly, leaving descendents with a lower phase around.
1167 1167 * As such, unconditionally recompute the phase.
1168 1168 */
1169 1169 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1170 1170 goto release;
1171 1171 set_phase_from_parents(phases, parents[0], parents[1], rev);
1172 1172 switch (phases[rev]) {
1173 1173 case 0:
1174 1174 continue;
1175 1175 case 1:
1176 1176 pyphase = phasesets[0];
1177 1177 break;
1178 1178 case 2:
1179 1179 pyphase = phasesets[1];
1180 1180 break;
1181 1181 case 32:
1182 1182 pyphase = phasesets[2];
1183 1183 break;
1184 1184 case 96:
1185 1185 pyphase = phasesets[3];
1186 1186 break;
1187 1187 default:
1188 1188 /* this should never happen since the phase number is
1189 1189 * specified by this function. */
1190 1190 PyErr_SetString(PyExc_SystemError,
1191 1191 "bad phase number in internal list");
1192 1192 goto release;
1193 1193 }
1194 1194 pyrev = PyLong_FromLong(rev);
1195 1195 if (pyrev == NULL)
1196 1196 goto release;
1197 1197 if (PySet_Add(pyphase, pyrev) == -1) {
1198 1198 Py_DECREF(pyrev);
1199 1199 goto release;
1200 1200 }
1201 1201 Py_DECREF(pyrev);
1202 1202 }
1203 1203
1204 1204 phasesetsdict = _dict_new_presized(numphases);
1205 1205 if (phasesetsdict == NULL)
1206 1206 goto release;
1207 1207 for (i = 0; i < numphases; ++i) {
1208 1208 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1209 1209 if (pyphase == NULL)
1210 1210 goto release;
1211 1211 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1212 1212 -1) {
1213 1213 Py_DECREF(pyphase);
1214 1214 goto release;
1215 1215 }
1216 1216 Py_DECREF(phasesets[i]);
1217 1217 phasesets[i] = NULL;
1218 1218 }
1219 1219
1220 1220 free(phases);
1221 1221 return Py_BuildValue("nN", len, phasesetsdict);
1222 1222
1223 1223 release:
1224 1224 for (i = 0; i < numphases; ++i)
1225 1225 Py_XDECREF(phasesets[i]);
1226 1226 Py_XDECREF(phasesetsdict);
1227 1227
1228 1228 free(phases);
1229 1229 return NULL;
1230 1230 }
1231 1231
/*
 * index_headrevs([filteredrevs]) -> list of head revisions
 *
 * A head is a revision with no unfiltered children.  The computed
 * list is cached on the index together with the filter it was built
 * for; callers always receive a fresh copy of the cached list so
 * they cannot mutate the cache.  Returns NULL with an exception set
 * on failure.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit: same filter object as the cached computation */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	/* empty index: nullrev (-1) is the only head */
	if (len == 0) {
		PyObject *nullid = PyLong_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* walk newest to oldest, marking the parents of every unfiltered
	 * revision as non-heads */
	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* whatever was never marked is a head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyLong_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
1333 1333
1334 1334 /**
1335 1335 * Obtain the base revision index entry.
1336 1336 *
1337 1337 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1338 1338 */
1339 1339 static inline int index_baserev(indexObject *self, int rev)
1340 1340 {
1341 1341 const char *data;
1342 1342 int result;
1343 1343
1344 1344 data = index_deref(self, rev);
1345 1345 if (data == NULL)
1346 1346 return -2;
1347 1347
1348 1348 if (self->format_version == format_v1) {
1349 1349 result = getbe32(data + entry_v1_offset_base_rev);
1350 1350 } else if (self->format_version == format_v2) {
1351 1351 result = getbe32(data + entry_v2_offset_base_rev);
1352 1352 } else if (self->format_version == format_cl2) {
1353 1353 return rev;
1354 1354 } else {
1355 1355 raise_revlog_error();
1356 1356 return -1;
1357 1357 }
1358 1358
1359 1359 if (result > rev) {
1360 1360 PyErr_Format(
1361 1361 PyExc_ValueError,
1362 1362 "corrupted revlog, revision base above revision: %d, %d",
1363 1363 rev, result);
1364 1364 return -2;
1365 1365 }
1366 1366 if (result < -1) {
1367 1367 PyErr_Format(
1368 1368 PyExc_ValueError,
1369 1369 "corrupted revlog, revision base out of range: %d, %d", rev,
1370 1370 result);
1371 1371 return -2;
1372 1372 }
1373 1373 return result;
1374 1374 }
1375 1375
/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 *
 * Returns 1 if `rev` is a snapshot, 0 if it is a regular delta, -1
 * with an exception set on error.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	Py_ssize_t base;
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		/* a revision that is its own base has no delta base */
		if (base == rev) {
			base = -1;
		}
		if (base == -2) {
			assert(PyErr_Occurred());
			return -1;
		}
		/* delta against nullrev: a full snapshot */
		if (base == -1) {
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		/* a delta against one of its parents is a regular delta,
		 * not a snapshot */
		if (base == ps[0] || base == ps[1]) {
			return 0;
		}
		/* delta against a non-parent: keep walking down the chain */
		rev = base;
	}
	return rev == -1;
}
1409 1409
1410 1410 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1411 1411 {
1412 1412 long rev;
1413 1413 int issnap;
1414 1414 Py_ssize_t length = index_length(self);
1415 1415
1416 1416 if (!pylong_to_long(value, &rev)) {
1417 1417 return NULL;
1418 1418 }
1419 1419 if (rev < -1 || rev >= length) {
1420 1420 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1421 1421 rev);
1422 1422 return NULL;
1423 1423 };
1424 1424 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1425 1425 if (issnap < 0) {
1426 1426 return NULL;
1427 1427 };
1428 1428 return PyBool_FromLong((long)issnap);
1429 1429 }
1430 1430
1431 1431 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1432 1432 {
1433 1433 Py_ssize_t start_rev;
1434 1434 PyObject *cache;
1435 1435 Py_ssize_t base;
1436 1436 Py_ssize_t rev;
1437 1437 PyObject *key = NULL;
1438 1438 PyObject *value = NULL;
1439 1439 const Py_ssize_t length = index_length(self);
1440 1440 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1441 1441 return NULL;
1442 1442 }
1443 1443 for (rev = start_rev; rev < length; rev++) {
1444 1444 int issnap;
1445 1445 PyObject *allvalues = NULL;
1446 1446 issnap = index_issnapshotrev(self, rev);
1447 1447 if (issnap < 0) {
1448 1448 goto bail;
1449 1449 }
1450 1450 if (issnap == 0) {
1451 1451 continue;
1452 1452 }
1453 1453 base = (Py_ssize_t)index_baserev(self, rev);
1454 1454 if (base == rev) {
1455 1455 base = -1;
1456 1456 }
1457 1457 if (base == -2) {
1458 1458 assert(PyErr_Occurred());
1459 1459 goto bail;
1460 1460 }
1461 1461 key = PyLong_FromSsize_t(base);
1462 1462 allvalues = PyDict_GetItem(cache, key);
1463 1463 if (allvalues == NULL && PyErr_Occurred()) {
1464 1464 goto bail;
1465 1465 }
1466 1466 if (allvalues == NULL) {
1467 1467 int r;
1468 1468 allvalues = PyList_New(0);
1469 1469 if (!allvalues) {
1470 1470 goto bail;
1471 1471 }
1472 1472 r = PyDict_SetItem(cache, key, allvalues);
1473 1473 Py_DECREF(allvalues);
1474 1474 if (r < 0) {
1475 1475 goto bail;
1476 1476 }
1477 1477 }
1478 1478 value = PyLong_FromSsize_t(rev);
1479 1479 if (PyList_Append(allvalues, value)) {
1480 1480 goto bail;
1481 1481 }
1482 1482 Py_CLEAR(key);
1483 1483 Py_CLEAR(value);
1484 1484 }
1485 1485 Py_RETURN_NONE;
1486 1486 bail:
1487 1487 Py_XDECREF(key);
1488 1488 Py_XDECREF(value);
1489 1489 return NULL;
1490 1490 }
1491 1491
1492 1492 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1493 1493 {
1494 1494 int rev, generaldelta;
1495 1495 PyObject *stoparg;
1496 1496 int stoprev, iterrev, baserev = -1;
1497 1497 int stopped;
1498 1498 PyObject *chain = NULL, *result = NULL;
1499 1499 const Py_ssize_t length = index_length(self);
1500 1500
1501 1501 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1502 1502 return NULL;
1503 1503 }
1504 1504
1505 1505 if (PyLong_Check(stoparg)) {
1506 1506 stoprev = (int)PyLong_AsLong(stoparg);
1507 1507 if (stoprev == -1 && PyErr_Occurred()) {
1508 1508 return NULL;
1509 1509 }
1510 1510 } else if (stoparg == Py_None) {
1511 1511 stoprev = -2;
1512 1512 } else {
1513 1513 PyErr_SetString(PyExc_ValueError,
1514 1514 "stoprev must be integer or None");
1515 1515 return NULL;
1516 1516 }
1517 1517
1518 1518 if (rev < 0 || rev >= length) {
1519 1519 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1520 1520 return NULL;
1521 1521 }
1522 1522
1523 1523 chain = PyList_New(0);
1524 1524 if (chain == NULL) {
1525 1525 return NULL;
1526 1526 }
1527 1527
1528 1528 baserev = index_baserev(self, rev);
1529 1529
1530 1530 /* This should never happen. */
1531 1531 if (baserev <= -2) {
1532 1532 /* Error should be set by index_deref() */
1533 1533 assert(PyErr_Occurred());
1534 1534 goto bail;
1535 1535 }
1536 1536
1537 1537 iterrev = rev;
1538 1538
1539 1539 while (iterrev != baserev && iterrev != stoprev) {
1540 1540 PyObject *value = PyLong_FromLong(iterrev);
1541 1541 if (value == NULL) {
1542 1542 goto bail;
1543 1543 }
1544 1544 if (PyList_Append(chain, value)) {
1545 1545 Py_DECREF(value);
1546 1546 goto bail;
1547 1547 }
1548 1548 Py_DECREF(value);
1549 1549
1550 1550 if (generaldelta) {
1551 1551 iterrev = baserev;
1552 1552 } else {
1553 1553 iterrev--;
1554 1554 }
1555 1555
1556 1556 if (iterrev < 0) {
1557 1557 break;
1558 1558 }
1559 1559
1560 1560 if (iterrev >= length) {
1561 1561 PyErr_SetString(PyExc_IndexError,
1562 1562 "revision outside index");
1563 1563 return NULL;
1564 1564 }
1565 1565
1566 1566 baserev = index_baserev(self, iterrev);
1567 1567
1568 1568 /* This should never happen. */
1569 1569 if (baserev <= -2) {
1570 1570 /* Error should be set by index_deref() */
1571 1571 assert(PyErr_Occurred());
1572 1572 goto bail;
1573 1573 }
1574 1574 }
1575 1575
1576 1576 if (iterrev == stoprev) {
1577 1577 stopped = 1;
1578 1578 } else {
1579 1579 PyObject *value = PyLong_FromLong(iterrev);
1580 1580 if (value == NULL) {
1581 1581 goto bail;
1582 1582 }
1583 1583 if (PyList_Append(chain, value)) {
1584 1584 Py_DECREF(value);
1585 1585 goto bail;
1586 1586 }
1587 1587 Py_DECREF(value);
1588 1588
1589 1589 stopped = 0;
1590 1590 }
1591 1591
1592 1592 if (PyList_Reverse(chain)) {
1593 1593 goto bail;
1594 1594 }
1595 1595
1596 1596 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1597 1597 Py_DECREF(chain);
1598 1598 return result;
1599 1599
1600 1600 bail:
1601 1601 Py_DECREF(chain);
1602 1602 return NULL;
1603 1603 }
1604 1604
1605 1605 static inline int64_t
1606 1606 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1607 1607 {
1608 1608 int64_t start_offset;
1609 1609 int64_t end_offset;
1610 1610 int end_size;
1611 1611 start_offset = index_get_start(self, start_rev);
1612 1612 if (start_offset < 0) {
1613 1613 return -1;
1614 1614 }
1615 1615 end_offset = index_get_start(self, end_rev);
1616 1616 if (end_offset < 0) {
1617 1617 return -1;
1618 1618 }
1619 1619 end_size = index_get_length(self, end_rev);
1620 1620 if (end_size < 0) {
1621 1621 return -1;
1622 1622 }
1623 1623 if (end_offset < start_offset) {
1624 1624 PyErr_Format(PyExc_ValueError,
1625 1625 "corrupted revlog index: inconsistent offset "
1626 1626 "between revisions (%zd) and (%zd)",
1627 1627 start_rev, end_rev);
1628 1628 return -1;
1629 1629 }
1630 1630 return (end_offset - start_offset) + (int64_t)end_size;
1631 1631 }
1632 1632
1633 1633 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1634 1634 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1635 1635 Py_ssize_t startidx, Py_ssize_t endidx)
1636 1636 {
1637 1637 int length;
1638 1638 while (endidx > 1 && endidx > startidx) {
1639 1639 length = index_get_length(self, revs[endidx - 1]);
1640 1640 if (length < 0) {
1641 1641 return -1;
1642 1642 }
1643 1643 if (length != 0) {
1644 1644 break;
1645 1645 }
1646 1646 endidx -= 1;
1647 1647 }
1648 1648 return endidx;
1649 1649 }
1650 1650
/* A gap between two consecutive on-disk revisions of a delta chain:
 * `size` is the number of bytes in the gap, `idx` the position (in
 * the chain's rev array) of the revision right after the gap. */
struct Gap {
	int64_t size;
	Py_ssize_t idx;
};
1655 1655
1656 1656 static int gap_compare(const void *left, const void *right)
1657 1657 {
1658 1658 const struct Gap *l_left = ((const struct Gap *)left);
1659 1659 const struct Gap *l_right = ((const struct Gap *)right);
1660 1660 if (l_left->size < l_right->size) {
1661 1661 return -1;
1662 1662 } else if (l_left->size > l_right->size) {
1663 1663 return 1;
1664 1664 }
1665 1665 return 0;
1666 1666 }
1667 1667 static int Py_ssize_t_compare(const void *left, const void *right)
1668 1668 {
1669 1669 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1670 1670 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1671 1671 if (l_left < l_right) {
1672 1672 return -1;
1673 1673 } else if (l_left > l_right) {
1674 1674 return 1;
1675 1675 }
1676 1676 return 0;
1677 1677 }
1678 1678
/*
 * slicechunktodensity(revs, targetdensity, mingapsize) -> slices
 *
 * Slice the delta chain `revs` (a list of revisions) into chunks so
 * that reading each chunk from disk reaches a payload/read-bytes
 * density of at least `targetdensity`, skipping over gaps larger
 * than `mingapsize` bytes.  Returns a tuple or list of revision
 * lists, or NULL with an exception set on failure.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum =
		    PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* a chain smaller than the gap threshold is read in one piece */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* dense enough already: no slicing needed */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		/* empty revisions do not delimit gaps */
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* restore chain order of the chosen cut points */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the final chunk runs to the end of the chain */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1875 1875
1876 1876 static inline int nt_level(const char *node, Py_ssize_t level)
1877 1877 {
1878 1878 int v = node[level >> 1];
1879 1879 if (!(level & 1))
1880 1880 v >>= 4;
1881 1881 return v & 0xf;
1882 1882 }
1883 1883
/*
 * Look up `node` (a binary or hex node id prefix of `nodelen`
 * digits/bytes) in the trie.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	/* pick the per-level nybble accessor for hex vs binary input */
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf: negative values encode -(rev + 2) */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			/* verify the rest of the prefix actually matches
			 * the stored node */
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* v > 0: internal node, descend */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1934 1934
/*
 * Allocate a fresh, zero-initialized trie node, doubling the backing
 * array when full.  Returns the index of the new node, or -1 with a
 * Python exception set on overflow/OOM.
 */
static int nt_new(nodetree *self)
{
	if (self->length == self->capacity) {
		size_t newcapacity;
		nodetreenode *newnodes;
		newcapacity = self->capacity * 2;
		if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
			PyErr_SetString(PyExc_MemoryError,
			                "overflow in nt_new");
			return -1;
		}
		newnodes =
		    realloc(self->nodes, newcapacity * sizeof(nodetreenode));
		if (newnodes == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->capacity = newcapacity;
		self->nodes = newnodes;
		/* realloc does not zero the new tail; callers rely on
		 * fresh nodes having all child slots empty (0) */
		memset(&self->nodes[self->length], 0,
		       sizeof(nodetreenode) * (self->capacity - self->length));
	}
	/* NOTE(review): size_t narrowed to int; in practice revlog sizes
	 * keep this far below INT_MAX */
	return self->length++;
}
1959 1959
/*
 * Insert a node->rev mapping into the trie.  Walks one nybble per
 * level; on collision with a leaf for a *different* node, splits by
 * allocating an interior node and pushing the old leaf one level down.
 * Returns 0 on success, -1 on error (exception set by callees).
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store leaf, encoded as -(rev + 2)
			 * so that revs -1 and 0 stay distinguishable
			 * from "empty" (0) and interior indexes (>0) */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* slot holds an existing leaf */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				/* same node: overwrite the rev */
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			/* re-seat the displaced leaf one level deeper */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* interior node: descend */
			level += 1;
			off = v;
		}
	}

	/* ran out of nybbles without placing the leaf */
	return -1;
}
2007 2007
2008 2008 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2009 2009 {
2010 2010 Py_ssize_t rev;
2011 2011 const char *node;
2012 2012 Py_ssize_t length;
2013 2013 if (!PyArg_ParseTuple(args, "n", &rev))
2014 2014 return NULL;
2015 2015 length = index_length(self->nt.index);
2016 2016 if (rev < 0 || rev >= length) {
2017 2017 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2018 2018 return NULL;
2019 2019 }
2020 2020 node = index_node_existing(self->nt.index, rev);
2021 2021 if (nt_insert(&self->nt, node, (int)rev) == -1)
2022 2022 return NULL;
2023 2023 Py_RETURN_NONE;
2024 2024 }
2025 2025
/*
 * Remove a node from the trie by storing the sentinel rev -2 for it.
 */
static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}
2032 2032
/*
 * Initialize a nodetree sized for roughly `capacity` revisions.
 * Returns 0 on success, -1 with a Python exception set on error.
 * Does not manage the refcount of `index`; that is the caller's job.
 */
static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	/* calloc guarantees all child slots start out empty (0) */
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	/* node 0 is the root */
	self->length = 1;
	return 0;
}
2057 2057
/*
 * nodetree(index, capacity): Python-level constructor.  Format "O!I"
 * matches the unsigned `capacity` argument; takes a reference on the
 * index, released in ntobj_dealloc().
 */
static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}
2068 2068
/*
 * Resolve a hex nodeid prefix to a rev; see nt_find() for the return
 * value conventions.
 */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
2073 2073
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* leaf: the prefix consumed so far plus this
			 * nybble is unique in the tree */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		/* interior node: descend */
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
2119 2119
2120 2120 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2121 2121 {
2122 2122 PyObject *val;
2123 2123 char *node;
2124 2124 int length;
2125 2125
2126 2126 if (!PyArg_ParseTuple(args, "O", &val))
2127 2127 return NULL;
2128 2128 if (node_check(self->nt.nodelen, val, &node) == -1)
2129 2129 return NULL;
2130 2130
2131 2131 length = nt_shortest(&self->nt, node);
2132 2132 if (length == -3)
2133 2133 return NULL;
2134 2134 if (length == -2) {
2135 2135 raise_revlog_error();
2136 2136 return NULL;
2137 2137 }
2138 2138 return PyLong_FromLong(length);
2139 2139 }
2140 2140
/*
 * Release the trie's node storage.  Clearing the pointer makes the
 * function safe to call repeatedly (no double free).
 */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
2146 2146
/*
 * Destructor for the Python nodetree wrapper: drop the index reference
 * taken in ntobj_init(), free trie storage, then free the object.
 */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
2153 2153
/* Methods exposed on the Python-level nodetree object. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
2161 2161
/* Type object for parsers.nodetree; unused/legacy slots left at 0. */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
2201 2201
/*
 * Lazily create the index's embedded nodetree and seed it with the
 * nullid.  Idempotent.  Returns 0 on success, -1 on error.
 */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		/* revs >= ntrev are not yet cached in the tree */
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
2220 2220
/*
 * Map a full binary node to its rev, filling the nodetree cache as a
 * side effect.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				/* cache only the hit */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			/* cache every node visited; on failure remember
			 * how far we got so the scan can resume */
			if (nt_insert(&self->nt, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2280 2280
2281 2281 static PyObject *index_getitem(indexObject *self, PyObject *value)
2282 2282 {
2283 2283 char *node;
2284 2284 int rev;
2285 2285
2286 2286 if (PyLong_Check(value)) {
2287 2287 long idx;
2288 2288 if (!pylong_to_long(value, &idx)) {
2289 2289 return NULL;
2290 2290 }
2291 2291 return index_get(self, idx);
2292 2292 }
2293 2293
2294 2294 if (node_check(self->nodelen, value, &node) == -1)
2295 2295 return NULL;
2296 2296 rev = index_find_node(self, node);
2297 2297 if (rev >= -1)
2298 2298 return PyLong_FromLong(rev);
2299 2299 if (rev == -2)
2300 2300 raise_revlog_error();
2301 2301 return NULL;
2302 2302 }
2303 2303
2304 2304 /*
2305 2305 * Fully populate the radix tree.
2306 2306 */
2307 2307 static int index_populate_nt(indexObject *self)
2308 2308 {
2309 2309 int rev;
2310 2310 if (self->ntrev > 0) {
2311 2311 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2312 2312 const char *n = index_node_existing(self, rev);
2313 2313 if (n == NULL)
2314 2314 return -1;
2315 2315 if (nt_insert(&self->nt, n, rev) == -1)
2316 2316 return -1;
2317 2317 }
2318 2318 self->ntrev = -1;
2319 2319 }
2320 2320 return 0;
2321 2321 }
2322 2322
/*
 * index.partialmatch(hexprefix): resolve a hex nodeid prefix to the
 * full binary node.  Returns None for non-hex input or no match,
 * raises ValueError for bad lengths and RevlogError for an ambiguous
 * prefix.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() sets an exception on invalid characters */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* prefix search requires the whole tree to be populated */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2373 2373
2374 2374 static PyObject *index_shortest(indexObject *self, PyObject *args)
2375 2375 {
2376 2376 PyObject *val;
2377 2377 char *node;
2378 2378 int length;
2379 2379
2380 2380 if (!PyArg_ParseTuple(args, "O", &val))
2381 2381 return NULL;
2382 2382 if (node_check(self->nodelen, val, &node) == -1)
2383 2383 return NULL;
2384 2384
2385 2385 self->ntlookups++;
2386 2386 if (index_init_nt(self) == -1)
2387 2387 return NULL;
2388 2388 if (index_populate_nt(self) == -1)
2389 2389 return NULL;
2390 2390 length = nt_shortest(&self->nt, node);
2391 2391 if (length == -3)
2392 2392 return NULL;
2393 2393 if (length == -2) {
2394 2394 raise_revlog_error();
2395 2395 return NULL;
2396 2396 }
2397 2397 return PyLong_FromLong(length);
2398 2398 }
2399 2399
2400 2400 static PyObject *index_m_get(indexObject *self, PyObject *args)
2401 2401 {
2402 2402 PyObject *val;
2403 2403 char *node;
2404 2404 int rev;
2405 2405
2406 2406 if (!PyArg_ParseTuple(args, "O", &val))
2407 2407 return NULL;
2408 2408 if (node_check(self->nodelen, val, &node) == -1)
2409 2409 return NULL;
2410 2410 rev = index_find_node(self, node);
2411 2411 if (rev == -3)
2412 2412 return NULL;
2413 2413 if (rev == -2)
2414 2414 Py_RETURN_NONE;
2415 2415 return PyLong_FromLong(rev);
2416 2416 }
2417 2417
2418 2418 static int index_contains(indexObject *self, PyObject *value)
2419 2419 {
2420 2420 char *node;
2421 2421
2422 2422 if (PyLong_Check(value)) {
2423 2423 long rev;
2424 2424 if (!pylong_to_long(value, &rev)) {
2425 2425 return -1;
2426 2426 }
2427 2427 return rev >= -1 && rev < index_length(self);
2428 2428 }
2429 2429
2430 2430 if (node_check(self->nodelen, value, &node) == -1)
2431 2431 return -1;
2432 2432
2433 2433 switch (index_find_node(self, node)) {
2434 2434 case -3:
2435 2435 return -1;
2436 2436 case -2:
2437 2437 return 0;
2438 2438 default:
2439 2439 return 1;
2440 2440 }
2441 2441 }
2442 2442
2443 2443 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2444 2444 {
2445 2445 int ret = index_contains(self, args);
2446 2446 if (ret < 0)
2447 2447 return NULL;
2448 2448 return PyBool_FromLong((long)ret);
2449 2449 }
2450 2450
2451 2451 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2452 2452 {
2453 2453 char *node;
2454 2454 int rev;
2455 2455
2456 2456 if (node_check(self->nodelen, val, &node) == -1)
2457 2457 return NULL;
2458 2458 rev = index_find_node(self, node);
2459 2459 if (rev >= -1)
2460 2460 return PyLong_FromLong(rev);
2461 2461 if (rev == -2)
2462 2462 raise_revlog_error();
2463 2463 return NULL;
2464 2464 }
2465 2465
2466 2466 typedef uint64_t bitmask;
2467 2467
2468 2468 /*
2469 2469 * Given a disjoint set of revs, return all candidates for the
2470 2470 * greatest common ancestor. In revset notation, this is the set
2471 2471 * "heads(::a and ::b and ...)"
2472 2472 */
2473 2473 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2474 2474 int revcount)
2475 2475 {
2476 2476 const bitmask allseen = (1ull << revcount) - 1;
2477 2477 const bitmask poison = 1ull << revcount;
2478 2478 PyObject *gca = PyList_New(0);
2479 2479 int i, v, interesting;
2480 2480 int maxrev = -1;
2481 2481 bitmask sp;
2482 2482 bitmask *seen;
2483 2483
2484 2484 if (gca == NULL)
2485 2485 return PyErr_NoMemory();
2486 2486
2487 2487 for (i = 0; i < revcount; i++) {
2488 2488 if (revs[i] > maxrev)
2489 2489 maxrev = revs[i];
2490 2490 }
2491 2491
2492 2492 seen = calloc(sizeof(*seen), maxrev + 1);
2493 2493 if (seen == NULL) {
2494 2494 Py_DECREF(gca);
2495 2495 return PyErr_NoMemory();
2496 2496 }
2497 2497
2498 2498 for (i = 0; i < revcount; i++)
2499 2499 seen[revs[i]] = 1ull << i;
2500 2500
2501 2501 interesting = revcount;
2502 2502
2503 2503 for (v = maxrev; v >= 0 && interesting; v--) {
2504 2504 bitmask sv = seen[v];
2505 2505 int parents[2];
2506 2506
2507 2507 if (!sv)
2508 2508 continue;
2509 2509
2510 2510 if (sv < poison) {
2511 2511 interesting -= 1;
2512 2512 if (sv == allseen) {
2513 2513 PyObject *obj = PyLong_FromLong(v);
2514 2514 if (obj == NULL)
2515 2515 goto bail;
2516 2516 if (PyList_Append(gca, obj) == -1) {
2517 2517 Py_DECREF(obj);
2518 2518 goto bail;
2519 2519 }
2520 2520 sv |= poison;
2521 2521 for (i = 0; i < revcount; i++) {
2522 2522 if (revs[i] == v)
2523 2523 goto done;
2524 2524 }
2525 2525 }
2526 2526 }
2527 2527 if (index_get_parents(self, v, parents, maxrev) < 0)
2528 2528 goto bail;
2529 2529
2530 2530 for (i = 0; i < 2; i++) {
2531 2531 int p = parents[i];
2532 2532 if (p == -1)
2533 2533 continue;
2534 2534 sp = seen[p];
2535 2535 if (sv < poison) {
2536 2536 if (sp == 0) {
2537 2537 seen[p] = sv;
2538 2538 interesting++;
2539 2539 } else if (sp != sv)
2540 2540 seen[p] |= sv;
2541 2541 } else {
2542 2542 if (sp && sp < poison)
2543 2543 interesting--;
2544 2544 seen[p] = sv;
2545 2545 }
2546 2546 }
2547 2547 }
2548 2548
2549 2549 done:
2550 2550 free(seen);
2551 2551 return gca;
2552 2552 bail:
2553 2553 free(seen);
2554 2554 Py_XDECREF(gca);
2555 2555 return NULL;
2556 2556 }
2557 2557
2558 2558 /*
2559 2559 * Given a disjoint set of revs, return the subset with the longest
2560 2560 * path to the root.
2561 2561 */
2562 2562 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2563 2563 {
2564 2564 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2565 2565 static const Py_ssize_t capacity = 24;
2566 2566 int *depth, *interesting = NULL;
2567 2567 int i, j, v, ninteresting;
2568 2568 PyObject *dict = NULL, *keys = NULL;
2569 2569 long *seen = NULL;
2570 2570 int maxrev = -1;
2571 2571 long final;
2572 2572
2573 2573 if (revcount > capacity) {
2574 2574 PyErr_Format(PyExc_OverflowError,
2575 2575 "bitset size (%ld) > capacity (%ld)",
2576 2576 (long)revcount, (long)capacity);
2577 2577 return NULL;
2578 2578 }
2579 2579
2580 2580 for (i = 0; i < revcount; i++) {
2581 2581 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2582 2582 if (n > maxrev)
2583 2583 maxrev = n;
2584 2584 }
2585 2585
2586 2586 depth = calloc(sizeof(*depth), maxrev + 1);
2587 2587 if (depth == NULL)
2588 2588 return PyErr_NoMemory();
2589 2589
2590 2590 seen = calloc(sizeof(*seen), maxrev + 1);
2591 2591 if (seen == NULL) {
2592 2592 PyErr_NoMemory();
2593 2593 goto bail;
2594 2594 }
2595 2595
2596 2596 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2597 2597 if (interesting == NULL) {
2598 2598 PyErr_NoMemory();
2599 2599 goto bail;
2600 2600 }
2601 2601
2602 2602 if (PyList_Sort(revs) == -1)
2603 2603 goto bail;
2604 2604
2605 2605 for (i = 0; i < revcount; i++) {
2606 2606 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2607 2607 long b = 1l << i;
2608 2608 depth[n] = 1;
2609 2609 seen[n] = b;
2610 2610 interesting[b] = 1;
2611 2611 }
2612 2612
2613 2613 /* invariant: ninteresting is the number of non-zero entries in
2614 2614 * interesting. */
2615 2615 ninteresting = (int)revcount;
2616 2616
2617 2617 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2618 2618 int dv = depth[v];
2619 2619 int parents[2];
2620 2620 long sv;
2621 2621
2622 2622 if (dv == 0)
2623 2623 continue;
2624 2624
2625 2625 sv = seen[v];
2626 2626 if (index_get_parents(self, v, parents, maxrev) < 0)
2627 2627 goto bail;
2628 2628
2629 2629 for (i = 0; i < 2; i++) {
2630 2630 int p = parents[i];
2631 2631 long sp;
2632 2632 int dp;
2633 2633
2634 2634 if (p == -1)
2635 2635 continue;
2636 2636
2637 2637 dp = depth[p];
2638 2638 sp = seen[p];
2639 2639 if (dp <= dv) {
2640 2640 depth[p] = dv + 1;
2641 2641 if (sp != sv) {
2642 2642 interesting[sv] += 1;
2643 2643 seen[p] = sv;
2644 2644 if (sp) {
2645 2645 interesting[sp] -= 1;
2646 2646 if (interesting[sp] == 0)
2647 2647 ninteresting -= 1;
2648 2648 }
2649 2649 }
2650 2650 } else if (dv == dp - 1) {
2651 2651 long nsp = sp | sv;
2652 2652 if (nsp == sp)
2653 2653 continue;
2654 2654 seen[p] = nsp;
2655 2655 interesting[sp] -= 1;
2656 2656 if (interesting[sp] == 0)
2657 2657 ninteresting -= 1;
2658 2658 if (interesting[nsp] == 0)
2659 2659 ninteresting += 1;
2660 2660 interesting[nsp] += 1;
2661 2661 }
2662 2662 }
2663 2663 interesting[sv] -= 1;
2664 2664 if (interesting[sv] == 0)
2665 2665 ninteresting -= 1;
2666 2666 }
2667 2667
2668 2668 final = 0;
2669 2669 j = ninteresting;
2670 2670 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2671 2671 if (interesting[i] == 0)
2672 2672 continue;
2673 2673 final |= i;
2674 2674 j -= 1;
2675 2675 }
2676 2676 if (final == 0) {
2677 2677 keys = PyList_New(0);
2678 2678 goto bail;
2679 2679 }
2680 2680
2681 2681 dict = PyDict_New();
2682 2682 if (dict == NULL)
2683 2683 goto bail;
2684 2684
2685 2685 for (i = 0; i < revcount; i++) {
2686 2686 PyObject *key;
2687 2687
2688 2688 if ((final & (1 << i)) == 0)
2689 2689 continue;
2690 2690
2691 2691 key = PyList_GET_ITEM(revs, i);
2692 2692 Py_INCREF(key);
2693 2693 Py_INCREF(Py_None);
2694 2694 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2695 2695 Py_DECREF(key);
2696 2696 Py_DECREF(Py_None);
2697 2697 goto bail;
2698 2698 }
2699 2699 }
2700 2700
2701 2701 keys = PyDict_Keys(dict);
2702 2702
2703 2703 bail:
2704 2704 free(depth);
2705 2705 free(seen);
2706 2706 free(interesting);
2707 2707 Py_XDECREF(dict);
2708 2708
2709 2709 return keys;
2710 2710 }
2711 2711
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyLong_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyLong_AsLong(obj);
		Py_DECREF(obj);
		/* nullrev: empty answer.  NOTE(review): -1 is also the
		 * PyLong_AsLong error return, so an overflowing argument
		 * takes this path with an exception pending -- verify */
		if (val == -1) {
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	/* trivial cases: zero or one distinct input rev */
	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyLong_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}
2803 2803
2804 2804 /*
2805 2805 * Given a (possibly overlapping) set of revs, return the greatest
2806 2806 * common ancestors: those with the longest path to the root.
2807 2807 */
2808 2808 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2809 2809 {
2810 2810 PyObject *ret;
2811 2811 PyObject *gca = index_commonancestorsheads(self, args);
2812 2812 if (gca == NULL)
2813 2813 return NULL;
2814 2814
2815 2815 if (PyList_GET_SIZE(gca) <= 1) {
2816 2816 return gca;
2817 2817 }
2818 2818
2819 2819 ret = find_deepest(self, gca);
2820 2820 Py_DECREF(gca);
2821 2821 return ret;
2822 2822 }
2823 2823
/*
 * Invalidate any trie entries introduced by added revs.
 * `start` must fall within the in-memory (added) region; entries from
 * rev `start` through the end are removed from the nodetree.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len;

	len = self->length + self->new_length;
	/* nothing to do if start precedes the added region */
	i = start - self->length;
	if (i < 0)
		return;

	for (i = start; i < len; i++) {
		const char *node = index_node(self, i);
		nt_delete_node(&self->nt, node);
	}

	self->new_length = start - self->length;
}
2843 2843
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range.
 * Only step-1 slices ending at the current length are accepted;
 * returns 0 on success, -1 with an exception set otherwise.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	/* +1 so a slice reaching "one past the last rev" normalizes */
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* normalize a negative step to a forward range */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* truncation reaches into the on-disk region */
		if (self->ntinitialized) {
			Py_ssize_t i;

			for (i = start; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->new_length)
				index_invalidate_added(self, self->length);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		} else if (self->new_length) {
			self->new_length = 0;
		}

		self->length = start;
		goto done;
	}

	/* truncation only affects in-memory (added) revs */
	if (self->ntinitialized) {
		index_invalidate_added(self, start);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	} else {
		self->new_length = start - self->length;
	}
done:
	/* cached head list is stale after any truncation */
	Py_CLEAR(self->headrevs);
	return ret;
}
2916 2916
/*
 * Supported ops:
 *
 * slice deletion
 * string assignment (extend node->rev mapping)
 * string deletion (shrink node->rev mapping)
 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
	char *node;
	long rev;

	if (PySlice_Check(item) && value == NULL)
		return index_slice_del(self, item);

	if (node_check(self->nodelen, item, &node) == -1)
		return -1;

	/* `del index[node]`: no-op if the tree was never built */
	if (value == NULL)
		return self->ntinitialized ? nt_delete_node(&self->nt, node)
		                           : 0;
	rev = PyLong_AsLong(value);
	if (rev > INT_MAX || rev < 0) {
		/* PyLong_AsLong may already have set an exception */
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "rev out of range");
		return -1;
	}

	if (index_init_nt(self) == -1)
		return -1;
	return nt_insert(&self->nt, node, (int)rev);
}
2950 2950
/*
 * Find all RevlogNG entries in an index that has inline data. Update
 * the optional "offsets" table with those entries.
 * Returns the number of entries found, or -1 on a corrupt index.
 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
	const char *data = (const char *)self->buf.buf;
	Py_ssize_t pos = 0;
	Py_ssize_t end = self->buf.len;
	long incr = self->entry_size;
	Py_ssize_t len = 0;

	while (pos + self->entry_size <= end && pos >= 0) {
		uint32_t comp_len, sidedata_comp_len = 0;
		/* 3rd element of header is length of compressed inline data */
		if (self->format_version == format_v1) {
			comp_len =
			    getbe32(data + pos + entry_v1_offset_comp_len);
			sidedata_comp_len = 0;
		} else if (self->format_version == format_v2) {
			comp_len =
			    getbe32(data + pos + entry_v2_offset_comp_len);
			sidedata_comp_len = getbe32(
			    data + pos + entry_v2_offset_sidedata_comp_len);
		} else {
			raise_revlog_error();
			return -1;
		}
		/* entry header plus its inline (side)data payload */
		incr = self->entry_size + comp_len + sidedata_comp_len;
		if (offsets)
			offsets[len] = data + pos;
		len++;
		pos += incr;
	}

	/* a well-formed index consumes the buffer exactly */
	if (pos != end) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
		return -1;
	}

	return len;
}
2994 2994
2995 2995 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2996 2996 {
2997 2997 PyObject *data_obj, *inlined_obj;
2998 2998 Py_ssize_t size;
2999 2999
3000 3000 static char *kwlist[] = {"data", "inlined", "format", NULL};
3001 3001
3002 3002 /* Initialize before argument-checking to avoid index_dealloc() crash.
3003 3003 */
3004 3004 self->added = NULL;
3005 3005 self->new_length = 0;
3006 3006 self->added_length = 0;
3007 3007 self->data = NULL;
3008 3008 memset(&self->buf, 0, sizeof(self->buf));
3009 3009 self->headrevs = NULL;
3010 3010 self->filteredrevs = Py_None;
3011 3011 Py_INCREF(Py_None);
3012 3012 self->ntinitialized = 0;
3013 3013 self->offsets = NULL;
3014 3014 self->nodelen = 20;
3015 3015 self->nullentry = NULL;
3016 3016 self->rust_ext_compat = 1;
3017 3017 self->format_version = format_v1;
3018 3018
3019 3019 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3020 3020 &data_obj, &inlined_obj,
3021 3021 &(self->format_version)))
3022 3022 return -1;
3023 3023 if (!PyObject_CheckBuffer(data_obj)) {
3024 3024 PyErr_SetString(PyExc_TypeError,
3025 3025 "data does not support buffer interface");
3026 3026 return -1;
3027 3027 }
3028 3028 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3029 3029 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3030 3030 return -1;
3031 3031 }
3032 3032
3033 3033 if (self->format_version == format_v1) {
3034 3034 self->entry_size = v1_entry_size;
3035 3035 } else if (self->format_version == format_v2) {
3036 3036 self->entry_size = v2_entry_size;
3037 3037 } else if (self->format_version == format_cl2) {
3038 3038 self->entry_size = cl2_entry_size;
3039 3039 }
3040 3040
3041 3041 self->nullentry = Py_BuildValue(
3042 3042 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3043 3043 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3044 3044
3045 3045 if (!self->nullentry)
3046 3046 return -1;
3047 3047 PyObject_GC_UnTrack(self->nullentry);
3048 3048
3049 3049 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3050 3050 return -1;
3051 3051 size = self->buf.len;
3052 3052
3053 3053 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3054 3054 self->data = data_obj;
3055 3055
3056 3056 self->ntlookups = self->ntmisses = 0;
3057 3057 self->ntrev = -1;
3058 3058 Py_INCREF(self->data);
3059 3059
3060 3060 if (self->inlined) {
3061 3061 Py_ssize_t len = inline_scan(self, NULL);
3062 3062 if (len == -1)
3063 3063 goto bail;
3064 3064 self->length = len;
3065 3065 } else {
3066 3066 if (size % self->entry_size) {
3067 3067 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3068 3068 goto bail;
3069 3069 }
3070 3070 self->length = size / self->entry_size;
3071 3071 }
3072 3072
3073 3073 return 0;
3074 3074 bail:
3075 3075 return -1;
3076 3076 }
3077 3077
3078 3078 static PyObject *index_nodemap(indexObject *self)
3079 3079 {
3080 3080 Py_INCREF(self);
3081 3081 return (PyObject *)self;
3082 3082 }
3083 3083
3084 3084 static void _index_clearcaches(indexObject *self)
3085 3085 {
3086 3086 if (self->offsets) {
3087 3087 PyMem_Free((void *)self->offsets);
3088 3088 self->offsets = NULL;
3089 3089 }
3090 3090 if (self->ntinitialized) {
3091 3091 nt_dealloc(&self->nt);
3092 3092 }
3093 3093 self->ntinitialized = 0;
3094 3094 Py_CLEAR(self->headrevs);
3095 3095 }
3096 3096
3097 3097 static PyObject *index_clearcaches(indexObject *self)
3098 3098 {
3099 3099 _index_clearcaches(self);
3100 3100 self->ntrev = -1;
3101 3101 self->ntlookups = self->ntmisses = 0;
3102 3102 Py_RETURN_NONE;
3103 3103 }
3104 3104
/*
 * Destructor: release caches, the buffer view, and all owned
 * references, then free the object itself.
 */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		/* release the view before dropping self->data below —
		   the buffer is exported by the data object */
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	/* PyMem_Free(NULL) is a no-op, so no guard is needed */
	PyMem_Free(self->added);
	Py_XDECREF(self->nullentry);
	PyObject_Del(self);
}
3118 3118
/* Sequence protocol: len(index), index[rev], and `node in index`. */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
3129 3129
/* Mapping protocol: subscripting by node and `index[node] = rev`
   assignment/deletion (see index_assign_subscript). */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
3135 3135
/* Method table exposed on parsers.index instances. */
static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    /* "get" and "get_rev" deliberately share one implementation */
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
     "return `rev` associated with a node or None"},
    {"has_node", (PyCFunction)index_m_has_node, METH_O,
     "return True if the node exist in the index"},
    {"rev", (PyCFunction)index_m_rev, METH_O,
     "return `rev` associated with a node or raise RevlogError"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
     METH_VARARGS, "replace an existing index entry with a new value"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
     "Gather snapshot data in a cache dict"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    /* NOTE(review): docstring below duplicates deltachain's; presumably
       it should describe density-based chunk slicing — confirm upstream */
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
     "return an entry in binary form"},
    {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
     "pack the revlog header information into binary"},
    {NULL} /* Sentinel */
};
3181 3181
/* Attribute getters: "nodemap" returns the index itself (it doubles
   as the node -> rev mapping). */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
3186 3186
3187 3187 static PyMemberDef index_members[] = {
3188 3188 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3189 3189 "size of an index entry"},
3190 3190 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3191 3191 "size of an index entry"},
3192 3192 {NULL} /* Sentinel */
3193 3193 };
3194 3194
/*
 * Type object for parsers.index. tp_new is assigned in
 * revlog_module_init() before PyType_Ready() is called.
 */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    index_members,                 /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
3234 3234
3235 3235 /*
3236 3236 * returns a tuple of the form (index, cache) with elements as
3237 3237 * follows:
3238 3238 *
3239 3239 * index: an index object that lazily parses Revlog (v1 or v2) records
3240 3240 * cache: if data is inlined, a tuple (0, index_file_content), else None
3241 3241 * index_file_content could be a string, or a buffer
3242 3242 *
3243 3243 * added complications are for backwards compatibility
3244 3244 */
3245 3245 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3246 3246 {
3247 3247 PyObject *cache = NULL;
3248 3248 indexObject *idx;
3249 3249 int ret;
3250 3250
3251 3251 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3252 3252 if (idx == NULL)
3253 3253 goto bail;
3254 3254
3255 3255 ret = index_init(idx, args, kwargs);
3256 3256 if (ret == -1)
3257 3257 goto bail;
3258 3258
3259 3259 if (idx->inlined) {
3260 3260 cache = Py_BuildValue("iO", 0, idx->data);
3261 3261 if (cache == NULL)
3262 3262 goto bail;
3263 3263 } else {
3264 3264 cache = Py_None;
3265 3265 Py_INCREF(cache);
3266 3266 }
3267 3267
3268 3268 return Py_BuildValue("NN", idx, cache);
3269 3269
3270 3270 bail:
3271 3271 Py_XDECREF(idx);
3272 3272 Py_XDECREF(cache);
3273 3273 return NULL;
3274 3274 }
3275 3275
/* Function table exported through the "revlog_CAPI" capsule below so
   other extension modules can call into this one. */
static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
};
3281 3281
/*
 * Module setup: register the "index" and "nodetree" types on `mod` and
 * export the C API table as the "revlog_CAPI" capsule.
 *
 * NOTE(review): failures are handled by returning early; partial
 * initialization is not reported to the caller.
 */
void revlog_module_init(PyObject *mod)
{
	PyObject *caps = NULL;
	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
		return;
	/* PyModule_AddObject steals a reference on success; the INCREF
	   keeps a reference owned by this file as well */
	Py_INCREF(&HgRevlogIndex_Type);
	PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	/* consumers look the capsule up by this fully-qualified name */
	caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
	if (caps != NULL)
		PyModule_AddObject(mod, "revlog_CAPI", caps);
}
General Comments 0
You need to be logged in to leave comments. Login now