##// END OF EJS Templates
cext: fix potential memory leaks of list items appended with PyList_Append...
Arseniy Alekseyev -
r52281:cb5175ed stable
parent child Browse files
Show More
@@ -1,3324 +1,3304 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
typedef struct indexObjectStruct indexObject;

/* One trie node: 16 child slots, one per nibble of a hex node id. */
typedef struct {
	int children[16];
} nodetreenode;

/* C API vtable handed to other extension modules (e.g. Rust bindings);
 * abi_version guards against mismatched binaries. */
typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*fast_rank)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index; /* back-reference to the owning index */
	nodetreenode *nodes;
	Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
	size_t length;      /* # nodes in use */
	size_t capacity;    /* # nodes allocated */
	int depth;          /* maximum depth of tree */
	int splits;         /* # splits performed */
} nodetree;

/* Python object wrapping a standalone nodetree. */
typedef struct {
	PyObject_HEAD /* ; */
	    nodetree nt;
} nodetreeObject;
61 61
62 62 /*
63 63 * This class has two behaviors.
64 64 *
65 65 * When used in a list-like way (with integer keys), we decode an
66 66 * entry in a RevlogNG index file on demand. We have limited support for
67 67 * integer-keyed insert and delete, only at elements right before the
68 68 * end.
69 69 *
70 70 * With string keys, we lazily perform a reverse mapping from node to
71 71 * rev, using a base-16 trie.
72 72 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;     /* raw bytes of index */
	Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
	PyObject *nullentry; /* fast path for references to null */
	Py_buffer buf;       /* buffer of data */
	const char **offsets; /* populated on demand; maps rev -> entry start
	                         for inline revlogs (variable entry spacing) */
	Py_ssize_t length;     /* current on-disk number of elements */
	unsigned new_length;   /* number of added elements */
	unsigned added_length; /* space reserved for added elements */
	char *added;           /* populated on demand; in-memory entries not
	                          yet written to disk */
	PyObject *headrevs;    /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;            /* nonzero when data is interleaved with the
	                           index (affects entry addressing) */
	long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
	                  */
	long rust_ext_compat; /* compatibility with being used in rust
	                         extensions */
	long format_version;  /* format version selector (format_*) */
};
99 99
100 100 static Py_ssize_t index_length(const indexObject *self)
101 101 {
102 102 return self->length + self->new_length;
103 103 }
104 104
static const char nullid[32] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node);

/* Format string for the 13-item index entry tuple; 'K'/'k' must parse a
 * 64-bit unsigned value, so the variant depends on sizeof(long). */
#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = "Kiiiiiiy#KiBBi";
#else
static const char *const tuple_format = "kiiiiiiy#kiBBi";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_entry_size = 64;

/* A Revlogv2 index entry is 96 bytes long. */
static const long v2_entry_size = 96;

/* A Changelogv2 index entry is 96 bytes long. */
static const long cl2_entry_size = 96;

/* Internal format version.
 * Must match their counterparts in revlogutils/constants.py */
static const long format_v1 = 1;       /* constants.py: REVLOGV1 */
static const long format_v2 = 0xDEAD;  /* constants.py: REVLOGV2 */
static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */

/* Byte offsets of the fields within a v1 index entry. */
static const long entry_v1_offset_high = 0;
static const long entry_v1_offset_offset_flags = 4;
static const long entry_v1_offset_comp_len = 8;
static const long entry_v1_offset_uncomp_len = 12;
static const long entry_v1_offset_base_rev = 16;
static const long entry_v1_offset_link_rev = 20;
static const long entry_v1_offset_parent_1 = 24;
static const long entry_v1_offset_parent_2 = 28;
static const long entry_v1_offset_node_id = 32;

/* Byte offsets of the fields within a v2 index entry. */
static const long entry_v2_offset_high = 0;
static const long entry_v2_offset_offset_flags = 4;
static const long entry_v2_offset_comp_len = 8;
static const long entry_v2_offset_uncomp_len = 12;
static const long entry_v2_offset_base_rev = 16;
static const long entry_v2_offset_link_rev = 20;
static const long entry_v2_offset_parent_1 = 24;
static const long entry_v2_offset_parent_2 = 28;
static const long entry_v2_offset_node_id = 32;
static const long entry_v2_offset_sidedata_offset = 64;
static const long entry_v2_offset_sidedata_comp_len = 72;
static const long entry_v2_offset_all_comp_mode = 76;
/* next free offset: 77 */

/* Byte offsets of the fields within a changelogv2 index entry. */
static const long entry_cl2_offset_high = 0;
static const long entry_cl2_offset_offset_flags = 4;
static const long entry_cl2_offset_comp_len = 8;
static const long entry_cl2_offset_uncomp_len = 12;
static const long entry_cl2_offset_parent_1 = 16;
static const long entry_cl2_offset_parent_2 = 20;
static const long entry_cl2_offset_node_id = 24;
static const long entry_cl2_offset_sidedata_offset = 56;
static const long entry_cl2_offset_sidedata_comp_len = 64;
static const long entry_cl2_offset_all_comp_mode = 68;
static const long entry_cl2_offset_rank = 69;
/* next free offset: 73 */

static const char comp_mode_inline = 2;
static const int rank_unknown = -1;
172 172
/*
 * Set a mercurial.error.RevlogError as the current Python exception.
 * Falls back to SystemError if the class cannot be located.
 */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	/* PyModule_GetDict returns a borrowed reference; take our own so the
	 * single cleanup path below can unconditionally release it. */
	Py_INCREF(dict);

	/* borrowed reference -- no decref needed for errclass */
	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
202 202
203 203 /*
204 204 * Return a pointer to the beginning of a RevlogNG record.
205 205 */
206 206 static const char *index_deref(indexObject *self, Py_ssize_t pos)
207 207 {
208 208 if (pos >= self->length)
209 209 return self->added + (pos - self->length) * self->entry_size;
210 210
211 211 if (self->inlined && pos > 0) {
212 212 if (self->offsets == NULL) {
213 213 Py_ssize_t ret;
214 214 self->offsets =
215 215 PyMem_Malloc(self->length * sizeof(*self->offsets));
216 216 if (self->offsets == NULL)
217 217 return (const char *)PyErr_NoMemory();
218 218 ret = inline_scan(self, self->offsets);
219 219 if (ret == -1) {
220 220 return NULL;
221 221 };
222 222 }
223 223 return self->offsets[pos];
224 224 }
225 225
226 226 return (const char *)(self->buf.buf) + pos * self->entry_size;
227 227 }
228 228
229 229 /*
230 230 * Get parents of the given rev.
231 231 *
232 232 * The specified rev must be valid and must not be nullrev. A returned
233 233 * parent revision may be nullrev, but is guaranteed to be in valid range.
234 234 */
235 235 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
236 236 int maxrev)
237 237 {
238 238 const char *data = index_deref(self, rev);
239 239
240 240 if (self->format_version == format_v1) {
241 241 ps[0] = getbe32(data + entry_v1_offset_parent_1);
242 242 ps[1] = getbe32(data + entry_v1_offset_parent_2);
243 243 } else if (self->format_version == format_v2) {
244 244 ps[0] = getbe32(data + entry_v2_offset_parent_1);
245 245 ps[1] = getbe32(data + entry_v2_offset_parent_2);
246 246 } else if (self->format_version == format_cl2) {
247 247 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
248 248 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
249 249 } else {
250 250 raise_revlog_error();
251 251 return -1;
252 252 }
253 253
254 254 /* If index file is corrupted, ps[] may point to invalid revisions. So
255 255 * there is a risk of buffer overflow to trust them unconditionally. */
256 256 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
257 257 PyErr_SetString(PyExc_ValueError, "parent out of range");
258 258 return -1;
259 259 }
260 260 return 0;
261 261 }
262 262
263 263 /*
264 264 * Get parents of the given rev.
265 265 *
266 266 * If the specified rev is out of range, IndexError will be raised. If the
267 267 * revlog entry is corrupted, ValueError may be raised.
268 268 *
269 269 * Returns 0 on success or -1 on failure.
270 270 */
271 271 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
272 272 {
273 273 int tiprev;
274 274 if (!op || !HgRevlogIndex_Check(op) || !ps) {
275 275 PyErr_BadInternalCall();
276 276 return -1;
277 277 }
278 278 tiprev = (int)index_length((indexObject *)op) - 1;
279 279 if (rev < -1 || rev > tiprev) {
280 280 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
281 281 return -1;
282 282 } else if (rev == -1) {
283 283 ps[0] = ps[1] = -1;
284 284 return 0;
285 285 } else {
286 286 return index_get_parents((indexObject *)op, rev, ps, tiprev);
287 287 }
288 288 }
289 289
290 290 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
291 291 {
292 292 const char *data;
293 293 uint64_t offset;
294 294
295 295 if (rev == nullrev)
296 296 return 0;
297 297
298 298 data = index_deref(self, rev);
299 299
300 300 if (self->format_version == format_v1) {
301 301 offset = getbe32(data + entry_v1_offset_offset_flags);
302 302 if (rev == 0) {
303 303 /* mask out version number for the first entry */
304 304 offset &= 0xFFFF;
305 305 } else {
306 306 uint32_t offset_high =
307 307 getbe32(data + entry_v1_offset_high);
308 308 offset |= ((uint64_t)offset_high) << 32;
309 309 }
310 310 } else if (self->format_version == format_v2) {
311 311 offset = getbe32(data + entry_v2_offset_offset_flags);
312 312 if (rev == 0) {
313 313 /* mask out version number for the first entry */
314 314 offset &= 0xFFFF;
315 315 } else {
316 316 uint32_t offset_high =
317 317 getbe32(data + entry_v2_offset_high);
318 318 offset |= ((uint64_t)offset_high) << 32;
319 319 }
320 320 } else if (self->format_version == format_cl2) {
321 321 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
322 322 offset = getbe32(data + entry_cl2_offset_offset_flags);
323 323 offset |= ((uint64_t)offset_high) << 32;
324 324 } else {
325 325 raise_revlog_error();
326 326 return -1;
327 327 }
328 328
329 329 return (int64_t)(offset >> 16);
330 330 }
331 331
332 332 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
333 333 {
334 334 const char *data;
335 335 int tmp;
336 336
337 337 if (rev == nullrev)
338 338 return 0;
339 339
340 340 data = index_deref(self, rev);
341 341
342 342 if (self->format_version == format_v1) {
343 343 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
344 344 } else if (self->format_version == format_v2) {
345 345 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
346 346 } else if (self->format_version == format_cl2) {
347 347 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
348 348 } else {
349 349 raise_revlog_error();
350 350 return -1;
351 351 }
352 352 if (tmp < 0) {
353 353 PyErr_Format(PyExc_OverflowError,
354 354 "revlog entry size out of bound (%d)", tmp);
355 355 return -1;
356 356 }
357 357 return tmp;
358 358 }
359 359
/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 *
 * Decode the entry at [pos] into a Python tuple (see tuple_format).
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank = rank_unknown;
	char data_comp_mode, sidedata_comp_mode;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	/* nullrev is answered from a preallocated singleton tuple */
	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	if (self->format_version == format_v1) {
		offset_flags = getbe32(data + entry_v1_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v1_offset_comp_len);
		uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
		base_rev = getbe32(data + entry_v1_offset_base_rev);
		link_rev = getbe32(data + entry_v1_offset_link_rev);
		parent_1 = getbe32(data + entry_v1_offset_parent_1);
		parent_2 = getbe32(data + entry_v1_offset_parent_2);
		c_node_id = data + entry_v1_offset_node_id;

		/* v1 has no sidedata; report empty/inline placeholders */
		sidedata_offset = 0;
		sidedata_comp_len = 0;
		data_comp_mode = comp_mode_inline;
		sidedata_comp_mode = comp_mode_inline;
	} else if (self->format_version == format_v2) {
		offset_flags = getbe32(data + entry_v2_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v2_offset_comp_len);
		uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
		base_rev = getbe32(data + entry_v2_offset_base_rev);
		link_rev = getbe32(data + entry_v2_offset_link_rev);
		parent_1 = getbe32(data + entry_v2_offset_parent_1);
		parent_2 = getbe32(data + entry_v2_offset_parent_2);
		c_node_id = data + entry_v2_offset_node_id;

		sidedata_offset =
		    getbe64(data + entry_v2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_v2_offset_sidedata_comp_len);
		/* both compression modes are packed into one byte:
		 * bits 0-1 data, bits 2-3 sidedata */
		data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
	} else if (self->format_version == format_cl2) {
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
		offset_flags |= ((uint64_t)offset_high) << 32;
		comp_len = getbe32(data + entry_cl2_offset_comp_len);
		uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
		/* base_rev and link_rev are not stored in changelogv2, but are
		   still used by some functions shared with the other revlogs.
		   They are supposed to contain links to other revisions,
		   but they always point to themselves in the case of a changelog.
		*/
		base_rev = pos;
		link_rev = pos;
		parent_1 = getbe32(data + entry_cl2_offset_parent_1);
		parent_2 = getbe32(data + entry_cl2_offset_parent_2);
		c_node_id = data + entry_cl2_offset_node_id;
		sidedata_offset =
		    getbe64(data + entry_cl2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_cl2_offset_sidedata_comp_len);
		data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
		rank = getbe32(data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
	                     self->nodelen, sidedata_offset, sidedata_comp_len,
	                     data_comp_mode, sidedata_comp_mode, rank);
}
487 487 /*
488 488 * Pack header information in binary
489 489 */
490 490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
491 491 {
492 492 int header;
493 493 char out[4];
494 494 if (!PyArg_ParseTuple(args, "i", &header)) {
495 495 return NULL;
496 496 }
497 497 if (self->format_version != format_v1) {
498 498 PyErr_Format(PyExc_RuntimeError,
499 499 "version header should go in the docket, not the "
500 500 "index: %d",
501 501 header);
502 502 return NULL;
503 503 }
504 504 putbe32(header, out);
505 505 return PyBytes_FromStringAndSize(out, 4);
506 506 }
507 507 /*
508 508 * Return the raw binary string representing a revision
509 509 */
510 510 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
511 511 {
512 512 long rev;
513 513 const char *data;
514 514 Py_ssize_t length = index_length(self);
515 515
516 516 if (!pylong_to_long(value, &rev)) {
517 517 return NULL;
518 518 }
519 519 if (rev < 0 || rev >= length) {
520 520 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
521 521 rev);
522 522 return NULL;
523 523 };
524 524
525 525 data = index_deref(self, rev);
526 526 if (data == NULL)
527 527 return NULL;
528 528 if (rev == 0 && self->format_version == format_v1) {
529 529 /* the header is eating the start of the first entry */
530 530 return PyBytes_FromStringAndSize(data + 4,
531 531 self->entry_size - 4);
532 532 }
533 533 return PyBytes_FromStringAndSize(data, self->entry_size);
534 534 }
535 535
536 536 /*
537 537 * Return the hash of node corresponding to the given rev.
538 538 */
539 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 540 {
541 541 Py_ssize_t length = index_length(self);
542 542 const char *data;
543 543 const char *node_id;
544 544
545 545 if (pos == nullrev)
546 546 return nullid;
547 547
548 548 if (pos >= length)
549 549 return NULL;
550 550
551 551 data = index_deref(self, pos);
552 552
553 553 if (self->format_version == format_v1) {
554 554 node_id = data + entry_v1_offset_node_id;
555 555 } else if (self->format_version == format_v2) {
556 556 node_id = data + entry_v2_offset_node_id;
557 557 } else if (self->format_version == format_cl2) {
558 558 node_id = data + entry_cl2_offset_node_id;
559 559 } else {
560 560 raise_revlog_error();
561 561 return NULL;
562 562 }
563 563
564 564 return data ? node_id : NULL;
565 565 }
566 566
/*
 * Return the stored rank of a given revision if known, or rank_unknown
 * otherwise.
 *
 * The rank of a revision is the size of the sub-graph it defines as a head.
 * Equivalently, the rank of a revision `r` is the size of the set
 * `ancestors(r)`, `r` included.
 *
 * This method returns the rank retrieved from the revlog in constant time. It
 * makes no attempt at computing unknown values for versions of the revlog
 * which do not persist the rank.
 */
static int index_fast_rank(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);

	if (self->format_version != format_cl2 || pos >= length) {
		return rank_unknown;
	}

	if (pos == nullrev) {
		return 0; /* convention */
	}

	/* NOTE(review): index_deref can return NULL on failure (OOM /
	 * inline-scan error), which would crash here -- confirm whether a
	 * changelogv2 index can be inlined before relying on this. */
	return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
}
593 593
594 594 /*
595 595 * Return the hash of the node corresponding to the given rev. The
596 596 * rev is assumed to be existing. If not, an exception is set.
597 597 */
598 598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
599 599 {
600 600 const char *node = index_node(self, pos);
601 601 if (node == NULL) {
602 602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
603 603 (int)pos);
604 604 }
605 605 return node;
606 606 }
607 607
608 608 static int nt_insert(nodetree *self, const char *node, int rev);
609 609
610 610 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
611 611 {
612 612 Py_ssize_t thisnodelen;
613 613 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
614 614 return -1;
615 615 if (nodelen == thisnodelen)
616 616 return 0;
617 617 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
618 618 thisnodelen, nodelen);
619 619 return -1;
620 620 }
621 621
/*
 * Append one entry (given as the 13-item tuple described by tuple_format)
 * to the in-memory portion of the index.
 *
 * Grows self->added geometrically as needed, serializes the entry in the
 * on-disk byte layout of the current format, keeps the node trie in sync
 * and invalidates the headrevs cache. Returns None, or NULL on error.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	uint64_t offset_flags, sidedata_offset;
	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank;
	char data_comp_mode, sidedata_comp_mode;
	Py_ssize_t c_node_id_len;
	const char *c_node_id;
	char comp_field;
	char *data;

	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
	                      &parent_2, &c_node_id, &c_node_id_len,
	                      &sidedata_offset, &sidedata_comp_len,
	                      &data_comp_mode, &sidedata_comp_mode, &rank)) {
		/* NOTE(review): tuple_format now describes 13 items; this
		 * message looks stale -- confirm and update separately. */
		PyErr_SetString(PyExc_TypeError, "12-tuple required");
		return NULL;
	}

	if (c_node_id_len != self->nodelen) {
		PyErr_SetString(PyExc_TypeError, "invalid node");
		return NULL;
	}
	/* v1 cannot represent non-inline compression modes */
	if (self->format_version == format_v1) {

		if (data_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid data compression mode: %i",
			             data_comp_mode);
			return NULL;
		}
		if (sidedata_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid sidedata compression mode: %i",
			             sidedata_comp_mode);
			return NULL;
		}
	}

	/* grow the append buffer geometrically (x2, starting at 4096) */
	if (self->new_length == self->added_length) {
		size_t new_added_length =
		    self->added_length ? self->added_length * 2 : 4096;
		void *new_added = PyMem_Realloc(
		    self->added, new_added_length * self->entry_size);
		if (!new_added)
			return PyErr_NoMemory();
		self->added = new_added;
		self->added_length = new_added_length;
	}
	rev = self->length + self->new_length;
	data = self->added + self->entry_size * self->new_length++;

	/* zero-fill so unused/reserved bytes are deterministic */
	memset(data, 0, self->entry_size);

	if (self->format_version == format_v1) {
		putbe32(offset_flags >> 32, data + entry_v1_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_v1_offset_offset_flags);
		putbe32(comp_len, data + entry_v1_offset_comp_len);
		putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
		putbe32(base_rev, data + entry_v1_offset_base_rev);
		putbe32(link_rev, data + entry_v1_offset_link_rev);
		putbe32(parent_1, data + entry_v1_offset_parent_1);
		putbe32(parent_2, data + entry_v1_offset_parent_2);
		memcpy(data + entry_v1_offset_node_id, c_node_id,
		       c_node_id_len);
	} else if (self->format_version == format_v2) {
		putbe32(offset_flags >> 32, data + entry_v2_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_v2_offset_offset_flags);
		putbe32(comp_len, data + entry_v2_offset_comp_len);
		putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
		putbe32(base_rev, data + entry_v2_offset_base_rev);
		putbe32(link_rev, data + entry_v2_offset_link_rev);
		putbe32(parent_1, data + entry_v2_offset_parent_1);
		putbe32(parent_2, data + entry_v2_offset_parent_2);
		memcpy(data + entry_v2_offset_node_id, c_node_id,
		       c_node_id_len);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		/* pack both 2-bit compression modes into one byte */
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[entry_v2_offset_all_comp_mode] = comp_field;
	} else if (self->format_version == format_cl2) {
		putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_cl2_offset_offset_flags);
		putbe32(comp_len, data + entry_cl2_offset_comp_len);
		putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
		putbe32(parent_1, data + entry_cl2_offset_parent_1);
		putbe32(parent_2, data + entry_cl2_offset_parent_2);
		memcpy(data + entry_cl2_offset_node_id, c_node_id,
		       c_node_id_len);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[entry_cl2_offset_all_comp_mode] = comp_field;
		putbe32(rank, data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	/* keep the node->rev trie in sync if it has been built */
	if (self->ntinitialized)
		nt_insert(&self->nt, c_node_id, rev);

	/* headrevs may have changed; drop the cache */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
737 737
/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision.

   Only entries appended in the current transaction (rev >= self->length)
   may be rewritten; v1 and inline revlogs are rejected entirely.
   Returns None, or NULL with an exception set. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
	uint64_t offset_flags, sidedata_offset;
	Py_ssize_t rev;
	int sidedata_comp_len;
	char comp_mode;
	char *data;
/* 'K'/'k' must parse a 64-bit unsigned value; variant depends on
 * sizeof(long), mirroring tuple_format above */
#if LONG_MAX == 0x7fffffffL
	const char *const sidedata_format = "nKiKB";
#else
	const char *const sidedata_format = "nkikB";
#endif

	if (self->entry_size == v1_entry_size || self->inlined) {
		/*
		 There is a bug in the transaction handling when going from an
		 inline revlog to a separate index and data file. Turn it off until
		 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
		 See issue6485.
		*/
		raise_revlog_error();
		return NULL;
	}

	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
	                      &sidedata_comp_len, &offset_flags, &comp_mode))
		return NULL;

	if (rev < 0 || rev >= index_length(self)) {
		PyErr_SetString(PyExc_IndexError, "revision outside index");
		return NULL;
	}
	if (rev < self->length) {
		PyErr_SetString(
		    PyExc_IndexError,
		    "cannot rewrite entries outside of this transaction");
		return NULL;
	}

	/* Find the newly added node, offset from the "already on-disk" length
	 */
	data = self->added + self->entry_size * (rev - self->length);
	if (self->format_version == format_v2) {
		putbe64(offset_flags, data + entry_v2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		/* replace only the sidedata compression bits (2-3), keep the
		 * data compression bits (0-1) */
		data[entry_v2_offset_all_comp_mode] =
		    (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else if (self->format_version == format_cl2) {
		putbe64(offset_flags, data + entry_cl2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		data[entry_cl2_offset_all_comp_mode] =
		    (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else {
		raise_revlog_error();
		return NULL;
	}

	Py_RETURN_NONE;
}
808 808
/*
 * Build a dict of diagnostic counters (bytes keys -> ints) describing the
 * index and its node trie. Returns a new reference, or NULL on error.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* Store self-><field> under the byte-string key <desc>; on any failure
 * jump to bail, where s/t hold the only references needing cleanup. */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyLong_FromSsize_t(self->__n);                             \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added_length)
		istat(new_length, "index entries added");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
853 853
854 854 /*
855 855 * When we cache a list, we want to be sure the caller can't mutate
856 856 * the cached copy.
857 857 */
858 858 static PyObject *list_copy(PyObject *list)
859 859 {
860 860 Py_ssize_t len = PyList_GET_SIZE(list);
861 861 PyObject *newlist = PyList_New(len);
862 862 Py_ssize_t i;
863 863
864 864 if (newlist == NULL)
865 865 return NULL;
866 866
867 867 for (i = 0; i < len; i++) {
868 868 PyObject *obj = PyList_GET_ITEM(list, i);
869 869 Py_INCREF(obj);
870 870 PyList_SET_ITEM(newlist, i, obj);
871 871 }
872 872
873 873 return newlist;
874 874 }
875 875
876 876 static int check_filter(PyObject *filter, Py_ssize_t arg)
877 877 {
878 878 if (filter) {
879 879 PyObject *arglist, *result;
880 880 int isfiltered;
881 881
882 882 arglist = Py_BuildValue("(n)", arg);
883 883 if (!arglist) {
884 884 return -1;
885 885 }
886 886
887 887 result = PyObject_Call(filter, arglist, NULL);
888 888 Py_DECREF(arglist);
889 889 if (!result) {
890 890 return -1;
891 891 }
892 892
893 893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
894 894 * same as this function, so we can just return it directly.*/
895 895 isfiltered = PyObject_IsTrue(result);
896 896 Py_DECREF(result);
897 897 return isfiltered;
898 898 } else {
899 899 return 0;
900 900 }
901 901 }
902 902
903 903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 904 int parent_2, Py_ssize_t i)
905 905 {
906 906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 907 phases[i] = phases[parent_1];
908 908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 909 phases[i] = phases[parent_2];
910 910 }
911 911
912 /* Take ownership of a given Python value and add it to a Python list.
913 Return -1 on failure (including if [elem] is NULL). */
914 static int pylist_append_owned(PyObject *list, PyObject *elem)
915 {
916 int res;
917
918 if (elem == NULL)
919 return -1;
920 res = PyList_Append(list, elem);
921 Py_DECREF(elem);
922 return res;
923 }
924
/*
 * reachableroots2(minroot, heads, roots, includepath) -> list of revs
 *
 * Walk parents from `heads` down to (at most) revision `minroot` and
 * collect every member of `roots` that is reached.  When `includepath`
 * is true, additionally collect every revision lying on a path between
 * a reachable root and a head.
 *
 * Returns a new list, or NULL with an exception set on failure.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark the requested roots (index shifted by 1 for nullrev) */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			/* pylist_append_owned consumes the new int even on
			 * failure, so no cleanup is needed here */
			r = pylist_append_owned(reachable,
			                        PyLong_FromLong(revnum));
			if (r < 0)
				goto bail;
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				r = pylist_append_owned(reachable,
				                        PyLong_FromSsize_t(i));
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
1070 1076
/*
 * Mark every node in `roots` (a set of binary node ids) with `phase`
 * in the `phases` array, and return the smallest revision touched
 * (-1 if none).  Returns -2 with an exception set on error.
 */
static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
                             char phase)
{
	Py_ssize_t len = index_length(self);
	PyObject *item;
	PyObject *iterator;
	int rev, minrev = -1;
	char *node;

	if (!PySet_Check(roots)) {
		PyErr_SetString(PyExc_TypeError,
		                "roots must be a set of nodes");
		return -2;
	}
	iterator = PyObject_GetIter(roots);
	if (iterator == NULL)
		return -2;
	while ((item = PyIter_Next(iterator))) {
		if (node_check(self->nodelen, item, &node) == -1)
			goto failed;
		rev = index_find_node(self, node);
		/* null is implicitly public, so negative is invalid */
		if (rev < 0 || rev >= len)
			goto failed;
		phases[rev] = phase;
		if (minrev == -1 || minrev > rev)
			minrev = rev;
		Py_DECREF(item);
	}
	Py_DECREF(iterator);
	return minrev;
failed:
	Py_DECREF(iterator);
	Py_DECREF(item);
	return -2;
}
1107 1113
1108 1114 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1109 1115 {
1110 1116 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1111 1117 96: internal */
1112 1118 static const char trackedphases[] = {1, 2, 32, 96};
1113 1119 PyObject *roots = Py_None;
1114 1120 PyObject *phasesetsdict = NULL;
1115 1121 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1116 1122 Py_ssize_t len = index_length(self);
1117 1123 char *phases = NULL;
1118 1124 int minphaserev = -1, rev, i;
1119 1125 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1120 1126
1121 1127 if (!PyArg_ParseTuple(args, "O", &roots))
1122 1128 return NULL;
1123 1129 if (roots == NULL || !PyDict_Check(roots)) {
1124 1130 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1125 1131 return NULL;
1126 1132 }
1127 1133
1128 1134 phases = calloc(len, 1);
1129 1135 if (phases == NULL) {
1130 1136 PyErr_NoMemory();
1131 1137 return NULL;
1132 1138 }
1133 1139
1134 1140 for (i = 0; i < numphases; ++i) {
1135 1141 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1136 1142 PyObject *phaseroots = NULL;
1137 1143 if (pyphase == NULL)
1138 1144 goto release;
1139 1145 phaseroots = PyDict_GetItem(roots, pyphase);
1140 1146 Py_DECREF(pyphase);
1141 1147 if (phaseroots == NULL)
1142 1148 continue;
1143 1149 rev = add_roots_get_min(self, phaseroots, phases,
1144 1150 trackedphases[i]);
1145 1151 if (rev == -2)
1146 1152 goto release;
1147 1153 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1148 1154 minphaserev = rev;
1149 1155 }
1150 1156
1151 1157 for (i = 0; i < numphases; ++i) {
1152 1158 phasesets[i] = PySet_New(NULL);
1153 1159 if (phasesets[i] == NULL)
1154 1160 goto release;
1155 1161 }
1156 1162
1157 1163 if (minphaserev == -1)
1158 1164 minphaserev = len;
1159 1165 for (rev = minphaserev; rev < len; ++rev) {
1160 1166 PyObject *pyphase = NULL;
1161 1167 PyObject *pyrev = NULL;
1162 1168 int parents[2];
1163 1169 /*
1164 1170 * The parent lookup could be skipped for phaseroots, but
1165 1171 * phase --force would historically not recompute them
1166 1172 * correctly, leaving descendents with a lower phase around.
1167 1173 * As such, unconditionally recompute the phase.
1168 1174 */
1169 1175 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1170 1176 goto release;
1171 1177 set_phase_from_parents(phases, parents[0], parents[1], rev);
1172 1178 switch (phases[rev]) {
1173 1179 case 0:
1174 1180 continue;
1175 1181 case 1:
1176 1182 pyphase = phasesets[0];
1177 1183 break;
1178 1184 case 2:
1179 1185 pyphase = phasesets[1];
1180 1186 break;
1181 1187 case 32:
1182 1188 pyphase = phasesets[2];
1183 1189 break;
1184 1190 case 96:
1185 1191 pyphase = phasesets[3];
1186 1192 break;
1187 1193 default:
1188 1194 /* this should never happen since the phase number is
1189 1195 * specified by this function. */
1190 1196 PyErr_SetString(PyExc_SystemError,
1191 1197 "bad phase number in internal list");
1192 1198 goto release;
1193 1199 }
1194 1200 pyrev = PyLong_FromLong(rev);
1195 1201 if (pyrev == NULL)
1196 1202 goto release;
1197 1203 if (PySet_Add(pyphase, pyrev) == -1) {
1198 1204 Py_DECREF(pyrev);
1199 1205 goto release;
1200 1206 }
1201 1207 Py_DECREF(pyrev);
1202 1208 }
1203 1209
1204 1210 phasesetsdict = _dict_new_presized(numphases);
1205 1211 if (phasesetsdict == NULL)
1206 1212 goto release;
1207 1213 for (i = 0; i < numphases; ++i) {
1208 1214 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1209 1215 if (pyphase == NULL)
1210 1216 goto release;
1211 1217 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1212 1218 -1) {
1213 1219 Py_DECREF(pyphase);
1214 1220 goto release;
1215 1221 }
1216 1222 Py_DECREF(phasesets[i]);
1217 1223 phasesets[i] = NULL;
1218 1224 }
1219 1225
1220 1226 free(phases);
1221 1227 return Py_BuildValue("nN", len, phasesetsdict);
1222 1228
1223 1229 release:
1224 1230 for (i = 0; i < numphases; ++i)
1225 1231 Py_XDECREF(phasesets[i]);
1226 1232 Py_XDECREF(phasesetsdict);
1227 1233
1228 1234 free(phases);
1229 1235 return NULL;
1230 1236 }
1231 1237
1232 1238 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1233 1239 {
1234 1240 Py_ssize_t i, j, len;
1235 1241 char *nothead = NULL;
1236 1242 PyObject *heads = NULL;
1237 1243 PyObject *filter = NULL;
1238 1244 PyObject *filteredrevs = Py_None;
1239 1245
1240 1246 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1241 1247 return NULL;
1242 1248 }
1243 1249
1244 1250 if (self->headrevs && filteredrevs == self->filteredrevs)
1245 1251 return list_copy(self->headrevs);
1246 1252
1247 1253 Py_DECREF(self->filteredrevs);
1248 1254 self->filteredrevs = filteredrevs;
1249 1255 Py_INCREF(filteredrevs);
1250 1256
1251 1257 if (filteredrevs != Py_None) {
1252 1258 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1253 1259 if (!filter) {
1254 1260 PyErr_SetString(
1255 1261 PyExc_TypeError,
1256 1262 "filteredrevs has no attribute __contains__");
1257 1263 goto bail;
1258 1264 }
1259 1265 }
1260 1266
1261 1267 len = index_length(self);
1262 1268 heads = PyList_New(0);
1263 1269 if (heads == NULL)
1264 1270 goto bail;
1265 1271 if (len == 0) {
1266 PyObject *nullid = PyLong_FromLong(-1);
1267 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1272 if (pylist_append_owned(heads, PyLong_FromLong(-1)) == -1) {
1268 1273 Py_XDECREF(nullid);
1269 1274 goto bail;
1270 1275 }
1271 1276 goto done;
1272 1277 }
1273 1278
1274 1279 nothead = calloc(len, 1);
1275 1280 if (nothead == NULL) {
1276 1281 PyErr_NoMemory();
1277 1282 goto bail;
1278 1283 }
1279 1284
1280 1285 for (i = len - 1; i >= 0; i--) {
1281 1286 int isfiltered;
1282 1287 int parents[2];
1283 1288
1284 1289 /* If nothead[i] == 1, it means we've seen an unfiltered child
1285 1290 * of this node already, and therefore this node is not
1286 1291 * filtered. So we can skip the expensive check_filter step.
1287 1292 */
1288 1293 if (nothead[i] != 1) {
1289 1294 isfiltered = check_filter(filter, i);
1290 1295 if (isfiltered == -1) {
1291 1296 PyErr_SetString(PyExc_TypeError,
1292 1297 "unable to check filter");
1293 1298 goto bail;
1294 1299 }
1295 1300
1296 1301 if (isfiltered) {
1297 1302 nothead[i] = 1;
1298 1303 continue;
1299 1304 }
1300 1305 }
1301 1306
1302 1307 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1303 1308 goto bail;
1304 1309 for (j = 0; j < 2; j++) {
1305 1310 if (parents[j] >= 0)
1306 1311 nothead[parents[j]] = 1;
1307 1312 }
1308 1313 }
1309 1314
1310 1315 for (i = 0; i < len; i++) {
1311 PyObject *head;
1312
1313 1316 if (nothead[i])
1314 1317 continue;
1315 head = PyLong_FromSsize_t(i);
1316 if (head == NULL || PyList_Append(heads, head) == -1) {
1317 Py_XDECREF(head);
1318 if (pylist_append_owned(heads, PyLong_FromSsize_t(i)) == -1) {
1318 1319 goto bail;
1319 1320 }
1320 1321 }
1321 1322
1322 1323 done:
1323 1324 self->headrevs = heads;
1324 1325 Py_XDECREF(filter);
1325 1326 free(nothead);
1326 1327 return list_copy(self->headrevs);
1327 1328 bail:
1328 1329 Py_XDECREF(filter);
1329 1330 Py_XDECREF(heads);
1330 1331 free(nothead);
1331 1332 return NULL;
1332 1333 }
1333 1334
1334 1335 /**
1335 1336 * Obtain the base revision index entry.
1336 1337 *
1337 1338 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1338 1339 */
1339 1340 static inline int index_baserev(indexObject *self, int rev)
1340 1341 {
1341 1342 const char *data;
1342 1343 int result;
1343 1344
1344 1345 data = index_deref(self, rev);
1345 1346 if (data == NULL)
1346 1347 return -2;
1347 1348
1348 1349 if (self->format_version == format_v1) {
1349 1350 result = getbe32(data + entry_v1_offset_base_rev);
1350 1351 } else if (self->format_version == format_v2) {
1351 1352 result = getbe32(data + entry_v2_offset_base_rev);
1352 1353 } else if (self->format_version == format_cl2) {
1353 1354 return rev;
1354 1355 } else {
1355 1356 raise_revlog_error();
1356 1357 return -1;
1357 1358 }
1358 1359
1359 1360 if (result > rev) {
1360 1361 PyErr_Format(
1361 1362 PyExc_ValueError,
1362 1363 "corrupted revlog, revision base above revision: %d, %d",
1363 1364 rev, result);
1364 1365 return -2;
1365 1366 }
1366 1367 if (result < -1) {
1367 1368 PyErr_Format(
1368 1369 PyExc_ValueError,
1369 1370 "corrupted revlog, revision base out of range: %d, %d", rev,
1370 1371 result);
1371 1372 return -2;
1372 1373 }
1373 1374 return result;
1374 1375 }
1375 1376
/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 * Returns 1 if `rev` is a snapshot, 0 if it is a regular delta, and
 * -1 with an exception set on error.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	int b;
	Py_ssize_t base;
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			/* self-based entries are treated as having no base */
			base = -1;
		}
		if (base == -2) {
			/* error already set by index_baserev */
			assert(PyErr_Occurred());
			return -1;
		}
		if (base == -1) {
			/* delta against null: a full snapshot */
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		/* skip over empty revisions when walking each parent's
		 * delta chain */
		while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
			b = index_baserev(self, ps[0]);
			if (b == ps[0]) {
				break;
			}
			ps[0] = b;
		}
		while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
			b = index_baserev(self, ps[1]);
			if (b == ps[1]) {
				break;
			}
			ps[1] = b;
		}
		if (base == ps[0] || base == ps[1]) {
			/* delta against a parent: not a snapshot */
			return 0;
		}
		/* otherwise keep walking up the base chain */
		rev = base;
	}
	return rev == -1;
}
1424 1425
1425 1426 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1426 1427 {
1427 1428 long rev;
1428 1429 int issnap;
1429 1430 Py_ssize_t length = index_length(self);
1430 1431
1431 1432 if (!pylong_to_long(value, &rev)) {
1432 1433 return NULL;
1433 1434 }
1434 1435 if (rev < -1 || rev >= length) {
1435 1436 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1436 1437 rev);
1437 1438 return NULL;
1438 1439 };
1439 1440 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1440 1441 if (issnap < 0) {
1441 1442 return NULL;
1442 1443 };
1443 1444 return PyBool_FromLong((long)issnap);
1444 1445 }
1445 1446
1446 1447 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1447 1448 {
1448 1449 Py_ssize_t start_rev;
1449 1450 Py_ssize_t end_rev;
1450 1451 PyObject *cache;
1451 1452 Py_ssize_t base;
1452 1453 Py_ssize_t rev;
1453 1454 PyObject *key = NULL;
1454 1455 PyObject *value = NULL;
1455 1456 const Py_ssize_t length = index_length(self);
1456 1457 if (!PyArg_ParseTuple(args, "O!nn", &PyDict_Type, &cache, &start_rev,
1457 1458 &end_rev)) {
1458 1459 return NULL;
1459 1460 }
1460 1461 end_rev += 1;
1461 1462 if (end_rev > length) {
1462 1463 end_rev = length;
1463 1464 }
1464 1465 if (start_rev < 0) {
1465 1466 start_rev = 0;
1466 1467 }
1467 1468 for (rev = start_rev; rev < end_rev; rev++) {
1468 1469 int issnap;
1469 1470 PyObject *allvalues = NULL;
1470 1471 issnap = index_issnapshotrev(self, rev);
1471 1472 if (issnap < 0) {
1472 1473 goto bail;
1473 1474 }
1474 1475 if (issnap == 0) {
1475 1476 continue;
1476 1477 }
1477 1478 base = (Py_ssize_t)index_baserev(self, rev);
1478 1479 if (base == rev) {
1479 1480 base = -1;
1480 1481 }
1481 1482 if (base == -2) {
1482 1483 assert(PyErr_Occurred());
1483 1484 goto bail;
1484 1485 }
1485 1486 key = PyLong_FromSsize_t(base);
1486 1487 allvalues = PyDict_GetItem(cache, key);
1487 1488 if (allvalues == NULL && PyErr_Occurred()) {
1488 1489 goto bail;
1489 1490 }
1490 1491 if (allvalues == NULL) {
1491 1492 int r;
1492 1493 allvalues = PySet_New(0);
1493 1494 if (!allvalues) {
1494 1495 goto bail;
1495 1496 }
1496 1497 r = PyDict_SetItem(cache, key, allvalues);
1497 1498 Py_DECREF(allvalues);
1498 1499 if (r < 0) {
1499 1500 goto bail;
1500 1501 }
1501 1502 }
1502 1503 value = PyLong_FromSsize_t(rev);
1503 1504 if (PySet_Add(allvalues, value)) {
1504 1505 goto bail;
1505 1506 }
1506 1507 Py_CLEAR(key);
1507 1508 Py_CLEAR(value);
1508 1509 }
1509 1510 Py_RETURN_NONE;
1510 1511 bail:
1511 1512 Py_XDECREF(key);
1512 1513 Py_XDECREF(value);
1513 1514 return NULL;
1514 1515 }
1515 1516
1516 1517 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1517 1518 {
1518 1519 int rev, generaldelta;
1519 1520 PyObject *stoparg;
1520 1521 int stoprev, iterrev, baserev = -1;
1521 1522 int stopped;
1522 1523 PyObject *chain = NULL, *result = NULL;
1523 1524 const Py_ssize_t length = index_length(self);
1524 1525
1525 1526 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1526 1527 return NULL;
1527 1528 }
1528 1529
1529 1530 if (PyLong_Check(stoparg)) {
1530 1531 stoprev = (int)PyLong_AsLong(stoparg);
1531 1532 if (stoprev == -1 && PyErr_Occurred()) {
1532 1533 return NULL;
1533 1534 }
1534 1535 } else if (stoparg == Py_None) {
1535 1536 stoprev = -2;
1536 1537 } else {
1537 1538 PyErr_SetString(PyExc_ValueError,
1538 1539 "stoprev must be integer or None");
1539 1540 return NULL;
1540 1541 }
1541 1542
1542 1543 if (rev < 0 || rev >= length) {
1543 1544 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1544 1545 return NULL;
1545 1546 }
1546 1547
1547 1548 chain = PyList_New(0);
1548 1549 if (chain == NULL) {
1549 1550 return NULL;
1550 1551 }
1551 1552
1552 1553 baserev = index_baserev(self, rev);
1553 1554
1554 1555 /* This should never happen. */
1555 1556 if (baserev <= -2) {
1556 1557 /* Error should be set by index_deref() */
1557 1558 assert(PyErr_Occurred());
1558 1559 goto bail;
1559 1560 }
1560 1561
1561 1562 iterrev = rev;
1562 1563
1563 1564 while (iterrev != baserev && iterrev != stoprev) {
1564 PyObject *value = PyLong_FromLong(iterrev);
1565 if (value == NULL) {
1565 if (pylist_append_owned(chain, PyLong_FromLong(iterrev))) {
1566 1566 goto bail;
1567 1567 }
1568 if (PyList_Append(chain, value)) {
1569 Py_DECREF(value);
1570 goto bail;
1571 }
1572 Py_DECREF(value);
1573 1568
1574 1569 if (generaldelta) {
1575 1570 iterrev = baserev;
1576 1571 } else {
1577 1572 iterrev--;
1578 1573 }
1579 1574
1580 1575 if (iterrev < 0) {
1581 1576 break;
1582 1577 }
1583 1578
1584 1579 if (iterrev >= length) {
1585 1580 PyErr_SetString(PyExc_IndexError,
1586 1581 "revision outside index");
1587 1582 return NULL;
1588 1583 }
1589 1584
1590 1585 baserev = index_baserev(self, iterrev);
1591 1586
1592 1587 /* This should never happen. */
1593 1588 if (baserev <= -2) {
1594 1589 /* Error should be set by index_deref() */
1595 1590 assert(PyErr_Occurred());
1596 1591 goto bail;
1597 1592 }
1598 1593 }
1599 1594
1600 1595 if (iterrev == stoprev) {
1601 1596 stopped = 1;
1602 1597 } else {
1603 PyObject *value = PyLong_FromLong(iterrev);
1604 if (value == NULL) {
1598 if (pylist_append_owned(chain, PyLong_FromLong(iterrev))) {
1605 1599 goto bail;
1606 1600 }
1607 if (PyList_Append(chain, value)) {
1608 Py_DECREF(value);
1609 goto bail;
1610 }
1611 Py_DECREF(value);
1612 1601
1613 1602 stopped = 0;
1614 1603 }
1615 1604
1616 1605 if (PyList_Reverse(chain)) {
1617 1606 goto bail;
1618 1607 }
1619 1608
1620 1609 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1621 1610 Py_DECREF(chain);
1622 1611 return result;
1623 1612
1624 1613 bail:
1625 1614 Py_DECREF(chain);
1626 1615 return NULL;
1627 1616 }
1628 1617
1629 1618 static inline int64_t
1630 1619 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1631 1620 {
1632 1621 int64_t start_offset;
1633 1622 int64_t end_offset;
1634 1623 int end_size;
1635 1624 start_offset = index_get_start(self, start_rev);
1636 1625 if (start_offset < 0) {
1637 1626 return -1;
1638 1627 }
1639 1628 end_offset = index_get_start(self, end_rev);
1640 1629 if (end_offset < 0) {
1641 1630 return -1;
1642 1631 }
1643 1632 end_size = index_get_length(self, end_rev);
1644 1633 if (end_size < 0) {
1645 1634 return -1;
1646 1635 }
1647 1636 if (end_offset < start_offset) {
1648 1637 PyErr_Format(PyExc_ValueError,
1649 1638 "corrupted revlog index: inconsistent offset "
1650 1639 "between revisions (%zd) and (%zd)",
1651 1640 start_rev, end_rev);
1652 1641 return -1;
1653 1642 }
1654 1643 return (end_offset - start_offset) + (int64_t)end_size;
1655 1644 }
1656 1645
1657 1646 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1658 1647 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1659 1648 Py_ssize_t startidx, Py_ssize_t endidx)
1660 1649 {
1661 1650 int length;
1662 1651 while (endidx > 1 && endidx > startidx) {
1663 1652 length = index_get_length(self, revs[endidx - 1]);
1664 1653 if (length < 0) {
1665 1654 return -1;
1666 1655 }
1667 1656 if (length != 0) {
1668 1657 break;
1669 1658 }
1670 1659 endidx -= 1;
1671 1660 }
1672 1661 return endidx;
1673 1662 }
1674 1663
/* A gap between two consecutive on-disk chunks of a delta chain:
 * `size` is the gap in bytes, `idx` the position (in the revs array)
 * of the revision that follows it. */
struct Gap {
	int64_t size;
	Py_ssize_t idx;
};

/* qsort comparator: order gaps by ascending size */
static int gap_compare(const void *left, const void *right)
{
	const struct Gap *l_left = ((const struct Gap *)left);
	const struct Gap *l_right = ((const struct Gap *)right);
	if (l_left->size < l_right->size) {
		return -1;
	} else if (l_left->size > l_right->size) {
		return 1;
	}
	return 0;
}
/* qsort comparator: order Py_ssize_t values ascending */
static int Py_ssize_t_compare(const void *left, const void *right)
{
	const Py_ssize_t l_left = *(const Py_ssize_t *)left;
	const Py_ssize_t l_right = *(const Py_ssize_t *)right;
	if (l_left < l_right) {
		return -1;
	} else if (l_left > l_right) {
		return 1;
	}
	return 0;
}
1702 1691
/*
 * slicechunktodensity(revs, targetdensity, mingapsize) -> sequence of
 * slices of `revs`
 *
 * Slice the delta chain `revs` (a list of revisions) so that reading
 * each slice achieves a payload/read-bytes density of at least
 * `targetdensity`, skipping gaps larger than `mingapsize` bytes.
 * Returns the input unsliced (as a 1-tuple) when no slicing is needed.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum =
		    PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			/* pylist_append_owned handles a NULL slice and
			 * consumes the reference either way */
			PyObject *chunk =
			    PyList_GetSlice(list_revs, previdx, endidx);
			if (pylist_append_owned(allchunks, chunk) == -1) {
				goto bail;
			}
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1899 1882
1900 1883 static inline int nt_level(const char *node, Py_ssize_t level)
1901 1884 {
1902 1885 int v = node[level >> 1];
1903 1886 if (!(level & 1))
1904 1887 v >>= 4;
1905 1888 return v & 0xf;
1906 1889 }
1907 1890
1908 1891 /*
1909 1892 * Return values:
1910 1893 *
1911 1894 * -4: match is ambiguous (multiple candidates)
1912 1895 * -2: not found
1913 1896 * rest: valid rev
1914 1897 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	/* choose the nybble accessor matching the input encoding */
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	/* one trie level per hex digit, two per binary byte */
	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf entry: decode the rev and verify the
			 * remaining nybbles of the query match it */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* interior node: descend */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1958 1941
1959 1942 static int nt_new(nodetree *self)
1960 1943 {
1961 1944 if (self->length == self->capacity) {
1962 1945 size_t newcapacity;
1963 1946 nodetreenode *newnodes;
1964 1947 newcapacity = self->capacity * 2;
1965 1948 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1966 1949 PyErr_SetString(PyExc_MemoryError,
1967 1950 "overflow in nt_new");
1968 1951 return -1;
1969 1952 }
1970 1953 newnodes =
1971 1954 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1972 1955 if (newnodes == NULL) {
1973 1956 PyErr_SetString(PyExc_MemoryError, "out of memory");
1974 1957 return -1;
1975 1958 }
1976 1959 self->capacity = newcapacity;
1977 1960 self->nodes = newnodes;
1978 1961 memset(&self->nodes[self->length], 0,
1979 1962 sizeof(nodetreenode) * (self->capacity - self->length));
1980 1963 }
1981 1964 return self->length++;
1982 1965 }
1983 1966
/* Insert a node->rev mapping into the trie, splitting a leaf into an
 * interior node when two nodes share a prefix.
 * Returns 0 on success, -1 on error (exception set by callees). */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the rev as a leaf (-rev - 2) */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* occupied leaf: either same node (overwrite) or a
			 * prefix collision requiring a split */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			/* push the old leaf one level down, then loop to
			 * place the new node */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* interior node: descend */
			level += 1;
			off = v;
		}
	}

	return -1;
}
2031 2014
2032 2015 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2033 2016 {
2034 2017 Py_ssize_t rev;
2035 2018 const char *node;
2036 2019 Py_ssize_t length;
2037 2020 if (!PyArg_ParseTuple(args, "n", &rev))
2038 2021 return NULL;
2039 2022 length = index_length(self->nt.index);
2040 2023 if (rev < 0 || rev >= length) {
2041 2024 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2042 2025 return NULL;
2043 2026 }
2044 2027 node = index_node_existing(self->nt.index, rev);
2045 2028 if (nt_insert(&self->nt, node, (int)rev) == -1)
2046 2029 return NULL;
2047 2030 Py_RETURN_NONE;
2048 2031 }
2049 2032
2050 2033 static int nt_delete_node(nodetree *self, const char *node)
2051 2034 {
2052 2035 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2053 2036 */
2054 2037 return nt_insert(self, node, -2);
2055 2038 }
2056 2039
/* Initialize a nodetree over "index", sized for roughly "capacity"
 * revisions. Returns 0 on success, -1 with an exception set on error. */
static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	/* slot 0 is the (always allocated) root node */
	self->length = 1;
	return 0;
}
2081 2064
2082 2065 static int ntobj_init(nodetreeObject *self, PyObject *args)
2083 2066 {
2084 2067 PyObject *index;
2085 2068 unsigned capacity;
2086 2069 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2087 2070 &capacity))
2088 2071 return -1;
2089 2072 Py_INCREF(index);
2090 2073 return nt_init(&self->nt, (indexObject *)index, capacity);
2091 2074 }
2092 2075
2093 2076 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2094 2077 {
2095 2078 return nt_find(self, node, nodelen, 1);
2096 2079 }
2097 2080
2098 2081 /*
2099 2082 * Find the length of the shortest unique prefix of node.
2100 2083 *
2101 2084 * Return values:
2102 2085 *
2103 2086 * -3: error (exception set)
2104 2087 * -2: not found (no exception set)
2105 2088 * rest: length of shortest prefix
2106 2089 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* leaf: the prefix is unique once it reaches here,
			 * but it must actually belong to the queried node */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		/* interior node: need at least one more nybble */
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
2143 2126
2144 2127 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2145 2128 {
2146 2129 PyObject *val;
2147 2130 char *node;
2148 2131 int length;
2149 2132
2150 2133 if (!PyArg_ParseTuple(args, "O", &val))
2151 2134 return NULL;
2152 2135 if (node_check(self->nt.nodelen, val, &node) == -1)
2153 2136 return NULL;
2154 2137
2155 2138 length = nt_shortest(&self->nt, node);
2156 2139 if (length == -3)
2157 2140 return NULL;
2158 2141 if (length == -2) {
2159 2142 raise_revlog_error();
2160 2143 return NULL;
2161 2144 }
2162 2145 return PyLong_FromLong(length);
2163 2146 }
2164 2147
2165 2148 static void nt_dealloc(nodetree *self)
2166 2149 {
2167 2150 free(self->nodes);
2168 2151 self->nodes = NULL;
2169 2152 }
2170 2153
2171 2154 static void ntobj_dealloc(nodetreeObject *self)
2172 2155 {
2173 2156 Py_XDECREF(self->nt.index);
2174 2157 nt_dealloc(&self->nt);
2175 2158 PyObject_Del(self);
2176 2159 }
2177 2160
/* Methods exposed on parsers.nodetree instances. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
2185 2168
/* Python type object for parsers.nodetree (standalone radix tree). */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
2225 2208
/* Lazily create the index's embedded radix tree and seed it with the
 * nullid. Returns 0 on success, -1 with an exception set on error. */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		/* revs >= ntrev have not been inserted into the tree yet */
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
2244 2227
2245 2228 /*
2246 2229 * Return values:
2247 2230 *
2248 2231 * -3: error (exception set)
2249 2232 * -2: not found (no exception set)
2250 2233 * rest: valid rev
2251 2234 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	/* fast path: the node is already in the trie */
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				/* cache only the node we were asked for */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			/* cache every node visited during the scan */
			if (nt_insert(&self->nt, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		/* everything at or above rev is now cached */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2304 2287
2305 2288 static PyObject *index_getitem(indexObject *self, PyObject *value)
2306 2289 {
2307 2290 char *node;
2308 2291 int rev;
2309 2292
2310 2293 if (PyLong_Check(value)) {
2311 2294 long idx;
2312 2295 if (!pylong_to_long(value, &idx)) {
2313 2296 return NULL;
2314 2297 }
2315 2298 return index_get(self, idx);
2316 2299 }
2317 2300
2318 2301 if (node_check(self->nodelen, value, &node) == -1)
2319 2302 return NULL;
2320 2303 rev = index_find_node(self, node);
2321 2304 if (rev >= -1)
2322 2305 return PyLong_FromLong(rev);
2323 2306 if (rev == -2)
2324 2307 raise_revlog_error();
2325 2308 return NULL;
2326 2309 }
2327 2310
2328 2311 /*
2329 2312 * Fully populate the radix tree.
2330 2313 */
2331 2314 static int index_populate_nt(indexObject *self)
2332 2315 {
2333 2316 int rev;
2334 2317 if (self->ntrev > 0) {
2335 2318 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2336 2319 const char *n = index_node_existing(self, rev);
2337 2320 if (n == NULL)
2338 2321 return -1;
2339 2322 if (nt_insert(&self->nt, n, rev) == -1)
2340 2323 return -1;
2341 2324 }
2342 2325 self->ntrev = -1;
2343 2326 }
2344 2327 return 0;
2345 2328 }
2346 2329
/* index.partialmatch(hexprefix): resolve a hex node prefix to the full
 * binary node. Returns the node, None when absent or the prefix is not
 * hex, or NULL with RevlogError on an ambiguous prefix. */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* partial matching needs the fully-built tree */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		/* ambiguous prefix */
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2397 2380
2398 2381 static PyObject *index_shortest(indexObject *self, PyObject *args)
2399 2382 {
2400 2383 PyObject *val;
2401 2384 char *node;
2402 2385 int length;
2403 2386
2404 2387 if (!PyArg_ParseTuple(args, "O", &val))
2405 2388 return NULL;
2406 2389 if (node_check(self->nodelen, val, &node) == -1)
2407 2390 return NULL;
2408 2391
2409 2392 self->ntlookups++;
2410 2393 if (index_init_nt(self) == -1)
2411 2394 return NULL;
2412 2395 if (index_populate_nt(self) == -1)
2413 2396 return NULL;
2414 2397 length = nt_shortest(&self->nt, node);
2415 2398 if (length == -3)
2416 2399 return NULL;
2417 2400 if (length == -2) {
2418 2401 raise_revlog_error();
2419 2402 return NULL;
2420 2403 }
2421 2404 return PyLong_FromLong(length);
2422 2405 }
2423 2406
2424 2407 static PyObject *index_m_get(indexObject *self, PyObject *args)
2425 2408 {
2426 2409 PyObject *val;
2427 2410 char *node;
2428 2411 int rev;
2429 2412
2430 2413 if (!PyArg_ParseTuple(args, "O", &val))
2431 2414 return NULL;
2432 2415 if (node_check(self->nodelen, val, &node) == -1)
2433 2416 return NULL;
2434 2417 rev = index_find_node(self, node);
2435 2418 if (rev == -3)
2436 2419 return NULL;
2437 2420 if (rev == -2)
2438 2421 Py_RETURN_NONE;
2439 2422 return PyLong_FromLong(rev);
2440 2423 }
2441 2424
2442 2425 static int index_contains(indexObject *self, PyObject *value)
2443 2426 {
2444 2427 char *node;
2445 2428
2446 2429 if (PyLong_Check(value)) {
2447 2430 long rev;
2448 2431 if (!pylong_to_long(value, &rev)) {
2449 2432 return -1;
2450 2433 }
2451 2434 return rev >= -1 && rev < index_length(self);
2452 2435 }
2453 2436
2454 2437 if (node_check(self->nodelen, value, &node) == -1)
2455 2438 return -1;
2456 2439
2457 2440 switch (index_find_node(self, node)) {
2458 2441 case -3:
2459 2442 return -1;
2460 2443 case -2:
2461 2444 return 0;
2462 2445 default:
2463 2446 return 1;
2464 2447 }
2465 2448 }
2466 2449
2467 2450 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2468 2451 {
2469 2452 int ret = index_contains(self, args);
2470 2453 if (ret < 0)
2471 2454 return NULL;
2472 2455 return PyBool_FromLong((long)ret);
2473 2456 }
2474 2457
2475 2458 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2476 2459 {
2477 2460 char *node;
2478 2461 int rev;
2479 2462
2480 2463 if (node_check(self->nodelen, val, &node) == -1)
2481 2464 return NULL;
2482 2465 rev = index_find_node(self, node);
2483 2466 if (rev >= -1)
2484 2467 return PyLong_FromLong(rev);
2485 2468 if (rev == -2)
2486 2469 raise_revlog_error();
2487 2470 return NULL;
2488 2471 }
2489 2472
2490 2473 typedef uint64_t bitmask;
2491 2474
2492 2475 /*
2493 2476 * Given a disjoint set of revs, return all candidates for the
2494 2477 * greatest common ancestor. In revset notation, this is the set
2495 2478 * "heads(::a and ::b and ...)"
2496 2479 */
2497 2480 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2498 2481 int revcount)
2499 2482 {
2500 2483 const bitmask allseen = (1ull << revcount) - 1;
2501 2484 const bitmask poison = 1ull << revcount;
2502 2485 PyObject *gca = PyList_New(0);
2503 2486 int i, v, interesting;
2504 2487 int maxrev = -1;
2505 2488 bitmask sp;
2506 2489 bitmask *seen;
2507 2490
2508 2491 if (gca == NULL)
2509 2492 return PyErr_NoMemory();
2510 2493
2511 2494 for (i = 0; i < revcount; i++) {
2512 2495 if (revs[i] > maxrev)
2513 2496 maxrev = revs[i];
2514 2497 }
2515 2498
2516 2499 seen = calloc(sizeof(*seen), maxrev + 1);
2517 2500 if (seen == NULL) {
2518 2501 Py_DECREF(gca);
2519 2502 return PyErr_NoMemory();
2520 2503 }
2521 2504
2522 2505 for (i = 0; i < revcount; i++)
2523 2506 seen[revs[i]] = 1ull << i;
2524 2507
2525 2508 interesting = revcount;
2526 2509
2527 2510 for (v = maxrev; v >= 0 && interesting; v--) {
2528 2511 bitmask sv = seen[v];
2529 2512 int parents[2];
2530 2513
2531 2514 if (!sv)
2532 2515 continue;
2533 2516
2534 2517 if (sv < poison) {
2535 2518 interesting -= 1;
2536 2519 if (sv == allseen) {
2537 PyObject *obj = PyLong_FromLong(v);
2538 if (obj == NULL)
2539 goto bail;
2540 if (PyList_Append(gca, obj) == -1) {
2541 Py_DECREF(obj);
2520 if (pylist_append_owned(
2521 gca, PyLong_FromLong(v)) == -1) {
2542 2522 goto bail;
2543 2523 }
2544 2524 sv |= poison;
2545 2525 for (i = 0; i < revcount; i++) {
2546 2526 if (revs[i] == v)
2547 2527 goto done;
2548 2528 }
2549 2529 }
2550 2530 }
2551 2531 if (index_get_parents(self, v, parents, maxrev) < 0)
2552 2532 goto bail;
2553 2533
2554 2534 for (i = 0; i < 2; i++) {
2555 2535 int p = parents[i];
2556 2536 if (p == -1)
2557 2537 continue;
2558 2538 sp = seen[p];
2559 2539 if (sv < poison) {
2560 2540 if (sp == 0) {
2561 2541 seen[p] = sv;
2562 2542 interesting++;
2563 2543 } else if (sp != sv)
2564 2544 seen[p] |= sv;
2565 2545 } else {
2566 2546 if (sp && sp < poison)
2567 2547 interesting--;
2568 2548 seen[p] = sv;
2569 2549 }
2570 2550 }
2571 2551 }
2572 2552
2573 2553 done:
2574 2554 free(seen);
2575 2555 return gca;
2576 2556 bail:
2577 2557 free(seen);
2578 2558 Py_XDECREF(gca);
2579 2559 return NULL;
2580 2560 }
2581 2561
2582 2562 /*
2583 2563 * Given a disjoint set of revs, return the subset with the longest
2584 2564 * path to the root.
2585 2565 */
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
	const Py_ssize_t revcount = PyList_GET_SIZE(revs);
	static const Py_ssize_t capacity = 24;
	int *depth, *interesting = NULL;
	int i, j, v, ninteresting;
	PyObject *dict = NULL, *keys = NULL;
	long *seen = NULL;
	int maxrev = -1;
	long final;

	/* each input rev gets one bit in the "seen" masks */
	if (revcount > capacity) {
		PyErr_Format(PyExc_OverflowError,
		             "bitset size (%ld) > capacity (%ld)",
		             (long)revcount, (long)capacity);
		return NULL;
	}

	for (i = 0; i < revcount; i++) {
		int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
		if (n > maxrev)
			maxrev = n;
	}

	depth = calloc(sizeof(*depth), maxrev + 1);
	if (depth == NULL)
		return PyErr_NoMemory();

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* interesting[] is indexed by seen-bitmask value */
	interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
	if (interesting == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	if (PyList_Sort(revs) == -1)
		goto bail;

	for (i = 0; i < revcount; i++) {
		int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
		long b = 1l << i;
		depth[n] = 1;
		seen[n] = b;
		interesting[b] = 1;
	}

	/* invariant: ninteresting is the number of non-zero entries in
	 * interesting. */
	ninteresting = (int)revcount;

	/* sweep down from maxrev, propagating depth and seen-masks to
	 * parents until only one interesting mask remains */
	for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
		int dv = depth[v];
		int parents[2];
		long sv;

		if (dv == 0)
			continue;

		sv = seen[v];
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			long sp;
			int dp;

			if (p == -1)
				continue;

			dp = depth[p];
			sp = seen[p];
			if (dp <= dv) {
				/* parent reached by a longer path: it
				 * inherits v's mask */
				depth[p] = dv + 1;
				if (sp != sv) {
					interesting[sv] += 1;
					seen[p] = sv;
					if (sp) {
						interesting[sp] -= 1;
						if (interesting[sp] == 0)
							ninteresting -= 1;
					}
				}
			} else if (dv == dp - 1) {
				/* same depth via both paths: merge masks */
				long nsp = sp | sv;
				if (nsp == sp)
					continue;
				seen[p] = nsp;
				interesting[sp] -= 1;
				if (interesting[sp] == 0)
					ninteresting -= 1;
				if (interesting[nsp] == 0)
					ninteresting += 1;
				interesting[nsp] += 1;
			}
		}
		interesting[sv] -= 1;
		if (interesting[sv] == 0)
			ninteresting -= 1;
	}

	/* fold the surviving masks into one; set bits mark the inputs
	 * with the deepest paths */
	final = 0;
	j = ninteresting;
	for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
		if (interesting[i] == 0)
			continue;
		final |= i;
		j -= 1;
	}
	if (final == 0) {
		keys = PyList_New(0);
		goto bail;
	}

	dict = PyDict_New();
	if (dict == NULL)
		goto bail;

	for (i = 0; i < revcount; i++) {
		PyObject *key;

		if ((final & (1 << i)) == 0)
			continue;

		key = PyList_GET_ITEM(revs, i);
		Py_INCREF(key);
		Py_INCREF(Py_None);
		if (PyDict_SetItem(dict, key, Py_None) == -1) {
			Py_DECREF(key);
			Py_DECREF(Py_None);
			goto bail;
		}
	}

	keys = PyDict_Keys(dict);

bail:
	free(depth);
	free(seen);
	free(interesting);
	Py_XDECREF(dict);

	return keys;
}
2735 2715
2736 2716 /*
2737 2717 * Given a (possibly overlapping) set of revs, return all the
2738 2718 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2739 2719 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	/* validate and de-duplicate the input revs */
	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyLong_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyLong_AsLong(obj);
		Py_DECREF(obj);
		if (val == -1) {
			/* nullrev has no ancestors: result is empty */
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	/* trivial cases: 0 or 1 distinct revs */
	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyLong_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}
2827 2807
2828 2808 /*
2829 2809 * Given a (possibly overlapping) set of revs, return the greatest
2830 2810 * common ancestors: those with the longest path to the root.
2831 2811 */
2832 2812 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2833 2813 {
2834 2814 PyObject *ret;
2835 2815 PyObject *gca = index_commonancestorsheads(self, args);
2836 2816 if (gca == NULL)
2837 2817 return NULL;
2838 2818
2839 2819 if (PyList_GET_SIZE(gca) <= 1) {
2840 2820 return gca;
2841 2821 }
2842 2822
2843 2823 ret = find_deepest(self, gca);
2844 2824 Py_DECREF(gca);
2845 2825 return ret;
2846 2826 }
2847 2827
2848 2828 /*
2849 2829 * Invalidate any trie entries introduced by added revs.
2850 2830 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len;

	len = self->length + self->new_length;
	i = start - self->length;
	/* nothing to do unless "start" falls inside the added revs */
	if (i < 0)
		return;

	/* drop the trie entries of every rev at or after "start" */
	for (i = start; i < len; i++) {
		const char *node = index_node(self, i);
		nt_delete_node(&self->nt, node);
	}

	self->new_length = start - self->length;
}
2867 2847
2868 2848 /*
2869 2849 * Delete a numeric range of revs, which must be at the end of the
2870 2850 * range.
2871 2851 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	/* +1 so a slice may address one past the last rev */
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* normalize a negative step to a forward range */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	/* only truncation from the end is supported */
	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* truncation reaches into the on-disk portion */
		if (self->ntinitialized) {
			Py_ssize_t i;

			for (i = start; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->new_length)
				index_invalidate_added(self, self->length);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		} else if (self->new_length) {
			self->new_length = 0;
		}

		self->length = start;
		goto done;
	}

	/* truncation only affects in-memory added revs */
	if (self->ntinitialized) {
		index_invalidate_added(self, start);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	} else {
		self->new_length = start - self->length;
	}
done:
	/* cached head revs are stale after any deletion */
	Py_CLEAR(self->headrevs);
	return ret;
}
2940 2920
2941 2921 /*
2942 2922 * Supported ops:
2943 2923 *
2944 2924 * slice deletion
2945 2925 * string assignment (extend node->rev mapping)
2946 2926 * string deletion (shrink node->rev mapping)
2947 2927 */
2948 2928 static int index_assign_subscript(indexObject *self, PyObject *item,
2949 2929 PyObject *value)
2950 2930 {
2951 2931 char *node;
2952 2932 long rev;
2953 2933
2954 2934 if (PySlice_Check(item) && value == NULL)
2955 2935 return index_slice_del(self, item);
2956 2936
2957 2937 if (node_check(self->nodelen, item, &node) == -1)
2958 2938 return -1;
2959 2939
2960 2940 if (value == NULL)
2961 2941 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2962 2942 : 0;
2963 2943 rev = PyLong_AsLong(value);
2964 2944 if (rev > INT_MAX || rev < 0) {
2965 2945 if (!PyErr_Occurred())
2966 2946 PyErr_SetString(PyExc_ValueError, "rev out of range");
2967 2947 return -1;
2968 2948 }
2969 2949
2970 2950 if (index_init_nt(self) == -1)
2971 2951 return -1;
2972 2952 return nt_insert(&self->nt, node, (int)rev);
2973 2953 }
2974 2954
2975 2955 /*
2976 2956 * Find all RevlogNG entries in an index that has inline data. Update
2977 2957 * the optional "offsets" table with those entries.
2978 2958 */
2979 2959 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2980 2960 {
2981 2961 const char *data = (const char *)self->buf.buf;
2982 2962 Py_ssize_t pos = 0;
2983 2963 Py_ssize_t end = self->buf.len;
2984 2964 long incr = self->entry_size;
2985 2965 Py_ssize_t len = 0;
2986 2966
2987 2967 while (pos + self->entry_size <= end && pos >= 0) {
2988 2968 uint32_t comp_len, sidedata_comp_len = 0;
2989 2969 /* 3rd element of header is length of compressed inline data */
2990 2970 if (self->format_version == format_v1) {
2991 2971 comp_len =
2992 2972 getbe32(data + pos + entry_v1_offset_comp_len);
2993 2973 sidedata_comp_len = 0;
2994 2974 } else if (self->format_version == format_v2) {
2995 2975 comp_len =
2996 2976 getbe32(data + pos + entry_v2_offset_comp_len);
2997 2977 sidedata_comp_len = getbe32(
2998 2978 data + pos + entry_v2_offset_sidedata_comp_len);
2999 2979 } else {
3000 2980 raise_revlog_error();
3001 2981 return -1;
3002 2982 }
3003 2983 incr = self->entry_size + comp_len + sidedata_comp_len;
3004 2984 if (offsets)
3005 2985 offsets[len] = data + pos;
3006 2986 len++;
3007 2987 pos += incr;
3008 2988 }
3009 2989
3010 2990 if (pos != end) {
3011 2991 if (!PyErr_Occurred())
3012 2992 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3013 2993 return -1;
3014 2994 }
3015 2995
3016 2996 return len;
3017 2997 }
3018 2998
3019 2999 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
3020 3000 {
3021 3001 PyObject *data_obj, *inlined_obj;
3022 3002 Py_ssize_t size;
3023 3003
3024 3004 static char *kwlist[] = {"data", "inlined", "format", NULL};
3025 3005
3026 3006 /* Initialize before argument-checking to avoid index_dealloc() crash.
3027 3007 */
3028 3008 self->added = NULL;
3029 3009 self->new_length = 0;
3030 3010 self->added_length = 0;
3031 3011 self->data = NULL;
3032 3012 memset(&self->buf, 0, sizeof(self->buf));
3033 3013 self->headrevs = NULL;
3034 3014 self->filteredrevs = Py_None;
3035 3015 Py_INCREF(Py_None);
3036 3016 self->ntinitialized = 0;
3037 3017 self->offsets = NULL;
3038 3018 self->nodelen = 20;
3039 3019 self->nullentry = NULL;
3040 3020 self->rust_ext_compat = 1;
3041 3021 self->format_version = format_v1;
3042 3022
3043 3023 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3044 3024 &data_obj, &inlined_obj,
3045 3025 &(self->format_version)))
3046 3026 return -1;
3047 3027 if (!PyObject_CheckBuffer(data_obj)) {
3048 3028 PyErr_SetString(PyExc_TypeError,
3049 3029 "data does not support buffer interface");
3050 3030 return -1;
3051 3031 }
3052 3032 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3053 3033 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3054 3034 return -1;
3055 3035 }
3056 3036
3057 3037 if (self->format_version == format_v1) {
3058 3038 self->entry_size = v1_entry_size;
3059 3039 } else if (self->format_version == format_v2) {
3060 3040 self->entry_size = v2_entry_size;
3061 3041 } else if (self->format_version == format_cl2) {
3062 3042 self->entry_size = cl2_entry_size;
3063 3043 }
3064 3044
3065 3045 self->nullentry = Py_BuildValue(
3066 3046 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3067 3047 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3068 3048
3069 3049 if (!self->nullentry)
3070 3050 return -1;
3071 3051 PyObject_GC_UnTrack(self->nullentry);
3072 3052
3073 3053 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3074 3054 return -1;
3075 3055 size = self->buf.len;
3076 3056
3077 3057 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3078 3058 self->data = data_obj;
3079 3059
3080 3060 self->ntlookups = self->ntmisses = 0;
3081 3061 self->ntrev = -1;
3082 3062 Py_INCREF(self->data);
3083 3063
3084 3064 if (self->inlined) {
3085 3065 Py_ssize_t len = inline_scan(self, NULL);
3086 3066 if (len == -1)
3087 3067 goto bail;
3088 3068 self->length = len;
3089 3069 } else {
3090 3070 if (size % self->entry_size) {
3091 3071 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3092 3072 goto bail;
3093 3073 }
3094 3074 self->length = size / self->entry_size;
3095 3075 }
3096 3076
3097 3077 return 0;
3098 3078 bail:
3099 3079 return -1;
3100 3080 }
3101 3081
3102 3082 static PyObject *index_nodemap(indexObject *self)
3103 3083 {
3104 3084 Py_INCREF(self);
3105 3085 return (PyObject *)self;
3106 3086 }
3107 3087
3108 3088 static void _index_clearcaches(indexObject *self)
3109 3089 {
3110 3090 if (self->offsets) {
3111 3091 PyMem_Free((void *)self->offsets);
3112 3092 self->offsets = NULL;
3113 3093 }
3114 3094 if (self->ntinitialized) {
3115 3095 nt_dealloc(&self->nt);
3116 3096 }
3117 3097 self->ntinitialized = 0;
3118 3098 Py_CLEAR(self->headrevs);
3119 3099 }
3120 3100
3121 3101 static PyObject *index_clearcaches(indexObject *self)
3122 3102 {
3123 3103 _index_clearcaches(self);
3124 3104 self->ntrev = -1;
3125 3105 self->ntlookups = self->ntmisses = 0;
3126 3106 Py_RETURN_NONE;
3127 3107 }
3128 3108
/* tp_dealloc: release caches first, then every owned reference/buffer.
   Safe on partially-initialized objects because index_init() sets all
   fields before any operation that can fail. */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		/* release the view on `data` before dropping `data` itself */
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	PyMem_Free(self->added);
	Py_XDECREF(self->nullentry);
	PyObject_Del(self);
}
3142 3122
/* Sequence protocol: len(index), index[rev], and `node in index`. */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
3153 3133
/* Mapping protocol: index[node] lookup, assignment and (slice) deletion. */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
3159 3139
/* Method table for the index type. Note that "get" and "get_rev" are the
   same C function kept under two names for backward compatibility. */
static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
     "return `rev` associated with a node or None"},
    {"has_node", (PyCFunction)index_m_has_node, METH_O,
     "return True if the node exist in the index"},
    {"rev", (PyCFunction)index_m_rev, METH_O,
     "return `rev` associated with a node or raise RevlogError"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
     METH_VARARGS, "replace an existing index entry with a new value"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
     "Gather snapshot data in a cache dict"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
     "return an entry in binary form"},
    {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
     "pack the revlog header information into binary"},
    {NULL} /* Sentinel */
};
3205 3185
/* Attribute getters: `index.nodemap` returns the index itself (it
   implements the nodemap protocol directly, see index_nodemap()). */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
3210 3190
3211 3191 static PyMemberDef index_members[] = {
3212 3192 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3213 3193 "size of an index entry"},
3214 3194 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3215 3195 "size of an index entry"},
3216 3196 {NULL} /* Sentinel */
3217 3197 };
3218 3198
/* Type object for parsers.index; tp_new is filled in by
   revlog_module_init() before PyType_Ready(). */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    index_members,                 /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
3258 3238
3259 3239 /*
3260 3240 * returns a tuple of the form (index, cache) with elements as
3261 3241 * follows:
3262 3242 *
3263 3243 * index: an index object that lazily parses Revlog (v1 or v2) records
3264 3244 * cache: if data is inlined, a tuple (0, index_file_content), else None
3265 3245 * index_file_content could be a string, or a buffer
3266 3246 *
3267 3247 * added complications are for backwards compatibility
3268 3248 */
3269 3249 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3270 3250 {
3271 3251 PyObject *cache = NULL;
3272 3252 indexObject *idx;
3273 3253 int ret;
3274 3254
3275 3255 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3276 3256 if (idx == NULL)
3277 3257 goto bail;
3278 3258
3279 3259 ret = index_init(idx, args, kwargs);
3280 3260 if (ret == -1)
3281 3261 goto bail;
3282 3262
3283 3263 if (idx->inlined) {
3284 3264 cache = Py_BuildValue("iO", 0, idx->data);
3285 3265 if (cache == NULL)
3286 3266 goto bail;
3287 3267 } else {
3288 3268 cache = Py_None;
3289 3269 Py_INCREF(cache);
3290 3270 }
3291 3271
3292 3272 return Py_BuildValue("NN", idx, cache);
3293 3273
3294 3274 bail:
3295 3275 Py_XDECREF(idx);
3296 3276 Py_XDECREF(cache);
3297 3277 return NULL;
3298 3278 }
3299 3279
/* C API table exported to other extensions through the
   "mercurial.cext.parsers.revlog_CAPI" capsule (see revlog_module_init);
   field order must match the Revlog_CAPI struct. */
static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
};
3305 3285
3306 3286 void revlog_module_init(PyObject *mod)
3307 3287 {
3308 3288 PyObject *caps = NULL;
3309 3289 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3310 3290 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3311 3291 return;
3312 3292 Py_INCREF(&HgRevlogIndex_Type);
3313 3293 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3314 3294
3315 3295 nodetreeType.tp_new = PyType_GenericNew;
3316 3296 if (PyType_Ready(&nodetreeType) < 0)
3317 3297 return;
3318 3298 Py_INCREF(&nodetreeType);
3319 3299 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3320 3300
3321 3301 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3322 3302 if (caps != NULL)
3323 3303 PyModule_AddObject(mod, "revlog_CAPI", caps);
3324 3304 }
General Comments 0
You need to be logged in to leave comments. Login now