##// END OF EJS Templates
revlog: implement changelogv2 packing and unpacking in C...
pacien -
r49617:7dd5a2c0 default
parent child Browse files
Show More
@@ -1,3203 +1,3288 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 #ifdef IS_PY3K
27 27 /* The mapping of Python types is meant to be temporary to get Python
28 28 * 3 to compile. We should remove this once Python 3 support is fully
29 29 * supported and proper types are used in the extensions themselves. */
30 30 #define PyInt_Check PyLong_Check
31 31 #define PyInt_FromLong PyLong_FromLong
32 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 33 #define PyInt_AsLong PyLong_AsLong
34 34 #endif
35 35
36 36 typedef struct indexObjectStruct indexObject;
37 37
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetreenode;
41 41
42 42 typedef struct {
43 43 int abi_version;
44 44 Py_ssize_t (*index_length)(const indexObject *);
45 45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 46 int (*index_parents)(PyObject *, int, int *);
47 47 } Revlog_CAPI;
48 48
49 49 /*
50 50 * A base-16 trie for fast node->rev mapping.
51 51 *
52 52 * Positive value is index of the next node in the trie
53 53 * Negative value is a leaf: -(rev + 2)
54 54 * Zero is empty
55 55 */
56 56 typedef struct {
57 57 indexObject *index;
58 58 nodetreenode *nodes;
59 59 Py_ssize_t nodelen;
60 60 size_t length; /* # nodes in use */
61 61 size_t capacity; /* # nodes allocated */
62 62 int depth; /* maximum depth of tree */
63 63 int splits; /* # splits performed */
64 64 } nodetree;
65 65
66 66 typedef struct {
67 67 PyObject_HEAD /* ; */
68 68 nodetree nt;
69 69 } nodetreeObject;
70 70
71 71 /*
72 72 * This class has two behaviors.
73 73 *
74 74 * When used in a list-like way (with integer keys), we decode an
75 75 * entry in a RevlogNG index file on demand. We have limited support for
76 76 * integer-keyed insert and delete, only at elements right before the
77 77 * end.
78 78 *
79 79 * With string keys, we lazily perform a reverse mapping from node to
80 80 * rev, using a base-16 trie.
81 81 */
82 82 struct indexObjectStruct {
83 83 PyObject_HEAD
84 84 /* Type-specific fields go here. */
85 85 PyObject *data; /* raw bytes of index */
86 86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 87 PyObject *nullentry; /* fast path for references to null */
88 88 Py_buffer buf; /* buffer of data */
89 89 const char **offsets; /* populated on demand */
90 90 Py_ssize_t length; /* current on-disk number of elements */
91 91 unsigned new_length; /* number of added elements */
92 92 unsigned added_length; /* space reserved for added elements */
93 93 char *added; /* populated on demand */
94 94 PyObject *headrevs; /* cache, invalidated on changes */
95 95 PyObject *filteredrevs; /* filtered revs set */
96 96 nodetree nt; /* base-16 trie */
97 97 int ntinitialized; /* 0 or 1 */
98 98 int ntrev; /* last rev scanned */
99 99 int ntlookups; /* # lookups */
100 100 int ntmisses; /* # lookups that miss the cache */
101 101 int inlined;
102 102 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
103 103 */
104 104 long rust_ext_compat; /* compatibility with being used in rust
105 105 extensions */
106 106 char format_version; /* size of index headers. Differs in v1 v.s. v2
107 107 format */
108 108 };
109 109
110 110 static Py_ssize_t index_length(const indexObject *self)
111 111 {
112 112 return self->length + self->new_length;
113 113 }
114 114
115 115 static const char nullid[32] = {0};
116 116 static const Py_ssize_t nullrev = -1;
117 117
118 118 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
119 119
120 120 static int index_find_node(indexObject *self, const char *node);
121 121
122 122 #if LONG_MAX == 0x7fffffffL
123 123 static const char *const tuple_format =
124 124 PY23("Kiiiiiis#KiBBi", "Kiiiiiiy#KiBBi");
125 125 #else
126 126 static const char *const tuple_format =
127 127 PY23("kiiiiiis#kiBBi", "kiiiiiiy#kiBBi");
128 128 #endif
129 129
130 130 /* A RevlogNG v1 index entry is 64 bytes long. */
131 131 static const long v1_entry_size = 64;
132 132
133 133 /* A Revlogv2 index entry is 96 bytes long. */
134 134 static const long v2_entry_size = 96;
135 135
136 static const long format_v1 = 1; /* Internal only, could be any number */
137 static const long format_v2 = 2; /* Internal only, could be any number */
136 static const long format_v1 = 1; /* Internal only, could be any number */
137 static const long format_v2 = 2; /* Internal only, could be any number */
138 static const long format_cl2 = 3; /* Internal only, could be any number */
138 139
139 140 static const long entry_v1_offset_high = 0;
140 141 static const long entry_v1_offset_offset_flags = 4;
141 142 static const long entry_v1_offset_comp_len = 8;
142 143 static const long entry_v1_offset_uncomp_len = 12;
143 144 static const long entry_v1_offset_base_rev = 16;
144 145 static const long entry_v1_offset_link_rev = 20;
145 146 static const long entry_v1_offset_parent_1 = 24;
146 147 static const long entry_v1_offset_parent_2 = 28;
147 148 static const long entry_v1_offset_node_id = 32;
148 149
149 150 static const long entry_v2_offset_high = 0;
150 151 static const long entry_v2_offset_offset_flags = 4;
151 152 static const long entry_v2_offset_comp_len = 8;
152 153 static const long entry_v2_offset_uncomp_len = 12;
153 154 static const long entry_v2_offset_base_rev = 16;
154 155 static const long entry_v2_offset_link_rev = 20;
155 156 static const long entry_v2_offset_parent_1 = 24;
156 157 static const long entry_v2_offset_parent_2 = 28;
157 158 static const long entry_v2_offset_node_id = 32;
158 159 static const long entry_v2_offset_sidedata_offset = 64;
159 160 static const long entry_v2_offset_sidedata_comp_len = 72;
160 161 static const long entry_v2_offset_all_comp_mode = 76;
161 162 /* next free offset: 77 */
162 163
164 static const long entry_cl2_offset_high = 0;
165 static const long entry_cl2_offset_offset_flags = 4;
166 static const long entry_cl2_offset_comp_len = 8;
167 static const long entry_cl2_offset_uncomp_len = 12;
168 static const long entry_cl2_offset_parent_1 = 16;
169 static const long entry_cl2_offset_parent_2 = 20;
170 static const long entry_cl2_offset_node_id = 24;
171 static const long entry_cl2_offset_sidedata_offset = 56;
172 static const long entry_cl2_offset_sidedata_comp_len = 64;
173 static const long entry_cl2_offset_all_comp_mode = 68;
174 static const long entry_cl2_offset_rank = 69;
175 /* next free offset: 73 */
176
163 177 static const char comp_mode_inline = 2;
164 178 static const char rank_unknown = -1;
165 179
166 180 static void raise_revlog_error(void)
167 181 {
168 182 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
169 183
170 184 mod = PyImport_ImportModule("mercurial.error");
171 185 if (mod == NULL) {
172 186 goto cleanup;
173 187 }
174 188
175 189 dict = PyModule_GetDict(mod);
176 190 if (dict == NULL) {
177 191 goto cleanup;
178 192 }
179 193 Py_INCREF(dict);
180 194
181 195 errclass = PyDict_GetItemString(dict, "RevlogError");
182 196 if (errclass == NULL) {
183 197 PyErr_SetString(PyExc_SystemError,
184 198 "could not find RevlogError");
185 199 goto cleanup;
186 200 }
187 201
188 202 /* value of exception is ignored by callers */
189 203 PyErr_SetString(errclass, "RevlogError");
190 204
191 205 cleanup:
192 206 Py_XDECREF(dict);
193 207 Py_XDECREF(mod);
194 208 }
195 209
196 210 /*
197 211 * Return a pointer to the beginning of a RevlogNG record.
198 212 */
199 213 static const char *index_deref(indexObject *self, Py_ssize_t pos)
200 214 {
201 215 if (pos >= self->length)
202 216 return self->added + (pos - self->length) * self->entry_size;
203 217
204 218 if (self->inlined && pos > 0) {
205 219 if (self->offsets == NULL) {
206 220 Py_ssize_t ret;
207 221 self->offsets =
208 222 PyMem_Malloc(self->length * sizeof(*self->offsets));
209 223 if (self->offsets == NULL)
210 224 return (const char *)PyErr_NoMemory();
211 225 ret = inline_scan(self, self->offsets);
212 226 if (ret == -1) {
213 227 return NULL;
214 228 };
215 229 }
216 230 return self->offsets[pos];
217 231 }
218 232
219 233 return (const char *)(self->buf.buf) + pos * self->entry_size;
220 234 }
221 235
222 236 /*
223 237 * Get parents of the given rev.
224 238 *
225 239 * The specified rev must be valid and must not be nullrev. A returned
226 240 * parent revision may be nullrev, but is guaranteed to be in valid range.
227 241 */
228 242 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
229 243 int maxrev)
230 244 {
231 245 const char *data = index_deref(self, rev);
232 246
233 247 if (self->format_version == format_v1) {
234 248 ps[0] = getbe32(data + entry_v1_offset_parent_1);
235 249 ps[1] = getbe32(data + entry_v1_offset_parent_2);
236 250 } else if (self->format_version == format_v2) {
237 251 ps[0] = getbe32(data + entry_v2_offset_parent_1);
238 252 ps[1] = getbe32(data + entry_v2_offset_parent_2);
253 } else if (self->format_version == format_cl2) {
254 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
255 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
239 256 } else {
240 257 raise_revlog_error();
241 258 return -1;
242 259 }
243 260
244 261 /* If index file is corrupted, ps[] may point to invalid revisions. So
245 262 * there is a risk of buffer overflow to trust them unconditionally. */
246 263 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
247 264 PyErr_SetString(PyExc_ValueError, "parent out of range");
248 265 return -1;
249 266 }
250 267 return 0;
251 268 }
252 269
253 270 /*
254 271 * Get parents of the given rev.
255 272 *
256 273 * If the specified rev is out of range, IndexError will be raised. If the
257 274 * revlog entry is corrupted, ValueError may be raised.
258 275 *
259 276 * Returns 0 on success or -1 on failure.
260 277 */
261 278 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
262 279 {
263 280 int tiprev;
264 281 if (!op || !HgRevlogIndex_Check(op) || !ps) {
265 282 PyErr_BadInternalCall();
266 283 return -1;
267 284 }
268 285 tiprev = (int)index_length((indexObject *)op) - 1;
269 286 if (rev < -1 || rev > tiprev) {
270 287 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
271 288 return -1;
272 289 } else if (rev == -1) {
273 290 ps[0] = ps[1] = -1;
274 291 return 0;
275 292 } else {
276 293 return index_get_parents((indexObject *)op, rev, ps, tiprev);
277 294 }
278 295 }
279 296
280 297 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
281 298 {
282 299 const char *data;
283 300 uint64_t offset;
284 301
285 302 if (rev == nullrev)
286 303 return 0;
287 304
288 305 data = index_deref(self, rev);
289 306
290 307 if (self->format_version == format_v1) {
291 308 offset = getbe32(data + entry_v1_offset_offset_flags);
292 309 if (rev == 0) {
293 310 /* mask out version number for the first entry */
294 311 offset &= 0xFFFF;
295 312 } else {
296 313 uint32_t offset_high =
297 314 getbe32(data + entry_v1_offset_high);
298 315 offset |= ((uint64_t)offset_high) << 32;
299 316 }
300 317 } else if (self->format_version == format_v2) {
301 318 offset = getbe32(data + entry_v2_offset_offset_flags);
302 319 if (rev == 0) {
303 320 /* mask out version number for the first entry */
304 321 offset &= 0xFFFF;
305 322 } else {
306 323 uint32_t offset_high =
307 324 getbe32(data + entry_v2_offset_high);
308 325 offset |= ((uint64_t)offset_high) << 32;
309 326 }
327 } else if (self->format_version == format_cl2) {
328 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
329 offset = getbe32(data + entry_cl2_offset_offset_flags);
330 offset |= ((uint64_t)offset_high) << 32;
310 331 } else {
311 332 raise_revlog_error();
312 333 return -1;
313 334 }
314 335
315 336 return (int64_t)(offset >> 16);
316 337 }
317 338
318 339 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
319 340 {
320 341 const char *data;
321 342 int tmp;
322 343
323 344 if (rev == nullrev)
324 345 return 0;
325 346
326 347 data = index_deref(self, rev);
327 348
328 349 if (self->format_version == format_v1) {
329 350 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
330 351 } else if (self->format_version == format_v2) {
331 352 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
353 } else if (self->format_version == format_cl2) {
354 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
332 355 } else {
333 356 raise_revlog_error();
334 357 return -1;
335 358 }
336 359 if (tmp < 0) {
337 360 PyErr_Format(PyExc_OverflowError,
338 361 "revlog entry size out of bound (%d)", tmp);
339 362 return -1;
340 363 }
341 364 return tmp;
342 365 }
343 366
344 367 /*
345 368 * RevlogNG format (all in big endian, data may be inlined):
346 369 * 6 bytes: offset
347 370 * 2 bytes: flags
348 371 * 4 bytes: compressed length
349 372 * 4 bytes: uncompressed length
350 373 * 4 bytes: base revision
351 374 * 4 bytes: link revision
352 375 * 4 bytes: parent 1 revision
353 376 * 4 bytes: parent 2 revision
354 377 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
355 378 */
356 379 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
357 380 {
358 381 uint64_t offset_flags, sidedata_offset;
359 382 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
360 sidedata_comp_len;
383 sidedata_comp_len, rank = rank_unknown;
361 384 char data_comp_mode, sidedata_comp_mode;
362 385 const char *c_node_id;
363 386 const char *data;
364 387 Py_ssize_t length = index_length(self);
365 388
366 389 if (pos == nullrev) {
367 390 Py_INCREF(self->nullentry);
368 391 return self->nullentry;
369 392 }
370 393
371 394 if (pos < 0 || pos >= length) {
372 395 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
373 396 return NULL;
374 397 }
375 398
376 399 data = index_deref(self, pos);
377 400 if (data == NULL)
378 401 return NULL;
379 402
380 403 if (self->format_version == format_v1) {
381 404 offset_flags = getbe32(data + entry_v1_offset_offset_flags);
382 405 /*
383 406 * The first entry on-disk needs the version number masked out,
384 407 * but this doesn't apply if entries are added to an empty
385 408 * index.
386 409 */
387 410 if (self->length && pos == 0)
388 411 offset_flags &= 0xFFFF;
389 412 else {
390 413 uint32_t offset_high =
391 414 getbe32(data + entry_v1_offset_high);
392 415 offset_flags |= ((uint64_t)offset_high) << 32;
393 416 }
394 417
395 418 comp_len = getbe32(data + entry_v1_offset_comp_len);
396 419 uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
397 420 base_rev = getbe32(data + entry_v1_offset_base_rev);
398 421 link_rev = getbe32(data + entry_v1_offset_link_rev);
399 422 parent_1 = getbe32(data + entry_v1_offset_parent_1);
400 423 parent_2 = getbe32(data + entry_v1_offset_parent_2);
401 424 c_node_id = data + entry_v1_offset_node_id;
402 425
403 426 sidedata_offset = 0;
404 427 sidedata_comp_len = 0;
405 428 data_comp_mode = comp_mode_inline;
406 429 sidedata_comp_mode = comp_mode_inline;
407 430 } else if (self->format_version == format_v2) {
408 431 offset_flags = getbe32(data + entry_v2_offset_offset_flags);
409 432 /*
410 433 * The first entry on-disk needs the version number masked out,
411 434 * but this doesn't apply if entries are added to an empty
412 435 * index.
413 436 */
414 437 if (self->length && pos == 0)
415 438 offset_flags &= 0xFFFF;
416 439 else {
417 440 uint32_t offset_high =
418 441 getbe32(data + entry_v2_offset_high);
419 442 offset_flags |= ((uint64_t)offset_high) << 32;
420 443 }
421 444
422 445 comp_len = getbe32(data + entry_v2_offset_comp_len);
423 446 uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
424 447 base_rev = getbe32(data + entry_v2_offset_base_rev);
425 448 link_rev = getbe32(data + entry_v2_offset_link_rev);
426 449 parent_1 = getbe32(data + entry_v2_offset_parent_1);
427 450 parent_2 = getbe32(data + entry_v2_offset_parent_2);
428 451 c_node_id = data + entry_v2_offset_node_id;
429 452
430 453 sidedata_offset =
431 454 getbe64(data + entry_v2_offset_sidedata_offset);
432 455 sidedata_comp_len =
433 456 getbe32(data + entry_v2_offset_sidedata_comp_len);
434 457 data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
435 458 sidedata_comp_mode =
436 459 ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
460 } else if (self->format_version == format_cl2) {
461 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
462 offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
463 offset_flags |= ((uint64_t)offset_high) << 32;
464 comp_len = getbe32(data + entry_cl2_offset_comp_len);
465 uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
466 /* base_rev and link_rev are not stored in changelogv2, but are
467 still used by some functions shared with the other revlogs.
468 They are supposed to contain links to other revisions,
469 but they always point to themselves in the case of a changelog.
470 */
471 base_rev = pos;
472 link_rev = pos;
473 parent_1 = getbe32(data + entry_cl2_offset_parent_1);
474 parent_2 = getbe32(data + entry_cl2_offset_parent_2);
475 c_node_id = data + entry_cl2_offset_node_id;
476 sidedata_offset =
477 getbe64(data + entry_cl2_offset_sidedata_offset);
478 sidedata_comp_len =
479 getbe32(data + entry_cl2_offset_sidedata_comp_len);
480 data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
481 sidedata_comp_mode =
482 ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
483 rank = getbe32(data + entry_cl2_offset_rank);
437 484 } else {
438 485 raise_revlog_error();
439 486 return NULL;
440 487 }
441 488
442 489 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
443 490 base_rev, link_rev, parent_1, parent_2, c_node_id,
444 491 self->nodelen, sidedata_offset, sidedata_comp_len,
445 data_comp_mode, sidedata_comp_mode, rank_unknown);
492 data_comp_mode, sidedata_comp_mode, rank);
446 493 }
447 494 /*
448 495 * Pack header information in binary
449 496 */
450 497 static PyObject *index_pack_header(indexObject *self, PyObject *args)
451 498 {
452 499 int header;
453 500 char out[4];
454 501 if (!PyArg_ParseTuple(args, "I", &header)) {
455 502 return NULL;
456 503 }
457 504 if (self->format_version != format_v1) {
458 505 PyErr_Format(PyExc_RuntimeError,
459 506 "version header should go in the docket, not the "
460 507 "index: %lu",
461 508 header);
462 509 return NULL;
463 510 }
464 511 putbe32(header, out);
465 512 return PyBytes_FromStringAndSize(out, 4);
466 513 }
467 514 /*
468 515 * Return the raw binary string representing a revision
469 516 */
470 517 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
471 518 {
472 519 long rev;
473 520 const char *data;
474 521 Py_ssize_t length = index_length(self);
475 522
476 523 if (!pylong_to_long(value, &rev)) {
477 524 return NULL;
478 525 }
479 526 if (rev < 0 || rev >= length) {
480 527 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
481 528 rev);
482 529 return NULL;
483 530 };
484 531
485 532 data = index_deref(self, rev);
486 533 if (data == NULL)
487 534 return NULL;
488 535 if (rev == 0 && self->format_version == format_v1) {
489 536 /* the header is eating the start of the first entry */
490 537 return PyBytes_FromStringAndSize(data + 4,
491 538 self->entry_size - 4);
492 539 }
493 540 return PyBytes_FromStringAndSize(data, self->entry_size);
494 541 }
495 542
496 543 /*
497 544 * Return the hash of node corresponding to the given rev.
498 545 */
499 546 static const char *index_node(indexObject *self, Py_ssize_t pos)
500 547 {
501 548 Py_ssize_t length = index_length(self);
502 549 const char *data;
503 550 const char *node_id;
504 551
505 552 if (pos == nullrev)
506 553 return nullid;
507 554
508 555 if (pos >= length)
509 556 return NULL;
510 557
511 558 data = index_deref(self, pos);
512 559
513 560 if (self->format_version == format_v1) {
514 561 node_id = data + entry_v1_offset_node_id;
515 562 } else if (self->format_version == format_v2) {
516 563 node_id = data + entry_v2_offset_node_id;
564 } else if (self->format_version == format_cl2) {
565 node_id = data + entry_cl2_offset_node_id;
517 566 } else {
518 567 raise_revlog_error();
519 568 return NULL;
520 569 }
521 570
522 571 return data ? node_id : NULL;
523 572 }
524 573
525 574 /*
526 575 * Return the hash of the node corresponding to the given rev. The
527 576 * rev is assumed to be existing. If not, an exception is set.
528 577 */
529 578 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
530 579 {
531 580 const char *node = index_node(self, pos);
532 581 if (node == NULL) {
533 582 PyErr_Format(PyExc_IndexError, "could not access rev %d",
534 583 (int)pos);
535 584 }
536 585 return node;
537 586 }
538 587
539 588 static int nt_insert(nodetree *self, const char *node, int rev);
540 589
541 590 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
542 591 {
543 592 Py_ssize_t thisnodelen;
544 593 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
545 594 return -1;
546 595 if (nodelen == thisnodelen)
547 596 return 0;
548 597 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
549 598 thisnodelen, nodelen);
550 599 return -1;
551 600 }
552 601
553 602 static PyObject *index_append(indexObject *self, PyObject *obj)
554 603 {
555 604 uint64_t offset_flags, sidedata_offset;
556 605 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
557 606 sidedata_comp_len, rank;
558 607 char data_comp_mode, sidedata_comp_mode;
559 608 Py_ssize_t c_node_id_len;
560 609 const char *c_node_id;
561 610 char comp_field;
562 611 char *data;
563 612
564 613 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
565 614 &uncomp_len, &base_rev, &link_rev, &parent_1,
566 615 &parent_2, &c_node_id, &c_node_id_len,
567 616 &sidedata_offset, &sidedata_comp_len,
568 617 &data_comp_mode, &sidedata_comp_mode, &rank)) {
569 618 PyErr_SetString(PyExc_TypeError, "12-tuple required");
570 619 return NULL;
571 620 }
572 621
573 622 if (c_node_id_len != self->nodelen) {
574 623 PyErr_SetString(PyExc_TypeError, "invalid node");
575 624 return NULL;
576 625 }
577 626 if (self->format_version == format_v1) {
578 627
579 628 if (data_comp_mode != comp_mode_inline) {
580 629 PyErr_Format(PyExc_ValueError,
581 630 "invalid data compression mode: %i",
582 631 data_comp_mode);
583 632 return NULL;
584 633 }
585 634 if (sidedata_comp_mode != comp_mode_inline) {
586 635 PyErr_Format(PyExc_ValueError,
587 636 "invalid sidedata compression mode: %i",
588 637 sidedata_comp_mode);
589 638 return NULL;
590 639 }
591 640 }
592 641
593 642 if (self->new_length == self->added_length) {
594 643 size_t new_added_length =
595 644 self->added_length ? self->added_length * 2 : 4096;
596 645 void *new_added = PyMem_Realloc(
597 646 self->added, new_added_length * self->entry_size);
598 647 if (!new_added)
599 648 return PyErr_NoMemory();
600 649 self->added = new_added;
601 650 self->added_length = new_added_length;
602 651 }
603 652 rev = self->length + self->new_length;
604 653 data = self->added + self->entry_size * self->new_length++;
605 654
606 655 memset(data, 0, self->entry_size);
607 656
608 657 if (self->format_version == format_v1) {
609 658 putbe32(offset_flags >> 32, data + entry_v1_offset_high);
610 659 putbe32(offset_flags & 0xffffffffU,
611 660 data + entry_v1_offset_offset_flags);
612 661 putbe32(comp_len, data + entry_v1_offset_comp_len);
613 662 putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
614 663 putbe32(base_rev, data + entry_v1_offset_base_rev);
615 664 putbe32(link_rev, data + entry_v1_offset_link_rev);
616 665 putbe32(parent_1, data + entry_v1_offset_parent_1);
617 666 putbe32(parent_2, data + entry_v1_offset_parent_2);
618 667 memcpy(data + entry_v1_offset_node_id, c_node_id,
619 668 c_node_id_len);
620 669 } else if (self->format_version == format_v2) {
621 670 putbe32(offset_flags >> 32, data + entry_v2_offset_high);
622 671 putbe32(offset_flags & 0xffffffffU,
623 672 data + entry_v2_offset_offset_flags);
624 673 putbe32(comp_len, data + entry_v2_offset_comp_len);
625 674 putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
626 675 putbe32(base_rev, data + entry_v2_offset_base_rev);
627 676 putbe32(link_rev, data + entry_v2_offset_link_rev);
628 677 putbe32(parent_1, data + entry_v2_offset_parent_1);
629 678 putbe32(parent_2, data + entry_v2_offset_parent_2);
630 679 memcpy(data + entry_v2_offset_node_id, c_node_id,
631 680 c_node_id_len);
632 681 putbe64(sidedata_offset,
633 682 data + entry_v2_offset_sidedata_offset);
634 683 putbe32(sidedata_comp_len,
635 684 data + entry_v2_offset_sidedata_comp_len);
636 685 comp_field = data_comp_mode & 3;
637 686 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
638 687 data[entry_v2_offset_all_comp_mode] = comp_field;
688 } else if (self->format_version == format_cl2) {
689 putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
690 putbe32(offset_flags & 0xffffffffU,
691 data + entry_cl2_offset_offset_flags);
692 putbe32(comp_len, data + entry_cl2_offset_comp_len);
693 putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
694 putbe32(parent_1, data + entry_cl2_offset_parent_1);
695 putbe32(parent_2, data + entry_cl2_offset_parent_2);
696 memcpy(data + entry_cl2_offset_node_id, c_node_id,
697 c_node_id_len);
698 putbe64(sidedata_offset,
699 data + entry_cl2_offset_sidedata_offset);
700 putbe32(sidedata_comp_len,
701 data + entry_cl2_offset_sidedata_comp_len);
702 comp_field = data_comp_mode & 3;
703 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
704 data[entry_cl2_offset_all_comp_mode] = comp_field;
705 putbe32(rank, data + entry_cl2_offset_rank);
639 706 } else {
640 707 raise_revlog_error();
641 708 return NULL;
642 709 }
643 710
644 711 if (self->ntinitialized)
645 712 nt_insert(&self->nt, c_node_id, rev);
646 713
647 714 Py_CLEAR(self->headrevs);
648 715 Py_RETURN_NONE;
649 716 }
650 717
651 718 /* Replace an existing index entry's sidedata offset and length with new ones.
652 719 This cannot be used outside of the context of sidedata rewriting,
653 720 inside the transaction that creates the given revision. */
654 721 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
655 722 {
656 723 uint64_t offset_flags, sidedata_offset;
657 724 Py_ssize_t rev;
658 725 int sidedata_comp_len;
659 726 char comp_mode;
660 727 char *data;
661 728 #if LONG_MAX == 0x7fffffffL
662 729 const char *const sidedata_format = PY23("nKiKB", "nKiKB");
663 730 #else
664 731 const char *const sidedata_format = PY23("nkikB", "nkikB");
665 732 #endif
666 733
667 734 if (self->entry_size == v1_entry_size || self->inlined) {
668 735 /*
669 736 There is a bug in the transaction handling when going from an
670 737 inline revlog to a separate index and data file. Turn it off until
671 738 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
672 739 See issue6485.
673 740 */
674 741 raise_revlog_error();
675 742 return NULL;
676 743 }
677 744
678 745 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
679 746 &sidedata_comp_len, &offset_flags, &comp_mode))
680 747 return NULL;
681 748
682 749 if (rev < 0 || rev >= index_length(self)) {
683 750 PyErr_SetString(PyExc_IndexError, "revision outside index");
684 751 return NULL;
685 752 }
686 753 if (rev < self->length) {
687 754 PyErr_SetString(
688 755 PyExc_IndexError,
689 756 "cannot rewrite entries outside of this transaction");
690 757 return NULL;
691 758 }
692 759
693 760 /* Find the newly added node, offset from the "already on-disk" length
694 761 */
695 762 data = self->added + self->entry_size * (rev - self->length);
696 putbe64(offset_flags, data + entry_v2_offset_high);
697 putbe64(sidedata_offset, data + entry_v2_offset_sidedata_offset);
698 putbe32(sidedata_comp_len, data + entry_v2_offset_sidedata_comp_len);
699 data[entry_v2_offset_all_comp_mode] =
700 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
701 ((comp_mode & 3) << 2);
763 if (self->format_version == format_v2) {
764 putbe64(offset_flags, data + entry_v2_offset_high);
765 putbe64(sidedata_offset,
766 data + entry_v2_offset_sidedata_offset);
767 putbe32(sidedata_comp_len,
768 data + entry_v2_offset_sidedata_comp_len);
769 data[entry_v2_offset_all_comp_mode] =
770 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
771 ((comp_mode & 3) << 2);
772 } else if (self->format_version == format_cl2) {
773 putbe64(offset_flags, data + entry_cl2_offset_high);
774 putbe64(sidedata_offset,
775 data + entry_cl2_offset_sidedata_offset);
776 putbe32(sidedata_comp_len,
777 data + entry_cl2_offset_sidedata_comp_len);
778 data[entry_cl2_offset_all_comp_mode] =
779 (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
780 ((comp_mode & 3) << 2);
781 } else {
782 raise_revlog_error();
783 return NULL;
784 }
702 785
703 786 Py_RETURN_NONE;
704 787 }
705 788
706 789 static PyObject *index_stats(indexObject *self)
707 790 {
708 791 PyObject *obj = PyDict_New();
709 792 PyObject *s = NULL;
710 793 PyObject *t = NULL;
711 794
712 795 if (obj == NULL)
713 796 return NULL;
714 797
715 798 #define istat(__n, __d) \
716 799 do { \
717 800 s = PyBytes_FromString(__d); \
718 801 t = PyInt_FromSsize_t(self->__n); \
719 802 if (!s || !t) \
720 803 goto bail; \
721 804 if (PyDict_SetItem(obj, s, t) == -1) \
722 805 goto bail; \
723 806 Py_CLEAR(s); \
724 807 Py_CLEAR(t); \
725 808 } while (0)
726 809
727 810 if (self->added_length)
728 811 istat(new_length, "index entries added");
729 812 istat(length, "revs in memory");
730 813 istat(ntlookups, "node trie lookups");
731 814 istat(ntmisses, "node trie misses");
732 815 istat(ntrev, "node trie last rev scanned");
733 816 if (self->ntinitialized) {
734 817 istat(nt.capacity, "node trie capacity");
735 818 istat(nt.depth, "node trie depth");
736 819 istat(nt.length, "node trie count");
737 820 istat(nt.splits, "node trie splits");
738 821 }
739 822
740 823 #undef istat
741 824
742 825 return obj;
743 826
744 827 bail:
745 828 Py_XDECREF(obj);
746 829 Py_XDECREF(s);
747 830 Py_XDECREF(t);
748 831 return NULL;
749 832 }
750 833
751 834 /*
752 835 * When we cache a list, we want to be sure the caller can't mutate
753 836 * the cached copy.
754 837 */
755 838 static PyObject *list_copy(PyObject *list)
756 839 {
757 840 Py_ssize_t len = PyList_GET_SIZE(list);
758 841 PyObject *newlist = PyList_New(len);
759 842 Py_ssize_t i;
760 843
761 844 if (newlist == NULL)
762 845 return NULL;
763 846
764 847 for (i = 0; i < len; i++) {
765 848 PyObject *obj = PyList_GET_ITEM(list, i);
766 849 Py_INCREF(obj);
767 850 PyList_SET_ITEM(newlist, i, obj);
768 851 }
769 852
770 853 return newlist;
771 854 }
772 855
773 856 static int check_filter(PyObject *filter, Py_ssize_t arg)
774 857 {
775 858 if (filter) {
776 859 PyObject *arglist, *result;
777 860 int isfiltered;
778 861
779 862 arglist = Py_BuildValue("(n)", arg);
780 863 if (!arglist) {
781 864 return -1;
782 865 }
783 866
784 867 result = PyObject_Call(filter, arglist, NULL);
785 868 Py_DECREF(arglist);
786 869 if (!result) {
787 870 return -1;
788 871 }
789 872
790 873 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
791 874 * same as this function, so we can just return it directly.*/
792 875 isfiltered = PyObject_IsTrue(result);
793 876 Py_DECREF(result);
794 877 return isfiltered;
795 878 } else {
796 879 return 0;
797 880 }
798 881 }
799 882
800 883 static inline void set_phase_from_parents(char *phases, int parent_1,
801 884 int parent_2, Py_ssize_t i)
802 885 {
803 886 if (parent_1 >= 0 && phases[parent_1] > phases[i])
804 887 phases[i] = phases[parent_1];
805 888 if (parent_2 >= 0 && phases[parent_2] > phases[i])
806 889 phases[i] = phases[parent_2];
807 890 }
808 891
809 892 static PyObject *reachableroots2(indexObject *self, PyObject *args)
810 893 {
811 894
812 895 /* Input */
813 896 long minroot;
814 897 PyObject *includepatharg = NULL;
815 898 int includepath = 0;
816 899 /* heads and roots are lists */
817 900 PyObject *heads = NULL;
818 901 PyObject *roots = NULL;
819 902 PyObject *reachable = NULL;
820 903
821 904 PyObject *val;
822 905 Py_ssize_t len = index_length(self);
823 906 long revnum;
824 907 Py_ssize_t k;
825 908 Py_ssize_t i;
826 909 Py_ssize_t l;
827 910 int r;
828 911 int parents[2];
829 912
830 913 /* Internal data structure:
831 914 * tovisit: array of length len+1 (all revs + nullrev), filled upto
832 915 * lentovisit
833 916 *
834 917 * revstates: array of length len+1 (all revs + nullrev) */
835 918 int *tovisit = NULL;
836 919 long lentovisit = 0;
837 920 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
838 921 char *revstates = NULL;
839 922
840 923 /* Get arguments */
841 924 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
842 925 &PyList_Type, &roots, &PyBool_Type,
843 926 &includepatharg))
844 927 goto bail;
845 928
846 929 if (includepatharg == Py_True)
847 930 includepath = 1;
848 931
849 932 /* Initialize return set */
850 933 reachable = PyList_New(0);
851 934 if (reachable == NULL)
852 935 goto bail;
853 936
854 937 /* Initialize internal datastructures */
855 938 tovisit = (int *)malloc((len + 1) * sizeof(int));
856 939 if (tovisit == NULL) {
857 940 PyErr_NoMemory();
858 941 goto bail;
859 942 }
860 943
861 944 revstates = (char *)calloc(len + 1, 1);
862 945 if (revstates == NULL) {
863 946 PyErr_NoMemory();
864 947 goto bail;
865 948 }
866 949
867 950 l = PyList_GET_SIZE(roots);
868 951 for (i = 0; i < l; i++) {
869 952 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
870 953 if (revnum == -1 && PyErr_Occurred())
871 954 goto bail;
872 955 /* If root is out of range, e.g. wdir(), it must be unreachable
873 956 * from heads. So we can just ignore it. */
874 957 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
875 958 continue;
876 959 revstates[revnum + 1] |= RS_ROOT;
877 960 }
878 961
879 962 /* Populate tovisit with all the heads */
880 963 l = PyList_GET_SIZE(heads);
881 964 for (i = 0; i < l; i++) {
882 965 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
883 966 if (revnum == -1 && PyErr_Occurred())
884 967 goto bail;
885 968 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
886 969 PyErr_SetString(PyExc_IndexError, "head out of range");
887 970 goto bail;
888 971 }
889 972 if (!(revstates[revnum + 1] & RS_SEEN)) {
890 973 tovisit[lentovisit++] = (int)revnum;
891 974 revstates[revnum + 1] |= RS_SEEN;
892 975 }
893 976 }
894 977
895 978 /* Visit the tovisit list and find the reachable roots */
896 979 k = 0;
897 980 while (k < lentovisit) {
898 981 /* Add the node to reachable if it is a root*/
899 982 revnum = tovisit[k++];
900 983 if (revstates[revnum + 1] & RS_ROOT) {
901 984 revstates[revnum + 1] |= RS_REACHABLE;
902 985 val = PyInt_FromLong(revnum);
903 986 if (val == NULL)
904 987 goto bail;
905 988 r = PyList_Append(reachable, val);
906 989 Py_DECREF(val);
907 990 if (r < 0)
908 991 goto bail;
909 992 if (includepath == 0)
910 993 continue;
911 994 }
912 995
913 996 /* Add its parents to the list of nodes to visit */
914 997 if (revnum == nullrev)
915 998 continue;
916 999 r = index_get_parents(self, revnum, parents, (int)len - 1);
917 1000 if (r < 0)
918 1001 goto bail;
919 1002 for (i = 0; i < 2; i++) {
920 1003 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
921 1004 parents[i] >= minroot) {
922 1005 tovisit[lentovisit++] = parents[i];
923 1006 revstates[parents[i] + 1] |= RS_SEEN;
924 1007 }
925 1008 }
926 1009 }
927 1010
928 1011 /* Find all the nodes in between the roots we found and the heads
929 1012 * and add them to the reachable set */
930 1013 if (includepath == 1) {
931 1014 long minidx = minroot;
932 1015 if (minidx < 0)
933 1016 minidx = 0;
934 1017 for (i = minidx; i < len; i++) {
935 1018 if (!(revstates[i + 1] & RS_SEEN))
936 1019 continue;
937 1020 r = index_get_parents(self, i, parents, (int)len - 1);
938 1021 /* Corrupted index file, error is set from
939 1022 * index_get_parents */
940 1023 if (r < 0)
941 1024 goto bail;
942 1025 if (((revstates[parents[0] + 1] |
943 1026 revstates[parents[1] + 1]) &
944 1027 RS_REACHABLE) &&
945 1028 !(revstates[i + 1] & RS_REACHABLE)) {
946 1029 revstates[i + 1] |= RS_REACHABLE;
947 1030 val = PyInt_FromSsize_t(i);
948 1031 if (val == NULL)
949 1032 goto bail;
950 1033 r = PyList_Append(reachable, val);
951 1034 Py_DECREF(val);
952 1035 if (r < 0)
953 1036 goto bail;
954 1037 }
955 1038 }
956 1039 }
957 1040
958 1041 free(revstates);
959 1042 free(tovisit);
960 1043 return reachable;
961 1044 bail:
962 1045 Py_XDECREF(reachable);
963 1046 free(revstates);
964 1047 free(tovisit);
965 1048 return NULL;
966 1049 }
967 1050
968 1051 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
969 1052 char phase)
970 1053 {
971 1054 Py_ssize_t len = index_length(self);
972 1055 PyObject *item;
973 1056 PyObject *iterator;
974 1057 int rev, minrev = -1;
975 1058 char *node;
976 1059
977 1060 if (!PySet_Check(roots)) {
978 1061 PyErr_SetString(PyExc_TypeError,
979 1062 "roots must be a set of nodes");
980 1063 return -2;
981 1064 }
982 1065 iterator = PyObject_GetIter(roots);
983 1066 if (iterator == NULL)
984 1067 return -2;
985 1068 while ((item = PyIter_Next(iterator))) {
986 1069 if (node_check(self->nodelen, item, &node) == -1)
987 1070 goto failed;
988 1071 rev = index_find_node(self, node);
989 1072 /* null is implicitly public, so negative is invalid */
990 1073 if (rev < 0 || rev >= len)
991 1074 goto failed;
992 1075 phases[rev] = phase;
993 1076 if (minrev == -1 || minrev > rev)
994 1077 minrev = rev;
995 1078 Py_DECREF(item);
996 1079 }
997 1080 Py_DECREF(iterator);
998 1081 return minrev;
999 1082 failed:
1000 1083 Py_DECREF(iterator);
1001 1084 Py_DECREF(item);
1002 1085 return -2;
1003 1086 }
1004 1087
1005 1088 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1006 1089 {
1007 1090 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1008 1091 96: internal */
1009 1092 static const char trackedphases[] = {1, 2, 32, 96};
1010 1093 PyObject *roots = Py_None;
1011 1094 PyObject *phasesetsdict = NULL;
1012 1095 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1013 1096 Py_ssize_t len = index_length(self);
1014 1097 char *phases = NULL;
1015 1098 int minphaserev = -1, rev, i;
1016 1099 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1017 1100
1018 1101 if (!PyArg_ParseTuple(args, "O", &roots))
1019 1102 return NULL;
1020 1103 if (roots == NULL || !PyDict_Check(roots)) {
1021 1104 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1022 1105 return NULL;
1023 1106 }
1024 1107
1025 1108 phases = calloc(len, 1);
1026 1109 if (phases == NULL) {
1027 1110 PyErr_NoMemory();
1028 1111 return NULL;
1029 1112 }
1030 1113
1031 1114 for (i = 0; i < numphases; ++i) {
1032 1115 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
1033 1116 PyObject *phaseroots = NULL;
1034 1117 if (pyphase == NULL)
1035 1118 goto release;
1036 1119 phaseroots = PyDict_GetItem(roots, pyphase);
1037 1120 Py_DECREF(pyphase);
1038 1121 if (phaseroots == NULL)
1039 1122 continue;
1040 1123 rev = add_roots_get_min(self, phaseroots, phases,
1041 1124 trackedphases[i]);
1042 1125 if (rev == -2)
1043 1126 goto release;
1044 1127 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1045 1128 minphaserev = rev;
1046 1129 }
1047 1130
1048 1131 for (i = 0; i < numphases; ++i) {
1049 1132 phasesets[i] = PySet_New(NULL);
1050 1133 if (phasesets[i] == NULL)
1051 1134 goto release;
1052 1135 }
1053 1136
1054 1137 if (minphaserev == -1)
1055 1138 minphaserev = len;
1056 1139 for (rev = minphaserev; rev < len; ++rev) {
1057 1140 PyObject *pyphase = NULL;
1058 1141 PyObject *pyrev = NULL;
1059 1142 int parents[2];
1060 1143 /*
1061 1144 * The parent lookup could be skipped for phaseroots, but
1062 1145 * phase --force would historically not recompute them
1063 1146 * correctly, leaving descendents with a lower phase around.
1064 1147 * As such, unconditionally recompute the phase.
1065 1148 */
1066 1149 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1067 1150 goto release;
1068 1151 set_phase_from_parents(phases, parents[0], parents[1], rev);
1069 1152 switch (phases[rev]) {
1070 1153 case 0:
1071 1154 continue;
1072 1155 case 1:
1073 1156 pyphase = phasesets[0];
1074 1157 break;
1075 1158 case 2:
1076 1159 pyphase = phasesets[1];
1077 1160 break;
1078 1161 case 32:
1079 1162 pyphase = phasesets[2];
1080 1163 break;
1081 1164 case 96:
1082 1165 pyphase = phasesets[3];
1083 1166 break;
1084 1167 default:
1085 1168 /* this should never happen since the phase number is
1086 1169 * specified by this function. */
1087 1170 PyErr_SetString(PyExc_SystemError,
1088 1171 "bad phase number in internal list");
1089 1172 goto release;
1090 1173 }
1091 1174 pyrev = PyInt_FromLong(rev);
1092 1175 if (pyrev == NULL)
1093 1176 goto release;
1094 1177 if (PySet_Add(pyphase, pyrev) == -1) {
1095 1178 Py_DECREF(pyrev);
1096 1179 goto release;
1097 1180 }
1098 1181 Py_DECREF(pyrev);
1099 1182 }
1100 1183
1101 1184 phasesetsdict = _dict_new_presized(numphases);
1102 1185 if (phasesetsdict == NULL)
1103 1186 goto release;
1104 1187 for (i = 0; i < numphases; ++i) {
1105 1188 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
1106 1189 if (pyphase == NULL)
1107 1190 goto release;
1108 1191 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1109 1192 -1) {
1110 1193 Py_DECREF(pyphase);
1111 1194 goto release;
1112 1195 }
1113 1196 Py_DECREF(phasesets[i]);
1114 1197 phasesets[i] = NULL;
1115 1198 }
1116 1199
1117 1200 free(phases);
1118 1201 return Py_BuildValue("nN", len, phasesetsdict);
1119 1202
1120 1203 release:
1121 1204 for (i = 0; i < numphases; ++i)
1122 1205 Py_XDECREF(phasesets[i]);
1123 1206 Py_XDECREF(phasesetsdict);
1124 1207
1125 1208 free(phases);
1126 1209 return NULL;
1127 1210 }
1128 1211
1129 1212 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1130 1213 {
1131 1214 Py_ssize_t i, j, len;
1132 1215 char *nothead = NULL;
1133 1216 PyObject *heads = NULL;
1134 1217 PyObject *filter = NULL;
1135 1218 PyObject *filteredrevs = Py_None;
1136 1219
1137 1220 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1138 1221 return NULL;
1139 1222 }
1140 1223
1141 1224 if (self->headrevs && filteredrevs == self->filteredrevs)
1142 1225 return list_copy(self->headrevs);
1143 1226
1144 1227 Py_DECREF(self->filteredrevs);
1145 1228 self->filteredrevs = filteredrevs;
1146 1229 Py_INCREF(filteredrevs);
1147 1230
1148 1231 if (filteredrevs != Py_None) {
1149 1232 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1150 1233 if (!filter) {
1151 1234 PyErr_SetString(
1152 1235 PyExc_TypeError,
1153 1236 "filteredrevs has no attribute __contains__");
1154 1237 goto bail;
1155 1238 }
1156 1239 }
1157 1240
1158 1241 len = index_length(self);
1159 1242 heads = PyList_New(0);
1160 1243 if (heads == NULL)
1161 1244 goto bail;
1162 1245 if (len == 0) {
1163 1246 PyObject *nullid = PyInt_FromLong(-1);
1164 1247 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1165 1248 Py_XDECREF(nullid);
1166 1249 goto bail;
1167 1250 }
1168 1251 goto done;
1169 1252 }
1170 1253
1171 1254 nothead = calloc(len, 1);
1172 1255 if (nothead == NULL) {
1173 1256 PyErr_NoMemory();
1174 1257 goto bail;
1175 1258 }
1176 1259
1177 1260 for (i = len - 1; i >= 0; i--) {
1178 1261 int isfiltered;
1179 1262 int parents[2];
1180 1263
1181 1264 /* If nothead[i] == 1, it means we've seen an unfiltered child
1182 1265 * of this node already, and therefore this node is not
1183 1266 * filtered. So we can skip the expensive check_filter step.
1184 1267 */
1185 1268 if (nothead[i] != 1) {
1186 1269 isfiltered = check_filter(filter, i);
1187 1270 if (isfiltered == -1) {
1188 1271 PyErr_SetString(PyExc_TypeError,
1189 1272 "unable to check filter");
1190 1273 goto bail;
1191 1274 }
1192 1275
1193 1276 if (isfiltered) {
1194 1277 nothead[i] = 1;
1195 1278 continue;
1196 1279 }
1197 1280 }
1198 1281
1199 1282 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1200 1283 goto bail;
1201 1284 for (j = 0; j < 2; j++) {
1202 1285 if (parents[j] >= 0)
1203 1286 nothead[parents[j]] = 1;
1204 1287 }
1205 1288 }
1206 1289
1207 1290 for (i = 0; i < len; i++) {
1208 1291 PyObject *head;
1209 1292
1210 1293 if (nothead[i])
1211 1294 continue;
1212 1295 head = PyInt_FromSsize_t(i);
1213 1296 if (head == NULL || PyList_Append(heads, head) == -1) {
1214 1297 Py_XDECREF(head);
1215 1298 goto bail;
1216 1299 }
1217 1300 }
1218 1301
1219 1302 done:
1220 1303 self->headrevs = heads;
1221 1304 Py_XDECREF(filter);
1222 1305 free(nothead);
1223 1306 return list_copy(self->headrevs);
1224 1307 bail:
1225 1308 Py_XDECREF(filter);
1226 1309 Py_XDECREF(heads);
1227 1310 free(nothead);
1228 1311 return NULL;
1229 1312 }
1230 1313
1231 1314 /**
1232 1315 * Obtain the base revision index entry.
1233 1316 *
1234 1317 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1235 1318 */
1236 1319 static inline int index_baserev(indexObject *self, int rev)
1237 1320 {
1238 1321 const char *data;
1239 1322 int result;
1240 1323
1241 1324 data = index_deref(self, rev);
1242 1325 if (data == NULL)
1243 1326 return -2;
1244 1327
1245 1328 if (self->format_version == format_v1) {
1246 1329 result = getbe32(data + entry_v1_offset_base_rev);
1247 1330 } else if (self->format_version == format_v2) {
1248 1331 result = getbe32(data + entry_v2_offset_base_rev);
1332 } else if (self->format_version == format_cl2) {
1333 return rev;
1249 1334 } else {
1250 1335 raise_revlog_error();
1251 1336 return -1;
1252 1337 }
1253 1338
1254 1339 if (result > rev) {
1255 1340 PyErr_Format(
1256 1341 PyExc_ValueError,
1257 1342 "corrupted revlog, revision base above revision: %d, %d",
1258 1343 rev, result);
1259 1344 return -2;
1260 1345 }
1261 1346 if (result < -1) {
1262 1347 PyErr_Format(
1263 1348 PyExc_ValueError,
1264 1349 "corrupted revlog, revision base out of range: %d, %d", rev,
1265 1350 result);
1266 1351 return -2;
1267 1352 }
1268 1353 return result;
1269 1354 }
1270 1355
1271 1356 /**
1272 1357 * Find if a revision is a snapshot or not
1273 1358 *
1274 1359 * Only relevant for sparse-revlog case.
1275 1360 * Callers must ensure that rev is in a valid range.
1276 1361 */
1277 1362 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1278 1363 {
1279 1364 int ps[2];
1280 1365 Py_ssize_t base;
1281 1366 while (rev >= 0) {
1282 1367 base = (Py_ssize_t)index_baserev(self, rev);
1283 1368 if (base == rev) {
1284 1369 base = -1;
1285 1370 }
1286 1371 if (base == -2) {
1287 1372 assert(PyErr_Occurred());
1288 1373 return -1;
1289 1374 }
1290 1375 if (base == -1) {
1291 1376 return 1;
1292 1377 }
1293 1378 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1294 1379 assert(PyErr_Occurred());
1295 1380 return -1;
1296 1381 };
1297 1382 if (base == ps[0] || base == ps[1]) {
1298 1383 return 0;
1299 1384 }
1300 1385 rev = base;
1301 1386 }
1302 1387 return rev == -1;
1303 1388 }
1304 1389
1305 1390 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1306 1391 {
1307 1392 long rev;
1308 1393 int issnap;
1309 1394 Py_ssize_t length = index_length(self);
1310 1395
1311 1396 if (!pylong_to_long(value, &rev)) {
1312 1397 return NULL;
1313 1398 }
1314 1399 if (rev < -1 || rev >= length) {
1315 1400 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1316 1401 rev);
1317 1402 return NULL;
1318 1403 };
1319 1404 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1320 1405 if (issnap < 0) {
1321 1406 return NULL;
1322 1407 };
1323 1408 return PyBool_FromLong((long)issnap);
1324 1409 }
1325 1410
1326 1411 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1327 1412 {
1328 1413 Py_ssize_t start_rev;
1329 1414 PyObject *cache;
1330 1415 Py_ssize_t base;
1331 1416 Py_ssize_t rev;
1332 1417 PyObject *key = NULL;
1333 1418 PyObject *value = NULL;
1334 1419 const Py_ssize_t length = index_length(self);
1335 1420 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1336 1421 return NULL;
1337 1422 }
1338 1423 for (rev = start_rev; rev < length; rev++) {
1339 1424 int issnap;
1340 1425 PyObject *allvalues = NULL;
1341 1426 issnap = index_issnapshotrev(self, rev);
1342 1427 if (issnap < 0) {
1343 1428 goto bail;
1344 1429 }
1345 1430 if (issnap == 0) {
1346 1431 continue;
1347 1432 }
1348 1433 base = (Py_ssize_t)index_baserev(self, rev);
1349 1434 if (base == rev) {
1350 1435 base = -1;
1351 1436 }
1352 1437 if (base == -2) {
1353 1438 assert(PyErr_Occurred());
1354 1439 goto bail;
1355 1440 }
1356 1441 key = PyInt_FromSsize_t(base);
1357 1442 allvalues = PyDict_GetItem(cache, key);
1358 1443 if (allvalues == NULL && PyErr_Occurred()) {
1359 1444 goto bail;
1360 1445 }
1361 1446 if (allvalues == NULL) {
1362 1447 int r;
1363 1448 allvalues = PyList_New(0);
1364 1449 if (!allvalues) {
1365 1450 goto bail;
1366 1451 }
1367 1452 r = PyDict_SetItem(cache, key, allvalues);
1368 1453 Py_DECREF(allvalues);
1369 1454 if (r < 0) {
1370 1455 goto bail;
1371 1456 }
1372 1457 }
1373 1458 value = PyInt_FromSsize_t(rev);
1374 1459 if (PyList_Append(allvalues, value)) {
1375 1460 goto bail;
1376 1461 }
1377 1462 Py_CLEAR(key);
1378 1463 Py_CLEAR(value);
1379 1464 }
1380 1465 Py_RETURN_NONE;
1381 1466 bail:
1382 1467 Py_XDECREF(key);
1383 1468 Py_XDECREF(value);
1384 1469 return NULL;
1385 1470 }
1386 1471
1387 1472 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1388 1473 {
1389 1474 int rev, generaldelta;
1390 1475 PyObject *stoparg;
1391 1476 int stoprev, iterrev, baserev = -1;
1392 1477 int stopped;
1393 1478 PyObject *chain = NULL, *result = NULL;
1394 1479 const Py_ssize_t length = index_length(self);
1395 1480
1396 1481 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1397 1482 return NULL;
1398 1483 }
1399 1484
1400 1485 if (PyInt_Check(stoparg)) {
1401 1486 stoprev = (int)PyInt_AsLong(stoparg);
1402 1487 if (stoprev == -1 && PyErr_Occurred()) {
1403 1488 return NULL;
1404 1489 }
1405 1490 } else if (stoparg == Py_None) {
1406 1491 stoprev = -2;
1407 1492 } else {
1408 1493 PyErr_SetString(PyExc_ValueError,
1409 1494 "stoprev must be integer or None");
1410 1495 return NULL;
1411 1496 }
1412 1497
1413 1498 if (rev < 0 || rev >= length) {
1414 1499 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1415 1500 return NULL;
1416 1501 }
1417 1502
1418 1503 chain = PyList_New(0);
1419 1504 if (chain == NULL) {
1420 1505 return NULL;
1421 1506 }
1422 1507
1423 1508 baserev = index_baserev(self, rev);
1424 1509
1425 1510 /* This should never happen. */
1426 1511 if (baserev <= -2) {
1427 1512 /* Error should be set by index_deref() */
1428 1513 assert(PyErr_Occurred());
1429 1514 goto bail;
1430 1515 }
1431 1516
1432 1517 iterrev = rev;
1433 1518
1434 1519 while (iterrev != baserev && iterrev != stoprev) {
1435 1520 PyObject *value = PyInt_FromLong(iterrev);
1436 1521 if (value == NULL) {
1437 1522 goto bail;
1438 1523 }
1439 1524 if (PyList_Append(chain, value)) {
1440 1525 Py_DECREF(value);
1441 1526 goto bail;
1442 1527 }
1443 1528 Py_DECREF(value);
1444 1529
1445 1530 if (generaldelta) {
1446 1531 iterrev = baserev;
1447 1532 } else {
1448 1533 iterrev--;
1449 1534 }
1450 1535
1451 1536 if (iterrev < 0) {
1452 1537 break;
1453 1538 }
1454 1539
1455 1540 if (iterrev >= length) {
1456 1541 PyErr_SetString(PyExc_IndexError,
1457 1542 "revision outside index");
1458 1543 return NULL;
1459 1544 }
1460 1545
1461 1546 baserev = index_baserev(self, iterrev);
1462 1547
1463 1548 /* This should never happen. */
1464 1549 if (baserev <= -2) {
1465 1550 /* Error should be set by index_deref() */
1466 1551 assert(PyErr_Occurred());
1467 1552 goto bail;
1468 1553 }
1469 1554 }
1470 1555
1471 1556 if (iterrev == stoprev) {
1472 1557 stopped = 1;
1473 1558 } else {
1474 1559 PyObject *value = PyInt_FromLong(iterrev);
1475 1560 if (value == NULL) {
1476 1561 goto bail;
1477 1562 }
1478 1563 if (PyList_Append(chain, value)) {
1479 1564 Py_DECREF(value);
1480 1565 goto bail;
1481 1566 }
1482 1567 Py_DECREF(value);
1483 1568
1484 1569 stopped = 0;
1485 1570 }
1486 1571
1487 1572 if (PyList_Reverse(chain)) {
1488 1573 goto bail;
1489 1574 }
1490 1575
1491 1576 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1492 1577 Py_DECREF(chain);
1493 1578 return result;
1494 1579
1495 1580 bail:
1496 1581 Py_DECREF(chain);
1497 1582 return NULL;
1498 1583 }
1499 1584
1500 1585 static inline int64_t
1501 1586 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1502 1587 {
1503 1588 int64_t start_offset;
1504 1589 int64_t end_offset;
1505 1590 int end_size;
1506 1591 start_offset = index_get_start(self, start_rev);
1507 1592 if (start_offset < 0) {
1508 1593 return -1;
1509 1594 }
1510 1595 end_offset = index_get_start(self, end_rev);
1511 1596 if (end_offset < 0) {
1512 1597 return -1;
1513 1598 }
1514 1599 end_size = index_get_length(self, end_rev);
1515 1600 if (end_size < 0) {
1516 1601 return -1;
1517 1602 }
1518 1603 if (end_offset < start_offset) {
1519 1604 PyErr_Format(PyExc_ValueError,
1520 1605 "corrupted revlog index: inconsistent offset "
1521 1606 "between revisions (%zd) and (%zd)",
1522 1607 start_rev, end_rev);
1523 1608 return -1;
1524 1609 }
1525 1610 return (end_offset - start_offset) + (int64_t)end_size;
1526 1611 }
1527 1612
1528 1613 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1529 1614 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1530 1615 Py_ssize_t startidx, Py_ssize_t endidx)
1531 1616 {
1532 1617 int length;
1533 1618 while (endidx > 1 && endidx > startidx) {
1534 1619 length = index_get_length(self, revs[endidx - 1]);
1535 1620 if (length < 0) {
1536 1621 return -1;
1537 1622 }
1538 1623 if (length != 0) {
1539 1624 break;
1540 1625 }
1541 1626 endidx -= 1;
1542 1627 }
1543 1628 return endidx;
1544 1629 }
1545 1630
1546 1631 struct Gap {
1547 1632 int64_t size;
1548 1633 Py_ssize_t idx;
1549 1634 };
1550 1635
1551 1636 static int gap_compare(const void *left, const void *right)
1552 1637 {
1553 1638 const struct Gap *l_left = ((const struct Gap *)left);
1554 1639 const struct Gap *l_right = ((const struct Gap *)right);
1555 1640 if (l_left->size < l_right->size) {
1556 1641 return -1;
1557 1642 } else if (l_left->size > l_right->size) {
1558 1643 return 1;
1559 1644 }
1560 1645 return 0;
1561 1646 }
1562 1647 static int Py_ssize_t_compare(const void *left, const void *right)
1563 1648 {
1564 1649 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1565 1650 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1566 1651 if (l_left < l_right) {
1567 1652 return -1;
1568 1653 } else if (l_left > l_right) {
1569 1654 return 1;
1570 1655 }
1571 1656 return 0;
1572 1657 }
1573 1658
1574 1659 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1575 1660 {
1576 1661 /* method arguments */
1577 1662 PyObject *list_revs = NULL; /* revisions in the chain */
1578 1663 double targetdensity = 0; /* min density to achieve */
1579 1664 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1580 1665
1581 1666 /* other core variables */
1582 1667 Py_ssize_t idxlen = index_length(self);
1583 1668 Py_ssize_t i; /* used for various iteration */
1584 1669 PyObject *result = NULL; /* the final return of the function */
1585 1670
1586 1671 /* generic information about the delta chain being slice */
1587 1672 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1588 1673 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1589 1674 int64_t chainpayload = 0; /* sum of all delta in the chain */
1590 1675 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1591 1676
1592 1677 /* variable used for slicing the delta chain */
1593 1678 int64_t readdata = 0; /* amount of data currently planned to be read */
1594 1679 double density = 0; /* ration of payload data compared to read ones */
1595 1680 int64_t previous_end;
1596 1681 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1597 1682 Py_ssize_t num_gaps =
1598 1683 0; /* total number of notable gap recorded so far */
1599 1684 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1600 1685 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1601 1686 PyObject *chunk = NULL; /* individual slice */
1602 1687 PyObject *allchunks = NULL; /* all slices */
1603 1688 Py_ssize_t previdx;
1604 1689
1605 1690 /* parsing argument */
1606 1691 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1607 1692 &targetdensity, &mingapsize)) {
1608 1693 goto bail;
1609 1694 }
1610 1695
1611 1696 /* If the delta chain contains a single element, we do not need slicing
1612 1697 */
1613 1698 num_revs = PyList_GET_SIZE(list_revs);
1614 1699 if (num_revs <= 1) {
1615 1700 result = PyTuple_Pack(1, list_revs);
1616 1701 goto done;
1617 1702 }
1618 1703
1619 1704 /* Turn the python list into a native integer array (for efficiency) */
1620 1705 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1621 1706 if (revs == NULL) {
1622 1707 PyErr_NoMemory();
1623 1708 goto bail;
1624 1709 }
1625 1710 for (i = 0; i < num_revs; i++) {
1626 1711 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1627 1712 if (revnum == -1 && PyErr_Occurred()) {
1628 1713 goto bail;
1629 1714 }
1630 1715 if (revnum < nullrev || revnum >= idxlen) {
1631 1716 PyErr_Format(PyExc_IndexError,
1632 1717 "index out of range: %zd", revnum);
1633 1718 goto bail;
1634 1719 }
1635 1720 revs[i] = revnum;
1636 1721 }
1637 1722
1638 1723 /* Compute and check various property of the unsliced delta chain */
1639 1724 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1640 1725 if (deltachainspan < 0) {
1641 1726 goto bail;
1642 1727 }
1643 1728
1644 1729 if (deltachainspan <= mingapsize) {
1645 1730 result = PyTuple_Pack(1, list_revs);
1646 1731 goto done;
1647 1732 }
1648 1733 chainpayload = 0;
1649 1734 for (i = 0; i < num_revs; i++) {
1650 1735 int tmp = index_get_length(self, revs[i]);
1651 1736 if (tmp < 0) {
1652 1737 goto bail;
1653 1738 }
1654 1739 chainpayload += tmp;
1655 1740 }
1656 1741
1657 1742 readdata = deltachainspan;
1658 1743 density = 1.0;
1659 1744
1660 1745 if (0 < deltachainspan) {
1661 1746 density = (double)chainpayload / (double)deltachainspan;
1662 1747 }
1663 1748
1664 1749 if (density >= targetdensity) {
1665 1750 result = PyTuple_Pack(1, list_revs);
1666 1751 goto done;
1667 1752 }
1668 1753
1669 1754 /* if chain is too sparse, look for relevant gaps */
1670 1755 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1671 1756 if (gaps == NULL) {
1672 1757 PyErr_NoMemory();
1673 1758 goto bail;
1674 1759 }
1675 1760
1676 1761 previous_end = -1;
1677 1762 for (i = 0; i < num_revs; i++) {
1678 1763 int64_t revstart;
1679 1764 int revsize;
1680 1765 revstart = index_get_start(self, revs[i]);
1681 1766 if (revstart < 0) {
1682 1767 goto bail;
1683 1768 };
1684 1769 revsize = index_get_length(self, revs[i]);
1685 1770 if (revsize < 0) {
1686 1771 goto bail;
1687 1772 };
1688 1773 if (revsize == 0) {
1689 1774 continue;
1690 1775 }
1691 1776 if (previous_end >= 0) {
1692 1777 int64_t gapsize = revstart - previous_end;
1693 1778 if (gapsize > mingapsize) {
1694 1779 gaps[num_gaps].size = gapsize;
1695 1780 gaps[num_gaps].idx = i;
1696 1781 num_gaps += 1;
1697 1782 }
1698 1783 }
1699 1784 previous_end = revstart + revsize;
1700 1785 }
1701 1786 if (num_gaps == 0) {
1702 1787 result = PyTuple_Pack(1, list_revs);
1703 1788 goto done;
1704 1789 }
1705 1790 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1706 1791
1707 1792 /* Slice the largest gap first, they improve the density the most */
1708 1793 selected_indices =
1709 1794 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1710 1795 if (selected_indices == NULL) {
1711 1796 PyErr_NoMemory();
1712 1797 goto bail;
1713 1798 }
1714 1799
1715 1800 for (i = num_gaps - 1; i >= 0; i--) {
1716 1801 selected_indices[num_selected] = gaps[i].idx;
1717 1802 readdata -= gaps[i].size;
1718 1803 num_selected += 1;
1719 1804 if (readdata <= 0) {
1720 1805 density = 1.0;
1721 1806 } else {
1722 1807 density = (double)chainpayload / (double)readdata;
1723 1808 }
1724 1809 if (density >= targetdensity) {
1725 1810 break;
1726 1811 }
1727 1812 }
1728 1813 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1729 1814 &Py_ssize_t_compare);
1730 1815
1731 1816 /* create the resulting slice */
1732 1817 allchunks = PyList_New(0);
1733 1818 if (allchunks == NULL) {
1734 1819 goto bail;
1735 1820 }
1736 1821 previdx = 0;
1737 1822 selected_indices[num_selected] = num_revs;
1738 1823 for (i = 0; i <= num_selected; i++) {
1739 1824 Py_ssize_t idx = selected_indices[i];
1740 1825 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1741 1826 if (endidx < 0) {
1742 1827 goto bail;
1743 1828 }
1744 1829 if (previdx < endidx) {
1745 1830 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1746 1831 if (chunk == NULL) {
1747 1832 goto bail;
1748 1833 }
1749 1834 if (PyList_Append(allchunks, chunk) == -1) {
1750 1835 goto bail;
1751 1836 }
1752 1837 Py_DECREF(chunk);
1753 1838 chunk = NULL;
1754 1839 }
1755 1840 previdx = idx;
1756 1841 }
1757 1842 result = allchunks;
1758 1843 goto done;
1759 1844
1760 1845 bail:
1761 1846 Py_XDECREF(allchunks);
1762 1847 Py_XDECREF(chunk);
1763 1848 done:
1764 1849 free(revs);
1765 1850 free(gaps);
1766 1851 free(selected_indices);
1767 1852 return result;
1768 1853 }
1769 1854
1770 1855 static inline int nt_level(const char *node, Py_ssize_t level)
1771 1856 {
1772 1857 int v = node[level >> 1];
1773 1858 if (!(level & 1))
1774 1859 v >>= 4;
1775 1860 return v & 0xf;
1776 1861 }
1777 1862
1778 1863 /*
1779 1864 * Return values:
1780 1865 *
1781 1866 * -4: match is ambiguous (multiple candidates)
1782 1867 * -2: not found
1783 1868 * rest: valid rev
1784 1869 */
1785 1870 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1786 1871 int hex)
1787 1872 {
1788 1873 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1789 1874 int level, maxlevel, off;
1790 1875
1791 1876 /* If the input is binary, do a fast check for the nullid first. */
1792 1877 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1793 1878 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1794 1879 return -1;
1795 1880
1796 1881 if (hex)
1797 1882 maxlevel = nodelen;
1798 1883 else
1799 1884 maxlevel = 2 * nodelen;
1800 1885 if (maxlevel > 2 * self->nodelen)
1801 1886 maxlevel = 2 * self->nodelen;
1802 1887
1803 1888 for (level = off = 0; level < maxlevel; level++) {
1804 1889 int k = getnybble(node, level);
1805 1890 nodetreenode *n = &self->nodes[off];
1806 1891 int v = n->children[k];
1807 1892
1808 1893 if (v < 0) {
1809 1894 const char *n;
1810 1895 Py_ssize_t i;
1811 1896
1812 1897 v = -(v + 2);
1813 1898 n = index_node(self->index, v);
1814 1899 if (n == NULL)
1815 1900 return -2;
1816 1901 for (i = level; i < maxlevel; i++)
1817 1902 if (getnybble(node, i) != nt_level(n, i))
1818 1903 return -2;
1819 1904 return v;
1820 1905 }
1821 1906 if (v == 0)
1822 1907 return -2;
1823 1908 off = v;
1824 1909 }
1825 1910 /* multiple matches against an ambiguous prefix */
1826 1911 return -4;
1827 1912 }
1828 1913
1829 1914 static int nt_new(nodetree *self)
1830 1915 {
1831 1916 if (self->length == self->capacity) {
1832 1917 size_t newcapacity;
1833 1918 nodetreenode *newnodes;
1834 1919 newcapacity = self->capacity * 2;
1835 1920 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1836 1921 PyErr_SetString(PyExc_MemoryError,
1837 1922 "overflow in nt_new");
1838 1923 return -1;
1839 1924 }
1840 1925 newnodes =
1841 1926 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1842 1927 if (newnodes == NULL) {
1843 1928 PyErr_SetString(PyExc_MemoryError, "out of memory");
1844 1929 return -1;
1845 1930 }
1846 1931 self->capacity = newcapacity;
1847 1932 self->nodes = newnodes;
1848 1933 memset(&self->nodes[self->length], 0,
1849 1934 sizeof(nodetreenode) * (self->capacity - self->length));
1850 1935 }
1851 1936 return self->length++;
1852 1937 }
1853 1938
1854 1939 static int nt_insert(nodetree *self, const char *node, int rev)
1855 1940 {
1856 1941 int level = 0;
1857 1942 int off = 0;
1858 1943
1859 1944 while (level < 2 * self->nodelen) {
1860 1945 int k = nt_level(node, level);
1861 1946 nodetreenode *n;
1862 1947 int v;
1863 1948
1864 1949 n = &self->nodes[off];
1865 1950 v = n->children[k];
1866 1951
1867 1952 if (v == 0) {
1868 1953 n->children[k] = -rev - 2;
1869 1954 return 0;
1870 1955 }
1871 1956 if (v < 0) {
1872 1957 const char *oldnode =
1873 1958 index_node_existing(self->index, -(v + 2));
1874 1959 int noff;
1875 1960
1876 1961 if (oldnode == NULL)
1877 1962 return -1;
1878 1963 if (!memcmp(oldnode, node, self->nodelen)) {
1879 1964 n->children[k] = -rev - 2;
1880 1965 return 0;
1881 1966 }
1882 1967 noff = nt_new(self);
1883 1968 if (noff == -1)
1884 1969 return -1;
1885 1970 /* self->nodes may have been changed by realloc */
1886 1971 self->nodes[off].children[k] = noff;
1887 1972 off = noff;
1888 1973 n = &self->nodes[off];
1889 1974 n->children[nt_level(oldnode, ++level)] = v;
1890 1975 if (level > self->depth)
1891 1976 self->depth = level;
1892 1977 self->splits += 1;
1893 1978 } else {
1894 1979 level += 1;
1895 1980 off = v;
1896 1981 }
1897 1982 }
1898 1983
1899 1984 return -1;
1900 1985 }
1901 1986
1902 1987 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1903 1988 {
1904 1989 Py_ssize_t rev;
1905 1990 const char *node;
1906 1991 Py_ssize_t length;
1907 1992 if (!PyArg_ParseTuple(args, "n", &rev))
1908 1993 return NULL;
1909 1994 length = index_length(self->nt.index);
1910 1995 if (rev < 0 || rev >= length) {
1911 1996 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1912 1997 return NULL;
1913 1998 }
1914 1999 node = index_node_existing(self->nt.index, rev);
1915 2000 if (nt_insert(&self->nt, node, (int)rev) == -1)
1916 2001 return NULL;
1917 2002 Py_RETURN_NONE;
1918 2003 }
1919 2004
1920 2005 static int nt_delete_node(nodetree *self, const char *node)
1921 2006 {
1922 2007 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1923 2008 */
1924 2009 return nt_insert(self, node, -2);
1925 2010 }
1926 2011
1927 2012 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1928 2013 {
1929 2014 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1930 2015 self->nodes = NULL;
1931 2016
1932 2017 self->index = index;
1933 2018 /* The input capacity is in terms of revisions, while the field is in
1934 2019 * terms of nodetree nodes. */
1935 2020 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1936 2021 self->nodelen = index->nodelen;
1937 2022 self->depth = 0;
1938 2023 self->splits = 0;
1939 2024 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1940 2025 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1941 2026 return -1;
1942 2027 }
1943 2028 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1944 2029 if (self->nodes == NULL) {
1945 2030 PyErr_NoMemory();
1946 2031 return -1;
1947 2032 }
1948 2033 self->length = 1;
1949 2034 return 0;
1950 2035 }
1951 2036
1952 2037 static int ntobj_init(nodetreeObject *self, PyObject *args)
1953 2038 {
1954 2039 PyObject *index;
1955 2040 unsigned capacity;
1956 2041 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1957 2042 &capacity))
1958 2043 return -1;
1959 2044 Py_INCREF(index);
1960 2045 return nt_init(&self->nt, (indexObject *)index, capacity);
1961 2046 }
1962 2047
1963 2048 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1964 2049 {
1965 2050 return nt_find(self, node, nodelen, 1);
1966 2051 }
1967 2052
1968 2053 /*
1969 2054 * Find the length of the shortest unique prefix of node.
1970 2055 *
1971 2056 * Return values:
1972 2057 *
1973 2058 * -3: error (exception set)
1974 2059 * -2: not found (no exception set)
1975 2060 * rest: length of shortest prefix
1976 2061 */
1977 2062 static int nt_shortest(nodetree *self, const char *node)
1978 2063 {
1979 2064 int level, off;
1980 2065
1981 2066 for (level = off = 0; level < 2 * self->nodelen; level++) {
1982 2067 int k, v;
1983 2068 nodetreenode *n = &self->nodes[off];
1984 2069 k = nt_level(node, level);
1985 2070 v = n->children[k];
1986 2071 if (v < 0) {
1987 2072 const char *n;
1988 2073 v = -(v + 2);
1989 2074 n = index_node_existing(self->index, v);
1990 2075 if (n == NULL)
1991 2076 return -3;
1992 2077 if (memcmp(node, n, self->nodelen) != 0)
1993 2078 /*
1994 2079 * Found a unique prefix, but it wasn't for the
1995 2080 * requested node (i.e the requested node does
1996 2081 * not exist).
1997 2082 */
1998 2083 return -2;
1999 2084 return level + 1;
2000 2085 }
2001 2086 if (v == 0)
2002 2087 return -2;
2003 2088 off = v;
2004 2089 }
2005 2090 /*
2006 2091 * The node was still not unique after 40 hex digits, so this won't
2007 2092 * happen. Also, if we get here, then there's a programming error in
2008 2093 * this file that made us insert a node longer than 40 hex digits.
2009 2094 */
2010 2095 PyErr_SetString(PyExc_Exception, "broken node tree");
2011 2096 return -3;
2012 2097 }
2013 2098
2014 2099 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2015 2100 {
2016 2101 PyObject *val;
2017 2102 char *node;
2018 2103 int length;
2019 2104
2020 2105 if (!PyArg_ParseTuple(args, "O", &val))
2021 2106 return NULL;
2022 2107 if (node_check(self->nt.nodelen, val, &node) == -1)
2023 2108 return NULL;
2024 2109
2025 2110 length = nt_shortest(&self->nt, node);
2026 2111 if (length == -3)
2027 2112 return NULL;
2028 2113 if (length == -2) {
2029 2114 raise_revlog_error();
2030 2115 return NULL;
2031 2116 }
2032 2117 return PyInt_FromLong(length);
2033 2118 }
2034 2119
2035 2120 static void nt_dealloc(nodetree *self)
2036 2121 {
2037 2122 free(self->nodes);
2038 2123 self->nodes = NULL;
2039 2124 }
2040 2125
2041 2126 static void ntobj_dealloc(nodetreeObject *self)
2042 2127 {
2043 2128 Py_XDECREF(self->nt.index);
2044 2129 nt_dealloc(&self->nt);
2045 2130 PyObject_Del(self);
2046 2131 }
2047 2132
2048 2133 static PyMethodDef ntobj_methods[] = {
2049 2134 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2050 2135 "insert an index entry"},
2051 2136 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2052 2137 "find length of shortest hex nodeid of a binary ID"},
2053 2138 {NULL} /* Sentinel */
2054 2139 };
2055 2140
2056 2141 static PyTypeObject nodetreeType = {
2057 2142 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2058 2143 "parsers.nodetree", /* tp_name */
2059 2144 sizeof(nodetreeObject), /* tp_basicsize */
2060 2145 0, /* tp_itemsize */
2061 2146 (destructor)ntobj_dealloc, /* tp_dealloc */
2062 2147 0, /* tp_print */
2063 2148 0, /* tp_getattr */
2064 2149 0, /* tp_setattr */
2065 2150 0, /* tp_compare */
2066 2151 0, /* tp_repr */
2067 2152 0, /* tp_as_number */
2068 2153 0, /* tp_as_sequence */
2069 2154 0, /* tp_as_mapping */
2070 2155 0, /* tp_hash */
2071 2156 0, /* tp_call */
2072 2157 0, /* tp_str */
2073 2158 0, /* tp_getattro */
2074 2159 0, /* tp_setattro */
2075 2160 0, /* tp_as_buffer */
2076 2161 Py_TPFLAGS_DEFAULT, /* tp_flags */
2077 2162 "nodetree", /* tp_doc */
2078 2163 0, /* tp_traverse */
2079 2164 0, /* tp_clear */
2080 2165 0, /* tp_richcompare */
2081 2166 0, /* tp_weaklistoffset */
2082 2167 0, /* tp_iter */
2083 2168 0, /* tp_iternext */
2084 2169 ntobj_methods, /* tp_methods */
2085 2170 0, /* tp_members */
2086 2171 0, /* tp_getset */
2087 2172 0, /* tp_base */
2088 2173 0, /* tp_dict */
2089 2174 0, /* tp_descr_get */
2090 2175 0, /* tp_descr_set */
2091 2176 0, /* tp_dictoffset */
2092 2177 (initproc)ntobj_init, /* tp_init */
2093 2178 0, /* tp_alloc */
2094 2179 };
2095 2180
2096 2181 static int index_init_nt(indexObject *self)
2097 2182 {
2098 2183 if (!self->ntinitialized) {
2099 2184 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2100 2185 nt_dealloc(&self->nt);
2101 2186 return -1;
2102 2187 }
2103 2188 if (nt_insert(&self->nt, nullid, -1) == -1) {
2104 2189 nt_dealloc(&self->nt);
2105 2190 return -1;
2106 2191 }
2107 2192 self->ntinitialized = 1;
2108 2193 self->ntrev = (int)index_length(self);
2109 2194 self->ntlookups = 1;
2110 2195 self->ntmisses = 0;
2111 2196 }
2112 2197 return 0;
2113 2198 }
2114 2199
2115 2200 /*
2116 2201 * Return values:
2117 2202 *
2118 2203 * -3: error (exception set)
2119 2204 * -2: not found (no exception set)
2120 2205 * rest: valid rev
2121 2206 */
2122 2207 static int index_find_node(indexObject *self, const char *node)
2123 2208 {
2124 2209 int rev;
2125 2210
2126 2211 if (index_init_nt(self) == -1)
2127 2212 return -3;
2128 2213
2129 2214 self->ntlookups++;
2130 2215 rev = nt_find(&self->nt, node, self->nodelen, 0);
2131 2216 if (rev >= -1)
2132 2217 return rev;
2133 2218
2134 2219 /*
2135 2220 * For the first handful of lookups, we scan the entire index,
2136 2221 * and cache only the matching nodes. This optimizes for cases
2137 2222 * like "hg tip", where only a few nodes are accessed.
2138 2223 *
2139 2224 * After that, we cache every node we visit, using a single
2140 2225 * scan amortized over multiple lookups. This gives the best
2141 2226 * bulk performance, e.g. for "hg log".
2142 2227 */
2143 2228 if (self->ntmisses++ < 4) {
2144 2229 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2145 2230 const char *n = index_node_existing(self, rev);
2146 2231 if (n == NULL)
2147 2232 return -3;
2148 2233 if (memcmp(node, n, self->nodelen) == 0) {
2149 2234 if (nt_insert(&self->nt, n, rev) == -1)
2150 2235 return -3;
2151 2236 break;
2152 2237 }
2153 2238 }
2154 2239 } else {
2155 2240 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2156 2241 const char *n = index_node_existing(self, rev);
2157 2242 if (n == NULL)
2158 2243 return -3;
2159 2244 if (nt_insert(&self->nt, n, rev) == -1) {
2160 2245 self->ntrev = rev + 1;
2161 2246 return -3;
2162 2247 }
2163 2248 if (memcmp(node, n, self->nodelen) == 0) {
2164 2249 break;
2165 2250 }
2166 2251 }
2167 2252 self->ntrev = rev;
2168 2253 }
2169 2254
2170 2255 if (rev >= 0)
2171 2256 return rev;
2172 2257 return -2;
2173 2258 }
2174 2259
2175 2260 static PyObject *index_getitem(indexObject *self, PyObject *value)
2176 2261 {
2177 2262 char *node;
2178 2263 int rev;
2179 2264
2180 2265 if (PyInt_Check(value)) {
2181 2266 long idx;
2182 2267 if (!pylong_to_long(value, &idx)) {
2183 2268 return NULL;
2184 2269 }
2185 2270 return index_get(self, idx);
2186 2271 }
2187 2272
2188 2273 if (node_check(self->nodelen, value, &node) == -1)
2189 2274 return NULL;
2190 2275 rev = index_find_node(self, node);
2191 2276 if (rev >= -1)
2192 2277 return PyInt_FromLong(rev);
2193 2278 if (rev == -2)
2194 2279 raise_revlog_error();
2195 2280 return NULL;
2196 2281 }
2197 2282
2198 2283 /*
2199 2284 * Fully populate the radix tree.
2200 2285 */
2201 2286 static int index_populate_nt(indexObject *self)
2202 2287 {
2203 2288 int rev;
2204 2289 if (self->ntrev > 0) {
2205 2290 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2206 2291 const char *n = index_node_existing(self, rev);
2207 2292 if (n == NULL)
2208 2293 return -1;
2209 2294 if (nt_insert(&self->nt, n, rev) == -1)
2210 2295 return -1;
2211 2296 }
2212 2297 self->ntrev = -1;
2213 2298 }
2214 2299 return 0;
2215 2300 }
2216 2301
2217 2302 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2218 2303 {
2219 2304 const char *fullnode;
2220 2305 Py_ssize_t nodelen;
2221 2306 char *node;
2222 2307 int rev, i;
2223 2308
2224 2309 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2225 2310 return NULL;
2226 2311
2227 2312 if (nodelen < 1) {
2228 2313 PyErr_SetString(PyExc_ValueError, "key too short");
2229 2314 return NULL;
2230 2315 }
2231 2316
2232 2317 if (nodelen > 2 * self->nodelen) {
2233 2318 PyErr_SetString(PyExc_ValueError, "key too long");
2234 2319 return NULL;
2235 2320 }
2236 2321
2237 2322 for (i = 0; i < nodelen; i++)
2238 2323 hexdigit(node, i);
2239 2324 if (PyErr_Occurred()) {
2240 2325 /* input contains non-hex characters */
2241 2326 PyErr_Clear();
2242 2327 Py_RETURN_NONE;
2243 2328 }
2244 2329
2245 2330 if (index_init_nt(self) == -1)
2246 2331 return NULL;
2247 2332 if (index_populate_nt(self) == -1)
2248 2333 return NULL;
2249 2334 rev = nt_partialmatch(&self->nt, node, nodelen);
2250 2335
2251 2336 switch (rev) {
2252 2337 case -4:
2253 2338 raise_revlog_error();
2254 2339 return NULL;
2255 2340 case -2:
2256 2341 Py_RETURN_NONE;
2257 2342 case -1:
2258 2343 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2259 2344 }
2260 2345
2261 2346 fullnode = index_node_existing(self, rev);
2262 2347 if (fullnode == NULL) {
2263 2348 return NULL;
2264 2349 }
2265 2350 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2266 2351 }
2267 2352
2268 2353 static PyObject *index_shortest(indexObject *self, PyObject *args)
2269 2354 {
2270 2355 PyObject *val;
2271 2356 char *node;
2272 2357 int length;
2273 2358
2274 2359 if (!PyArg_ParseTuple(args, "O", &val))
2275 2360 return NULL;
2276 2361 if (node_check(self->nodelen, val, &node) == -1)
2277 2362 return NULL;
2278 2363
2279 2364 self->ntlookups++;
2280 2365 if (index_init_nt(self) == -1)
2281 2366 return NULL;
2282 2367 if (index_populate_nt(self) == -1)
2283 2368 return NULL;
2284 2369 length = nt_shortest(&self->nt, node);
2285 2370 if (length == -3)
2286 2371 return NULL;
2287 2372 if (length == -2) {
2288 2373 raise_revlog_error();
2289 2374 return NULL;
2290 2375 }
2291 2376 return PyInt_FromLong(length);
2292 2377 }
2293 2378
2294 2379 static PyObject *index_m_get(indexObject *self, PyObject *args)
2295 2380 {
2296 2381 PyObject *val;
2297 2382 char *node;
2298 2383 int rev;
2299 2384
2300 2385 if (!PyArg_ParseTuple(args, "O", &val))
2301 2386 return NULL;
2302 2387 if (node_check(self->nodelen, val, &node) == -1)
2303 2388 return NULL;
2304 2389 rev = index_find_node(self, node);
2305 2390 if (rev == -3)
2306 2391 return NULL;
2307 2392 if (rev == -2)
2308 2393 Py_RETURN_NONE;
2309 2394 return PyInt_FromLong(rev);
2310 2395 }
2311 2396
2312 2397 static int index_contains(indexObject *self, PyObject *value)
2313 2398 {
2314 2399 char *node;
2315 2400
2316 2401 if (PyInt_Check(value)) {
2317 2402 long rev;
2318 2403 if (!pylong_to_long(value, &rev)) {
2319 2404 return -1;
2320 2405 }
2321 2406 return rev >= -1 && rev < index_length(self);
2322 2407 }
2323 2408
2324 2409 if (node_check(self->nodelen, value, &node) == -1)
2325 2410 return -1;
2326 2411
2327 2412 switch (index_find_node(self, node)) {
2328 2413 case -3:
2329 2414 return -1;
2330 2415 case -2:
2331 2416 return 0;
2332 2417 default:
2333 2418 return 1;
2334 2419 }
2335 2420 }
2336 2421
2337 2422 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2338 2423 {
2339 2424 int ret = index_contains(self, args);
2340 2425 if (ret < 0)
2341 2426 return NULL;
2342 2427 return PyBool_FromLong((long)ret);
2343 2428 }
2344 2429
2345 2430 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2346 2431 {
2347 2432 char *node;
2348 2433 int rev;
2349 2434
2350 2435 if (node_check(self->nodelen, val, &node) == -1)
2351 2436 return NULL;
2352 2437 rev = index_find_node(self, node);
2353 2438 if (rev >= -1)
2354 2439 return PyInt_FromLong(rev);
2355 2440 if (rev == -2)
2356 2441 raise_revlog_error();
2357 2442 return NULL;
2358 2443 }
2359 2444
2360 2445 typedef uint64_t bitmask;
2361 2446
2362 2447 /*
2363 2448 * Given a disjoint set of revs, return all candidates for the
2364 2449 * greatest common ancestor. In revset notation, this is the set
2365 2450 * "heads(::a and ::b and ...)"
2366 2451 */
2367 2452 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2368 2453 int revcount)
2369 2454 {
2370 2455 const bitmask allseen = (1ull << revcount) - 1;
2371 2456 const bitmask poison = 1ull << revcount;
2372 2457 PyObject *gca = PyList_New(0);
2373 2458 int i, v, interesting;
2374 2459 int maxrev = -1;
2375 2460 bitmask sp;
2376 2461 bitmask *seen;
2377 2462
2378 2463 if (gca == NULL)
2379 2464 return PyErr_NoMemory();
2380 2465
2381 2466 for (i = 0; i < revcount; i++) {
2382 2467 if (revs[i] > maxrev)
2383 2468 maxrev = revs[i];
2384 2469 }
2385 2470
2386 2471 seen = calloc(sizeof(*seen), maxrev + 1);
2387 2472 if (seen == NULL) {
2388 2473 Py_DECREF(gca);
2389 2474 return PyErr_NoMemory();
2390 2475 }
2391 2476
2392 2477 for (i = 0; i < revcount; i++)
2393 2478 seen[revs[i]] = 1ull << i;
2394 2479
2395 2480 interesting = revcount;
2396 2481
2397 2482 for (v = maxrev; v >= 0 && interesting; v--) {
2398 2483 bitmask sv = seen[v];
2399 2484 int parents[2];
2400 2485
2401 2486 if (!sv)
2402 2487 continue;
2403 2488
2404 2489 if (sv < poison) {
2405 2490 interesting -= 1;
2406 2491 if (sv == allseen) {
2407 2492 PyObject *obj = PyInt_FromLong(v);
2408 2493 if (obj == NULL)
2409 2494 goto bail;
2410 2495 if (PyList_Append(gca, obj) == -1) {
2411 2496 Py_DECREF(obj);
2412 2497 goto bail;
2413 2498 }
2414 2499 sv |= poison;
2415 2500 for (i = 0; i < revcount; i++) {
2416 2501 if (revs[i] == v)
2417 2502 goto done;
2418 2503 }
2419 2504 }
2420 2505 }
2421 2506 if (index_get_parents(self, v, parents, maxrev) < 0)
2422 2507 goto bail;
2423 2508
2424 2509 for (i = 0; i < 2; i++) {
2425 2510 int p = parents[i];
2426 2511 if (p == -1)
2427 2512 continue;
2428 2513 sp = seen[p];
2429 2514 if (sv < poison) {
2430 2515 if (sp == 0) {
2431 2516 seen[p] = sv;
2432 2517 interesting++;
2433 2518 } else if (sp != sv)
2434 2519 seen[p] |= sv;
2435 2520 } else {
2436 2521 if (sp && sp < poison)
2437 2522 interesting--;
2438 2523 seen[p] = sv;
2439 2524 }
2440 2525 }
2441 2526 }
2442 2527
2443 2528 done:
2444 2529 free(seen);
2445 2530 return gca;
2446 2531 bail:
2447 2532 free(seen);
2448 2533 Py_XDECREF(gca);
2449 2534 return NULL;
2450 2535 }
2451 2536
2452 2537 /*
2453 2538 * Given a disjoint set of revs, return the subset with the longest
2454 2539 * path to the root.
2455 2540 */
2456 2541 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2457 2542 {
2458 2543 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2459 2544 static const Py_ssize_t capacity = 24;
2460 2545 int *depth, *interesting = NULL;
2461 2546 int i, j, v, ninteresting;
2462 2547 PyObject *dict = NULL, *keys = NULL;
2463 2548 long *seen = NULL;
2464 2549 int maxrev = -1;
2465 2550 long final;
2466 2551
2467 2552 if (revcount > capacity) {
2468 2553 PyErr_Format(PyExc_OverflowError,
2469 2554 "bitset size (%ld) > capacity (%ld)",
2470 2555 (long)revcount, (long)capacity);
2471 2556 return NULL;
2472 2557 }
2473 2558
2474 2559 for (i = 0; i < revcount; i++) {
2475 2560 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2476 2561 if (n > maxrev)
2477 2562 maxrev = n;
2478 2563 }
2479 2564
2480 2565 depth = calloc(sizeof(*depth), maxrev + 1);
2481 2566 if (depth == NULL)
2482 2567 return PyErr_NoMemory();
2483 2568
2484 2569 seen = calloc(sizeof(*seen), maxrev + 1);
2485 2570 if (seen == NULL) {
2486 2571 PyErr_NoMemory();
2487 2572 goto bail;
2488 2573 }
2489 2574
2490 2575 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2491 2576 if (interesting == NULL) {
2492 2577 PyErr_NoMemory();
2493 2578 goto bail;
2494 2579 }
2495 2580
2496 2581 if (PyList_Sort(revs) == -1)
2497 2582 goto bail;
2498 2583
2499 2584 for (i = 0; i < revcount; i++) {
2500 2585 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2501 2586 long b = 1l << i;
2502 2587 depth[n] = 1;
2503 2588 seen[n] = b;
2504 2589 interesting[b] = 1;
2505 2590 }
2506 2591
2507 2592 /* invariant: ninteresting is the number of non-zero entries in
2508 2593 * interesting. */
2509 2594 ninteresting = (int)revcount;
2510 2595
2511 2596 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2512 2597 int dv = depth[v];
2513 2598 int parents[2];
2514 2599 long sv;
2515 2600
2516 2601 if (dv == 0)
2517 2602 continue;
2518 2603
2519 2604 sv = seen[v];
2520 2605 if (index_get_parents(self, v, parents, maxrev) < 0)
2521 2606 goto bail;
2522 2607
2523 2608 for (i = 0; i < 2; i++) {
2524 2609 int p = parents[i];
2525 2610 long sp;
2526 2611 int dp;
2527 2612
2528 2613 if (p == -1)
2529 2614 continue;
2530 2615
2531 2616 dp = depth[p];
2532 2617 sp = seen[p];
2533 2618 if (dp <= dv) {
2534 2619 depth[p] = dv + 1;
2535 2620 if (sp != sv) {
2536 2621 interesting[sv] += 1;
2537 2622 seen[p] = sv;
2538 2623 if (sp) {
2539 2624 interesting[sp] -= 1;
2540 2625 if (interesting[sp] == 0)
2541 2626 ninteresting -= 1;
2542 2627 }
2543 2628 }
2544 2629 } else if (dv == dp - 1) {
2545 2630 long nsp = sp | sv;
2546 2631 if (nsp == sp)
2547 2632 continue;
2548 2633 seen[p] = nsp;
2549 2634 interesting[sp] -= 1;
2550 2635 if (interesting[sp] == 0)
2551 2636 ninteresting -= 1;
2552 2637 if (interesting[nsp] == 0)
2553 2638 ninteresting += 1;
2554 2639 interesting[nsp] += 1;
2555 2640 }
2556 2641 }
2557 2642 interesting[sv] -= 1;
2558 2643 if (interesting[sv] == 0)
2559 2644 ninteresting -= 1;
2560 2645 }
2561 2646
2562 2647 final = 0;
2563 2648 j = ninteresting;
2564 2649 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2565 2650 if (interesting[i] == 0)
2566 2651 continue;
2567 2652 final |= i;
2568 2653 j -= 1;
2569 2654 }
2570 2655 if (final == 0) {
2571 2656 keys = PyList_New(0);
2572 2657 goto bail;
2573 2658 }
2574 2659
2575 2660 dict = PyDict_New();
2576 2661 if (dict == NULL)
2577 2662 goto bail;
2578 2663
2579 2664 for (i = 0; i < revcount; i++) {
2580 2665 PyObject *key;
2581 2666
2582 2667 if ((final & (1 << i)) == 0)
2583 2668 continue;
2584 2669
2585 2670 key = PyList_GET_ITEM(revs, i);
2586 2671 Py_INCREF(key);
2587 2672 Py_INCREF(Py_None);
2588 2673 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2589 2674 Py_DECREF(key);
2590 2675 Py_DECREF(Py_None);
2591 2676 goto bail;
2592 2677 }
2593 2678 }
2594 2679
2595 2680 keys = PyDict_Keys(dict);
2596 2681
2597 2682 bail:
2598 2683 free(depth);
2599 2684 free(seen);
2600 2685 free(interesting);
2601 2686 Py_XDECREF(dict);
2602 2687
2603 2688 return keys;
2604 2689 }
2605 2690
2606 2691 /*
2607 2692 * Given a (possibly overlapping) set of revs, return all the
2608 2693 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2609 2694 */
2610 2695 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2611 2696 {
2612 2697 PyObject *ret = NULL;
2613 2698 Py_ssize_t argcount, i, len;
2614 2699 bitmask repeat = 0;
2615 2700 int revcount = 0;
2616 2701 int *revs;
2617 2702
2618 2703 argcount = PySequence_Length(args);
2619 2704 revs = PyMem_Malloc(argcount * sizeof(*revs));
2620 2705 if (argcount > 0 && revs == NULL)
2621 2706 return PyErr_NoMemory();
2622 2707 len = index_length(self);
2623 2708
2624 2709 for (i = 0; i < argcount; i++) {
2625 2710 static const int capacity = 24;
2626 2711 PyObject *obj = PySequence_GetItem(args, i);
2627 2712 bitmask x;
2628 2713 long val;
2629 2714
2630 2715 if (!PyInt_Check(obj)) {
2631 2716 PyErr_SetString(PyExc_TypeError,
2632 2717 "arguments must all be ints");
2633 2718 Py_DECREF(obj);
2634 2719 goto bail;
2635 2720 }
2636 2721 val = PyInt_AsLong(obj);
2637 2722 Py_DECREF(obj);
2638 2723 if (val == -1) {
2639 2724 ret = PyList_New(0);
2640 2725 goto done;
2641 2726 }
2642 2727 if (val < 0 || val >= len) {
2643 2728 PyErr_SetString(PyExc_IndexError, "index out of range");
2644 2729 goto bail;
2645 2730 }
2646 2731 /* this cheesy bloom filter lets us avoid some more
2647 2732 * expensive duplicate checks in the common set-is-disjoint
2648 2733 * case */
2649 2734 x = 1ull << (val & 0x3f);
2650 2735 if (repeat & x) {
2651 2736 int k;
2652 2737 for (k = 0; k < revcount; k++) {
2653 2738 if (val == revs[k])
2654 2739 goto duplicate;
2655 2740 }
2656 2741 } else
2657 2742 repeat |= x;
2658 2743 if (revcount >= capacity) {
2659 2744 PyErr_Format(PyExc_OverflowError,
2660 2745 "bitset size (%d) > capacity (%d)",
2661 2746 revcount, capacity);
2662 2747 goto bail;
2663 2748 }
2664 2749 revs[revcount++] = (int)val;
2665 2750 duplicate:;
2666 2751 }
2667 2752
2668 2753 if (revcount == 0) {
2669 2754 ret = PyList_New(0);
2670 2755 goto done;
2671 2756 }
2672 2757 if (revcount == 1) {
2673 2758 PyObject *obj;
2674 2759 ret = PyList_New(1);
2675 2760 if (ret == NULL)
2676 2761 goto bail;
2677 2762 obj = PyInt_FromLong(revs[0]);
2678 2763 if (obj == NULL)
2679 2764 goto bail;
2680 2765 PyList_SET_ITEM(ret, 0, obj);
2681 2766 goto done;
2682 2767 }
2683 2768
2684 2769 ret = find_gca_candidates(self, revs, revcount);
2685 2770 if (ret == NULL)
2686 2771 goto bail;
2687 2772
2688 2773 done:
2689 2774 PyMem_Free(revs);
2690 2775 return ret;
2691 2776
2692 2777 bail:
2693 2778 PyMem_Free(revs);
2694 2779 Py_XDECREF(ret);
2695 2780 return NULL;
2696 2781 }
2697 2782
2698 2783 /*
2699 2784 * Given a (possibly overlapping) set of revs, return the greatest
2700 2785 * common ancestors: those with the longest path to the root.
2701 2786 */
2702 2787 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2703 2788 {
2704 2789 PyObject *ret;
2705 2790 PyObject *gca = index_commonancestorsheads(self, args);
2706 2791 if (gca == NULL)
2707 2792 return NULL;
2708 2793
2709 2794 if (PyList_GET_SIZE(gca) <= 1) {
2710 2795 return gca;
2711 2796 }
2712 2797
2713 2798 ret = find_deepest(self, gca);
2714 2799 Py_DECREF(gca);
2715 2800 return ret;
2716 2801 }
2717 2802
2718 2803 /*
2719 2804 * Invalidate any trie entries introduced by added revs.
2720 2805 */
2721 2806 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2722 2807 {
2723 2808 Py_ssize_t i, len;
2724 2809
2725 2810 len = self->length + self->new_length;
2726 2811 i = start - self->length;
2727 2812 if (i < 0)
2728 2813 return;
2729 2814
2730 2815 for (i = start; i < len; i++) {
2731 2816 const char *node = index_node(self, i);
2732 2817 nt_delete_node(&self->nt, node);
2733 2818 }
2734 2819
2735 2820 self->new_length = start - self->length;
2736 2821 }
2737 2822
2738 2823 /*
2739 2824 * Delete a numeric range of revs, which must be at the end of the
2740 2825 * range.
2741 2826 */
2742 2827 static int index_slice_del(indexObject *self, PyObject *item)
2743 2828 {
2744 2829 Py_ssize_t start, stop, step, slicelength;
2745 2830 Py_ssize_t length = index_length(self) + 1;
2746 2831 int ret = 0;
2747 2832
2748 2833 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2749 2834 #ifdef IS_PY3K
2750 2835 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2751 2836 &slicelength) < 0)
2752 2837 #else
2753 2838 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2754 2839 &step, &slicelength) < 0)
2755 2840 #endif
2756 2841 return -1;
2757 2842
2758 2843 if (slicelength <= 0)
2759 2844 return 0;
2760 2845
2761 2846 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2762 2847 stop = start;
2763 2848
2764 2849 if (step < 0) {
2765 2850 stop = start + 1;
2766 2851 start = stop + step * (slicelength - 1) - 1;
2767 2852 step = -step;
2768 2853 }
2769 2854
2770 2855 if (step != 1) {
2771 2856 PyErr_SetString(PyExc_ValueError,
2772 2857 "revlog index delete requires step size of 1");
2773 2858 return -1;
2774 2859 }
2775 2860
2776 2861 if (stop != length - 1) {
2777 2862 PyErr_SetString(PyExc_IndexError,
2778 2863 "revlog index deletion indices are invalid");
2779 2864 return -1;
2780 2865 }
2781 2866
2782 2867 if (start < self->length) {
2783 2868 if (self->ntinitialized) {
2784 2869 Py_ssize_t i;
2785 2870
2786 2871 for (i = start; i < self->length; i++) {
2787 2872 const char *node = index_node_existing(self, i);
2788 2873 if (node == NULL)
2789 2874 return -1;
2790 2875
2791 2876 nt_delete_node(&self->nt, node);
2792 2877 }
2793 2878 if (self->new_length)
2794 2879 index_invalidate_added(self, self->length);
2795 2880 if (self->ntrev > start)
2796 2881 self->ntrev = (int)start;
2797 2882 } else if (self->new_length) {
2798 2883 self->new_length = 0;
2799 2884 }
2800 2885
2801 2886 self->length = start;
2802 2887 goto done;
2803 2888 }
2804 2889
2805 2890 if (self->ntinitialized) {
2806 2891 index_invalidate_added(self, start);
2807 2892 if (self->ntrev > start)
2808 2893 self->ntrev = (int)start;
2809 2894 } else {
2810 2895 self->new_length = start - self->length;
2811 2896 }
2812 2897 done:
2813 2898 Py_CLEAR(self->headrevs);
2814 2899 return ret;
2815 2900 }
2816 2901
2817 2902 /*
2818 2903 * Supported ops:
2819 2904 *
2820 2905 * slice deletion
2821 2906 * string assignment (extend node->rev mapping)
2822 2907 * string deletion (shrink node->rev mapping)
2823 2908 */
2824 2909 static int index_assign_subscript(indexObject *self, PyObject *item,
2825 2910 PyObject *value)
2826 2911 {
2827 2912 char *node;
2828 2913 long rev;
2829 2914
2830 2915 if (PySlice_Check(item) && value == NULL)
2831 2916 return index_slice_del(self, item);
2832 2917
2833 2918 if (node_check(self->nodelen, item, &node) == -1)
2834 2919 return -1;
2835 2920
2836 2921 if (value == NULL)
2837 2922 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2838 2923 : 0;
2839 2924 rev = PyInt_AsLong(value);
2840 2925 if (rev > INT_MAX || rev < 0) {
2841 2926 if (!PyErr_Occurred())
2842 2927 PyErr_SetString(PyExc_ValueError, "rev out of range");
2843 2928 return -1;
2844 2929 }
2845 2930
2846 2931 if (index_init_nt(self) == -1)
2847 2932 return -1;
2848 2933 return nt_insert(&self->nt, node, (int)rev);
2849 2934 }
2850 2935
2851 2936 /*
2852 2937 * Find all RevlogNG entries in an index that has inline data. Update
2853 2938 * the optional "offsets" table with those entries.
2854 2939 */
2855 2940 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2856 2941 {
2857 2942 const char *data = (const char *)self->buf.buf;
2858 2943 Py_ssize_t pos = 0;
2859 2944 Py_ssize_t end = self->buf.len;
2860 2945 long incr = self->entry_size;
2861 2946 Py_ssize_t len = 0;
2862 2947
2863 2948 while (pos + self->entry_size <= end && pos >= 0) {
2864 2949 uint32_t comp_len, sidedata_comp_len = 0;
2865 2950 /* 3rd element of header is length of compressed inline data */
2866 2951 if (self->format_version == format_v1) {
2867 2952 comp_len =
2868 2953 getbe32(data + pos + entry_v1_offset_comp_len);
2869 2954 sidedata_comp_len = 0;
2870 2955 } else if (self->format_version == format_v2) {
2871 2956 comp_len =
2872 2957 getbe32(data + pos + entry_v2_offset_comp_len);
2873 2958 sidedata_comp_len = getbe32(
2874 2959 data + pos + entry_v2_offset_sidedata_comp_len);
2875 2960 } else {
2876 2961 raise_revlog_error();
2877 2962 return -1;
2878 2963 }
2879 2964 incr = self->entry_size + comp_len + sidedata_comp_len;
2880 2965 if (offsets)
2881 2966 offsets[len] = data + pos;
2882 2967 len++;
2883 2968 pos += incr;
2884 2969 }
2885 2970
2886 2971 if (pos != end) {
2887 2972 if (!PyErr_Occurred())
2888 2973 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2889 2974 return -1;
2890 2975 }
2891 2976
2892 2977 return len;
2893 2978 }
2894 2979
2895 2980 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2896 2981 {
2897 2982 PyObject *data_obj, *inlined_obj, *revlogv2;
2898 2983 Py_ssize_t size;
2899 2984
2900 2985 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2901 2986
2902 2987 /* Initialize before argument-checking to avoid index_dealloc() crash.
2903 2988 */
2904 2989 self->added = NULL;
2905 2990 self->new_length = 0;
2906 2991 self->added_length = 0;
2907 2992 self->data = NULL;
2908 2993 memset(&self->buf, 0, sizeof(self->buf));
2909 2994 self->headrevs = NULL;
2910 2995 self->filteredrevs = Py_None;
2911 2996 Py_INCREF(Py_None);
2912 2997 self->ntinitialized = 0;
2913 2998 self->offsets = NULL;
2914 2999 self->nodelen = 20;
2915 3000 self->nullentry = NULL;
2916 3001 self->rust_ext_compat = 1;
2917 3002
2918 3003 revlogv2 = NULL;
2919 3004 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2920 3005 &data_obj, &inlined_obj, &revlogv2))
2921 3006 return -1;
2922 3007 if (!PyObject_CheckBuffer(data_obj)) {
2923 3008 PyErr_SetString(PyExc_TypeError,
2924 3009 "data does not support buffer interface");
2925 3010 return -1;
2926 3011 }
2927 3012 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2928 3013 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2929 3014 return -1;
2930 3015 }
2931 3016
2932 3017 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2933 3018 self->format_version = format_v2;
2934 3019 self->entry_size = v2_entry_size;
2935 3020 } else {
2936 3021 self->format_version = format_v1;
2937 3022 self->entry_size = v1_entry_size;
2938 3023 }
2939 3024
2940 3025 self->nullentry =
2941 3026 Py_BuildValue(PY23("iiiiiiis#iiBBi", "iiiiiiiy#iiBBi"), 0, 0, 0, -1,
2942 3027 -1, -1, -1, nullid, self->nodelen, 0, 0,
2943 3028 comp_mode_inline, comp_mode_inline, rank_unknown);
2944 3029
2945 3030 if (!self->nullentry)
2946 3031 return -1;
2947 3032 PyObject_GC_UnTrack(self->nullentry);
2948 3033
2949 3034 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2950 3035 return -1;
2951 3036 size = self->buf.len;
2952 3037
2953 3038 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2954 3039 self->data = data_obj;
2955 3040
2956 3041 self->ntlookups = self->ntmisses = 0;
2957 3042 self->ntrev = -1;
2958 3043 Py_INCREF(self->data);
2959 3044
2960 3045 if (self->inlined) {
2961 3046 Py_ssize_t len = inline_scan(self, NULL);
2962 3047 if (len == -1)
2963 3048 goto bail;
2964 3049 self->length = len;
2965 3050 } else {
2966 3051 if (size % self->entry_size) {
2967 3052 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2968 3053 goto bail;
2969 3054 }
2970 3055 self->length = size / self->entry_size;
2971 3056 }
2972 3057
2973 3058 return 0;
2974 3059 bail:
2975 3060 return -1;
2976 3061 }
2977 3062
2978 3063 static PyObject *index_nodemap(indexObject *self)
2979 3064 {
2980 3065 Py_INCREF(self);
2981 3066 return (PyObject *)self;
2982 3067 }
2983 3068
2984 3069 static void _index_clearcaches(indexObject *self)
2985 3070 {
2986 3071 if (self->offsets) {
2987 3072 PyMem_Free((void *)self->offsets);
2988 3073 self->offsets = NULL;
2989 3074 }
2990 3075 if (self->ntinitialized) {
2991 3076 nt_dealloc(&self->nt);
2992 3077 }
2993 3078 self->ntinitialized = 0;
2994 3079 Py_CLEAR(self->headrevs);
2995 3080 }
2996 3081
2997 3082 static PyObject *index_clearcaches(indexObject *self)
2998 3083 {
2999 3084 _index_clearcaches(self);
3000 3085 self->ntrev = -1;
3001 3086 self->ntlookups = self->ntmisses = 0;
3002 3087 Py_RETURN_NONE;
3003 3088 }
3004 3089
3005 3090 static void index_dealloc(indexObject *self)
3006 3091 {
3007 3092 _index_clearcaches(self);
3008 3093 Py_XDECREF(self->filteredrevs);
3009 3094 if (self->buf.buf) {
3010 3095 PyBuffer_Release(&self->buf);
3011 3096 memset(&self->buf, 0, sizeof(self->buf));
3012 3097 }
3013 3098 Py_XDECREF(self->data);
3014 3099 PyMem_Free(self->added);
3015 3100 Py_XDECREF(self->nullentry);
3016 3101 PyObject_Del(self);
3017 3102 }
3018 3103
3019 3104 static PySequenceMethods index_sequence_methods = {
3020 3105 (lenfunc)index_length, /* sq_length */
3021 3106 0, /* sq_concat */
3022 3107 0, /* sq_repeat */
3023 3108 (ssizeargfunc)index_get, /* sq_item */
3024 3109 0, /* sq_slice */
3025 3110 0, /* sq_ass_item */
3026 3111 0, /* sq_ass_slice */
3027 3112 (objobjproc)index_contains, /* sq_contains */
3028 3113 };
3029 3114
3030 3115 static PyMappingMethods index_mapping_methods = {
3031 3116 (lenfunc)index_length, /* mp_length */
3032 3117 (binaryfunc)index_getitem, /* mp_subscript */
3033 3118 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
3034 3119 };
3035 3120
3036 3121 static PyMethodDef index_methods[] = {
3037 3122 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3038 3123 "return the gca set of the given revs"},
3039 3124 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3040 3125 METH_VARARGS,
3041 3126 "return the heads of the common ancestors of the given revs"},
3042 3127 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3043 3128 "clear the index caches"},
3044 3129 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3045 3130 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3046 3131 "return `rev` associated with a node or None"},
3047 3132 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3048 3133 "return True if the node exist in the index"},
3049 3134 {"rev", (PyCFunction)index_m_rev, METH_O,
3050 3135 "return `rev` associated with a node or raise RevlogError"},
3051 3136 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3052 3137 "compute phases"},
3053 3138 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3054 3139 "reachableroots"},
3055 3140 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3056 3141 METH_VARARGS, "replace an existing index entry with a new value"},
3057 3142 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3058 3143 "get head revisions"}, /* Can do filtering since 3.2 */
3059 3144 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3060 3145 "get filtered head revisions"}, /* Can always do filtering */
3061 3146 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3062 3147 "True if the object is a snapshot"},
3063 3148 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3064 3149 "Gather snapshot data in a cache dict"},
3065 3150 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3066 3151 "determine revisions with deltas to reconstruct fulltext"},
3067 3152 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3068 3153 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
3069 3154 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3070 3155 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3071 3156 "match a potentially ambiguous node ID"},
3072 3157 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3073 3158 "find length of shortest hex nodeid of a binary ID"},
3074 3159 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3075 3160 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3076 3161 "return an entry in binary form"},
3077 3162 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3078 3163 "pack the revlog header information into binary"},
3079 3164 {NULL} /* Sentinel */
3080 3165 };
3081 3166
3082 3167 static PyGetSetDef index_getset[] = {
3083 3168 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
3084 3169 {NULL} /* Sentinel */
3085 3170 };
3086 3171
3087 3172 static PyMemberDef index_members[] = {
3088 3173 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3089 3174 "size of an index entry"},
3090 3175 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3091 3176 "size of an index entry"},
3092 3177 {NULL} /* Sentinel */
3093 3178 };
3094 3179
3095 3180 PyTypeObject HgRevlogIndex_Type = {
3096 3181 PyVarObject_HEAD_INIT(NULL, 0) /* header */
3097 3182 "parsers.index", /* tp_name */
3098 3183 sizeof(indexObject), /* tp_basicsize */
3099 3184 0, /* tp_itemsize */
3100 3185 (destructor)index_dealloc, /* tp_dealloc */
3101 3186 0, /* tp_print */
3102 3187 0, /* tp_getattr */
3103 3188 0, /* tp_setattr */
3104 3189 0, /* tp_compare */
3105 3190 0, /* tp_repr */
3106 3191 0, /* tp_as_number */
3107 3192 &index_sequence_methods, /* tp_as_sequence */
3108 3193 &index_mapping_methods, /* tp_as_mapping */
3109 3194 0, /* tp_hash */
3110 3195 0, /* tp_call */
3111 3196 0, /* tp_str */
3112 3197 0, /* tp_getattro */
3113 3198 0, /* tp_setattro */
3114 3199 0, /* tp_as_buffer */
3115 3200 Py_TPFLAGS_DEFAULT, /* tp_flags */
3116 3201 "revlog index", /* tp_doc */
3117 3202 0, /* tp_traverse */
3118 3203 0, /* tp_clear */
3119 3204 0, /* tp_richcompare */
3120 3205 0, /* tp_weaklistoffset */
3121 3206 0, /* tp_iter */
3122 3207 0, /* tp_iternext */
3123 3208 index_methods, /* tp_methods */
3124 3209 index_members, /* tp_members */
3125 3210 index_getset, /* tp_getset */
3126 3211 0, /* tp_base */
3127 3212 0, /* tp_dict */
3128 3213 0, /* tp_descr_get */
3129 3214 0, /* tp_descr_set */
3130 3215 0, /* tp_dictoffset */
3131 3216 (initproc)index_init, /* tp_init */
3132 3217 0, /* tp_alloc */
3133 3218 };
3134 3219
3135 3220 /*
3136 3221 * returns a tuple of the form (index, cache) with elements as
3137 3222 * follows:
3138 3223 *
3139 3224 * index: an index object that lazily parses Revlog (v1 or v2) records
3140 3225 * cache: if data is inlined, a tuple (0, index_file_content), else None
3141 3226 * index_file_content could be a string, or a buffer
3142 3227 *
3143 3228 * added complications are for backwards compatibility
3144 3229 */
3145 3230 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3146 3231 {
3147 3232 PyObject *cache = NULL;
3148 3233 indexObject *idx;
3149 3234 int ret;
3150 3235
3151 3236 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3152 3237 if (idx == NULL)
3153 3238 goto bail;
3154 3239
3155 3240 ret = index_init(idx, args, kwargs);
3156 3241 if (ret == -1)
3157 3242 goto bail;
3158 3243
3159 3244 if (idx->inlined) {
3160 3245 cache = Py_BuildValue("iO", 0, idx->data);
3161 3246 if (cache == NULL)
3162 3247 goto bail;
3163 3248 } else {
3164 3249 cache = Py_None;
3165 3250 Py_INCREF(cache);
3166 3251 }
3167 3252
3168 3253 return Py_BuildValue("NN", idx, cache);
3169 3254
3170 3255 bail:
3171 3256 Py_XDECREF(idx);
3172 3257 Py_XDECREF(cache);
3173 3258 return NULL;
3174 3259 }
3175 3260
3176 3261 static Revlog_CAPI CAPI = {
3177 3262 /* increment the abi_version field upon each change in the Revlog_CAPI
3178 3263 struct or in the ABI of the listed functions */
3179 3264 2,
3180 3265 index_length,
3181 3266 index_node,
3182 3267 HgRevlogIndex_GetParents,
3183 3268 };
3184 3269
3185 3270 void revlog_module_init(PyObject *mod)
3186 3271 {
3187 3272 PyObject *caps = NULL;
3188 3273 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3189 3274 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3190 3275 return;
3191 3276 Py_INCREF(&HgRevlogIndex_Type);
3192 3277 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3193 3278
3194 3279 nodetreeType.tp_new = PyType_GenericNew;
3195 3280 if (PyType_Ready(&nodetreeType) < 0)
3196 3281 return;
3197 3282 Py_INCREF(&nodetreeType);
3198 3283 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3199 3284
3200 3285 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3201 3286 if (caps != NULL)
3202 3287 PyModule_AddObject(mod, "revlog_CAPI", caps);
3203 3288 }
General Comments 0
You need to be logged in to leave comments. Login now