##// END OF EJS Templates
cext: use PyLong symbols...
Gregory Szorc -
r49673:13d705c9 default
parent child Browse files
Show More
@@ -1,3293 +1,3284 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 #ifdef IS_PY3K
27 /* The mapping of Python types is meant to be temporary to get Python
28 * 3 to compile. We should remove this once Python 3 support is fully
29 * supported and proper types are used in the extensions themselves. */
30 #define PyInt_Check PyLong_Check
31 #define PyInt_FromLong PyLong_FromLong
32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 #define PyInt_AsLong PyLong_AsLong
34 #endif
35
36 26 typedef struct indexObjectStruct indexObject;
37 27
38 28 typedef struct {
39 29 int children[16];
40 30 } nodetreenode;
41 31
42 32 typedef struct {
43 33 int abi_version;
44 34 Py_ssize_t (*index_length)(const indexObject *);
45 35 const char *(*index_node)(indexObject *, Py_ssize_t);
46 36 int (*index_parents)(PyObject *, int, int *);
47 37 } Revlog_CAPI;
48 38
49 39 /*
50 40 * A base-16 trie for fast node->rev mapping.
51 41 *
52 42 * Positive value is index of the next node in the trie
53 43 * Negative value is a leaf: -(rev + 2)
54 44 * Zero is empty
55 45 */
56 46 typedef struct {
57 47 indexObject *index;
58 48 nodetreenode *nodes;
59 49 Py_ssize_t nodelen;
60 50 size_t length; /* # nodes in use */
61 51 size_t capacity; /* # nodes allocated */
62 52 int depth; /* maximum depth of tree */
63 53 int splits; /* # splits performed */
64 54 } nodetree;
65 55
66 56 typedef struct {
67 57 PyObject_HEAD /* ; */
68 58 nodetree nt;
69 59 } nodetreeObject;
70 60
71 61 /*
72 62 * This class has two behaviors.
73 63 *
74 64 * When used in a list-like way (with integer keys), we decode an
75 65 * entry in a RevlogNG index file on demand. We have limited support for
76 66 * integer-keyed insert and delete, only at elements right before the
77 67 * end.
78 68 *
79 69 * With string keys, we lazily perform a reverse mapping from node to
80 70 * rev, using a base-16 trie.
81 71 */
82 72 struct indexObjectStruct {
83 73 PyObject_HEAD
84 74 /* Type-specific fields go here. */
85 75 PyObject *data; /* raw bytes of index */
86 76 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 77 PyObject *nullentry; /* fast path for references to null */
88 78 Py_buffer buf; /* buffer of data */
89 79 const char **offsets; /* populated on demand */
90 80 Py_ssize_t length; /* current on-disk number of elements */
91 81 unsigned new_length; /* number of added elements */
92 82 unsigned added_length; /* space reserved for added elements */
93 83 char *added; /* populated on demand */
94 84 PyObject *headrevs; /* cache, invalidated on changes */
95 85 PyObject *filteredrevs; /* filtered revs set */
96 86 nodetree nt; /* base-16 trie */
97 87 int ntinitialized; /* 0 or 1 */
98 88 int ntrev; /* last rev scanned */
99 89 int ntlookups; /* # lookups */
100 90 int ntmisses; /* # lookups that miss the cache */
101 91 int inlined;
102 92 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
103 93 */
104 94 long rust_ext_compat; /* compatibility with being used in rust
105 95 extensions */
106 96 long format_version; /* format version selector (format_*) */
107 97 };
108 98
109 99 static Py_ssize_t index_length(const indexObject *self)
110 100 {
111 101 return self->length + self->new_length;
112 102 }
113 103
114 104 static const char nullid[32] = {0};
115 105 static const Py_ssize_t nullrev = -1;
116 106
117 107 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
118 108
119 109 static int index_find_node(indexObject *self, const char *node);
120 110
121 111 #if LONG_MAX == 0x7fffffffL
122 112 static const char *const tuple_format =
123 113 PY23("Kiiiiiis#KiBBi", "Kiiiiiiy#KiBBi");
124 114 #else
125 115 static const char *const tuple_format =
126 116 PY23("kiiiiiis#kiBBi", "kiiiiiiy#kiBBi");
127 117 #endif
128 118
129 119 /* A RevlogNG v1 index entry is 64 bytes long. */
130 120 static const long v1_entry_size = 64;
131 121
132 122 /* A Revlogv2 index entry is 96 bytes long. */
133 123 static const long v2_entry_size = 96;
134 124
135 125 /* A Changelogv2 index entry is 96 bytes long. */
136 126 static const long cl2_entry_size = 96;
137 127
138 128 /* Internal format version.
139 129 * Must match their counterparts in revlogutils/constants.py */
140 130 static const long format_v1 = 1; /* constants.py: REVLOGV1 */
141 131 static const long format_v2 = 0xDEAD; /* constants.py: REVLOGV2 */
142 132 static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */
143 133
144 134 static const long entry_v1_offset_high = 0;
145 135 static const long entry_v1_offset_offset_flags = 4;
146 136 static const long entry_v1_offset_comp_len = 8;
147 137 static const long entry_v1_offset_uncomp_len = 12;
148 138 static const long entry_v1_offset_base_rev = 16;
149 139 static const long entry_v1_offset_link_rev = 20;
150 140 static const long entry_v1_offset_parent_1 = 24;
151 141 static const long entry_v1_offset_parent_2 = 28;
152 142 static const long entry_v1_offset_node_id = 32;
153 143
154 144 static const long entry_v2_offset_high = 0;
155 145 static const long entry_v2_offset_offset_flags = 4;
156 146 static const long entry_v2_offset_comp_len = 8;
157 147 static const long entry_v2_offset_uncomp_len = 12;
158 148 static const long entry_v2_offset_base_rev = 16;
159 149 static const long entry_v2_offset_link_rev = 20;
160 150 static const long entry_v2_offset_parent_1 = 24;
161 151 static const long entry_v2_offset_parent_2 = 28;
162 152 static const long entry_v2_offset_node_id = 32;
163 153 static const long entry_v2_offset_sidedata_offset = 64;
164 154 static const long entry_v2_offset_sidedata_comp_len = 72;
165 155 static const long entry_v2_offset_all_comp_mode = 76;
166 156 /* next free offset: 77 */
167 157
168 158 static const long entry_cl2_offset_high = 0;
169 159 static const long entry_cl2_offset_offset_flags = 4;
170 160 static const long entry_cl2_offset_comp_len = 8;
171 161 static const long entry_cl2_offset_uncomp_len = 12;
172 162 static const long entry_cl2_offset_parent_1 = 16;
173 163 static const long entry_cl2_offset_parent_2 = 20;
174 164 static const long entry_cl2_offset_node_id = 24;
175 165 static const long entry_cl2_offset_sidedata_offset = 56;
176 166 static const long entry_cl2_offset_sidedata_comp_len = 64;
177 167 static const long entry_cl2_offset_all_comp_mode = 68;
178 168 static const long entry_cl2_offset_rank = 69;
179 169 /* next free offset: 73 */
180 170
181 171 static const char comp_mode_inline = 2;
182 172 static const char rank_unknown = -1;
183 173
184 174 static void raise_revlog_error(void)
185 175 {
186 176 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
187 177
188 178 mod = PyImport_ImportModule("mercurial.error");
189 179 if (mod == NULL) {
190 180 goto cleanup;
191 181 }
192 182
193 183 dict = PyModule_GetDict(mod);
194 184 if (dict == NULL) {
195 185 goto cleanup;
196 186 }
197 187 Py_INCREF(dict);
198 188
199 189 errclass = PyDict_GetItemString(dict, "RevlogError");
200 190 if (errclass == NULL) {
201 191 PyErr_SetString(PyExc_SystemError,
202 192 "could not find RevlogError");
203 193 goto cleanup;
204 194 }
205 195
206 196 /* value of exception is ignored by callers */
207 197 PyErr_SetString(errclass, "RevlogError");
208 198
209 199 cleanup:
210 200 Py_XDECREF(dict);
211 201 Py_XDECREF(mod);
212 202 }
213 203
214 204 /*
215 205 * Return a pointer to the beginning of a RevlogNG record.
216 206 */
217 207 static const char *index_deref(indexObject *self, Py_ssize_t pos)
218 208 {
219 209 if (pos >= self->length)
220 210 return self->added + (pos - self->length) * self->entry_size;
221 211
222 212 if (self->inlined && pos > 0) {
223 213 if (self->offsets == NULL) {
224 214 Py_ssize_t ret;
225 215 self->offsets =
226 216 PyMem_Malloc(self->length * sizeof(*self->offsets));
227 217 if (self->offsets == NULL)
228 218 return (const char *)PyErr_NoMemory();
229 219 ret = inline_scan(self, self->offsets);
230 220 if (ret == -1) {
231 221 return NULL;
232 222 };
233 223 }
234 224 return self->offsets[pos];
235 225 }
236 226
237 227 return (const char *)(self->buf.buf) + pos * self->entry_size;
238 228 }
239 229
240 230 /*
241 231 * Get parents of the given rev.
242 232 *
243 233 * The specified rev must be valid and must not be nullrev. A returned
244 234 * parent revision may be nullrev, but is guaranteed to be in valid range.
245 235 */
246 236 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
247 237 int maxrev)
248 238 {
249 239 const char *data = index_deref(self, rev);
250 240
251 241 if (self->format_version == format_v1) {
252 242 ps[0] = getbe32(data + entry_v1_offset_parent_1);
253 243 ps[1] = getbe32(data + entry_v1_offset_parent_2);
254 244 } else if (self->format_version == format_v2) {
255 245 ps[0] = getbe32(data + entry_v2_offset_parent_1);
256 246 ps[1] = getbe32(data + entry_v2_offset_parent_2);
257 247 } else if (self->format_version == format_cl2) {
258 248 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
259 249 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
260 250 } else {
261 251 raise_revlog_error();
262 252 return -1;
263 253 }
264 254
265 255 /* If index file is corrupted, ps[] may point to invalid revisions. So
266 256 * there is a risk of buffer overflow to trust them unconditionally. */
267 257 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
268 258 PyErr_SetString(PyExc_ValueError, "parent out of range");
269 259 return -1;
270 260 }
271 261 return 0;
272 262 }
273 263
274 264 /*
275 265 * Get parents of the given rev.
276 266 *
277 267 * If the specified rev is out of range, IndexError will be raised. If the
278 268 * revlog entry is corrupted, ValueError may be raised.
279 269 *
280 270 * Returns 0 on success or -1 on failure.
281 271 */
282 272 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
283 273 {
284 274 int tiprev;
285 275 if (!op || !HgRevlogIndex_Check(op) || !ps) {
286 276 PyErr_BadInternalCall();
287 277 return -1;
288 278 }
289 279 tiprev = (int)index_length((indexObject *)op) - 1;
290 280 if (rev < -1 || rev > tiprev) {
291 281 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
292 282 return -1;
293 283 } else if (rev == -1) {
294 284 ps[0] = ps[1] = -1;
295 285 return 0;
296 286 } else {
297 287 return index_get_parents((indexObject *)op, rev, ps, tiprev);
298 288 }
299 289 }
300 290
301 291 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
302 292 {
303 293 const char *data;
304 294 uint64_t offset;
305 295
306 296 if (rev == nullrev)
307 297 return 0;
308 298
309 299 data = index_deref(self, rev);
310 300
311 301 if (self->format_version == format_v1) {
312 302 offset = getbe32(data + entry_v1_offset_offset_flags);
313 303 if (rev == 0) {
314 304 /* mask out version number for the first entry */
315 305 offset &= 0xFFFF;
316 306 } else {
317 307 uint32_t offset_high =
318 308 getbe32(data + entry_v1_offset_high);
319 309 offset |= ((uint64_t)offset_high) << 32;
320 310 }
321 311 } else if (self->format_version == format_v2) {
322 312 offset = getbe32(data + entry_v2_offset_offset_flags);
323 313 if (rev == 0) {
324 314 /* mask out version number for the first entry */
325 315 offset &= 0xFFFF;
326 316 } else {
327 317 uint32_t offset_high =
328 318 getbe32(data + entry_v2_offset_high);
329 319 offset |= ((uint64_t)offset_high) << 32;
330 320 }
331 321 } else if (self->format_version == format_cl2) {
332 322 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
333 323 offset = getbe32(data + entry_cl2_offset_offset_flags);
334 324 offset |= ((uint64_t)offset_high) << 32;
335 325 } else {
336 326 raise_revlog_error();
337 327 return -1;
338 328 }
339 329
340 330 return (int64_t)(offset >> 16);
341 331 }
342 332
343 333 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
344 334 {
345 335 const char *data;
346 336 int tmp;
347 337
348 338 if (rev == nullrev)
349 339 return 0;
350 340
351 341 data = index_deref(self, rev);
352 342
353 343 if (self->format_version == format_v1) {
354 344 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
355 345 } else if (self->format_version == format_v2) {
356 346 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
357 347 } else if (self->format_version == format_cl2) {
358 348 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
359 349 } else {
360 350 raise_revlog_error();
361 351 return -1;
362 352 }
363 353 if (tmp < 0) {
364 354 PyErr_Format(PyExc_OverflowError,
365 355 "revlog entry size out of bound (%d)", tmp);
366 356 return -1;
367 357 }
368 358 return tmp;
369 359 }
370 360
371 361 /*
372 362 * RevlogNG format (all in big endian, data may be inlined):
373 363 * 6 bytes: offset
374 364 * 2 bytes: flags
375 365 * 4 bytes: compressed length
376 366 * 4 bytes: uncompressed length
377 367 * 4 bytes: base revision
378 368 * 4 bytes: link revision
379 369 * 4 bytes: parent 1 revision
380 370 * 4 bytes: parent 2 revision
381 371 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
382 372 */
383 373 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
384 374 {
385 375 uint64_t offset_flags, sidedata_offset;
386 376 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
387 377 sidedata_comp_len, rank = rank_unknown;
388 378 char data_comp_mode, sidedata_comp_mode;
389 379 const char *c_node_id;
390 380 const char *data;
391 381 Py_ssize_t length = index_length(self);
392 382
393 383 if (pos == nullrev) {
394 384 Py_INCREF(self->nullentry);
395 385 return self->nullentry;
396 386 }
397 387
398 388 if (pos < 0 || pos >= length) {
399 389 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
400 390 return NULL;
401 391 }
402 392
403 393 data = index_deref(self, pos);
404 394 if (data == NULL)
405 395 return NULL;
406 396
407 397 if (self->format_version == format_v1) {
408 398 offset_flags = getbe32(data + entry_v1_offset_offset_flags);
409 399 /*
410 400 * The first entry on-disk needs the version number masked out,
411 401 * but this doesn't apply if entries are added to an empty
412 402 * index.
413 403 */
414 404 if (self->length && pos == 0)
415 405 offset_flags &= 0xFFFF;
416 406 else {
417 407 uint32_t offset_high =
418 408 getbe32(data + entry_v1_offset_high);
419 409 offset_flags |= ((uint64_t)offset_high) << 32;
420 410 }
421 411
422 412 comp_len = getbe32(data + entry_v1_offset_comp_len);
423 413 uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
424 414 base_rev = getbe32(data + entry_v1_offset_base_rev);
425 415 link_rev = getbe32(data + entry_v1_offset_link_rev);
426 416 parent_1 = getbe32(data + entry_v1_offset_parent_1);
427 417 parent_2 = getbe32(data + entry_v1_offset_parent_2);
428 418 c_node_id = data + entry_v1_offset_node_id;
429 419
430 420 sidedata_offset = 0;
431 421 sidedata_comp_len = 0;
432 422 data_comp_mode = comp_mode_inline;
433 423 sidedata_comp_mode = comp_mode_inline;
434 424 } else if (self->format_version == format_v2) {
435 425 offset_flags = getbe32(data + entry_v2_offset_offset_flags);
436 426 /*
437 427 * The first entry on-disk needs the version number masked out,
438 428 * but this doesn't apply if entries are added to an empty
439 429 * index.
440 430 */
441 431 if (self->length && pos == 0)
442 432 offset_flags &= 0xFFFF;
443 433 else {
444 434 uint32_t offset_high =
445 435 getbe32(data + entry_v2_offset_high);
446 436 offset_flags |= ((uint64_t)offset_high) << 32;
447 437 }
448 438
449 439 comp_len = getbe32(data + entry_v2_offset_comp_len);
450 440 uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
451 441 base_rev = getbe32(data + entry_v2_offset_base_rev);
452 442 link_rev = getbe32(data + entry_v2_offset_link_rev);
453 443 parent_1 = getbe32(data + entry_v2_offset_parent_1);
454 444 parent_2 = getbe32(data + entry_v2_offset_parent_2);
455 445 c_node_id = data + entry_v2_offset_node_id;
456 446
457 447 sidedata_offset =
458 448 getbe64(data + entry_v2_offset_sidedata_offset);
459 449 sidedata_comp_len =
460 450 getbe32(data + entry_v2_offset_sidedata_comp_len);
461 451 data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
462 452 sidedata_comp_mode =
463 453 ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
464 454 } else if (self->format_version == format_cl2) {
465 455 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
466 456 offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
467 457 offset_flags |= ((uint64_t)offset_high) << 32;
468 458 comp_len = getbe32(data + entry_cl2_offset_comp_len);
469 459 uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
470 460 /* base_rev and link_rev are not stored in changelogv2, but are
471 461 still used by some functions shared with the other revlogs.
472 462 They are supposed to contain links to other revisions,
473 463 but they always point to themselves in the case of a changelog.
474 464 */
475 465 base_rev = pos;
476 466 link_rev = pos;
477 467 parent_1 = getbe32(data + entry_cl2_offset_parent_1);
478 468 parent_2 = getbe32(data + entry_cl2_offset_parent_2);
479 469 c_node_id = data + entry_cl2_offset_node_id;
480 470 sidedata_offset =
481 471 getbe64(data + entry_cl2_offset_sidedata_offset);
482 472 sidedata_comp_len =
483 473 getbe32(data + entry_cl2_offset_sidedata_comp_len);
484 474 data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
485 475 sidedata_comp_mode =
486 476 ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
487 477 rank = getbe32(data + entry_cl2_offset_rank);
488 478 } else {
489 479 raise_revlog_error();
490 480 return NULL;
491 481 }
492 482
493 483 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
494 484 base_rev, link_rev, parent_1, parent_2, c_node_id,
495 485 self->nodelen, sidedata_offset, sidedata_comp_len,
496 486 data_comp_mode, sidedata_comp_mode, rank);
497 487 }
498 488 /*
499 489 * Pack header information in binary
500 490 */
501 491 static PyObject *index_pack_header(indexObject *self, PyObject *args)
502 492 {
503 493 int header;
504 494 char out[4];
505 495 if (!PyArg_ParseTuple(args, "I", &header)) {
506 496 return NULL;
507 497 }
508 498 if (self->format_version != format_v1) {
509 499 PyErr_Format(PyExc_RuntimeError,
510 500 "version header should go in the docket, not the "
511 501 "index: %lu",
512 502 header);
513 503 return NULL;
514 504 }
515 505 putbe32(header, out);
516 506 return PyBytes_FromStringAndSize(out, 4);
517 507 }
518 508 /*
519 509 * Return the raw binary string representing a revision
520 510 */
521 511 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
522 512 {
523 513 long rev;
524 514 const char *data;
525 515 Py_ssize_t length = index_length(self);
526 516
527 517 if (!pylong_to_long(value, &rev)) {
528 518 return NULL;
529 519 }
530 520 if (rev < 0 || rev >= length) {
531 521 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
532 522 rev);
533 523 return NULL;
534 524 };
535 525
536 526 data = index_deref(self, rev);
537 527 if (data == NULL)
538 528 return NULL;
539 529 if (rev == 0 && self->format_version == format_v1) {
540 530 /* the header is eating the start of the first entry */
541 531 return PyBytes_FromStringAndSize(data + 4,
542 532 self->entry_size - 4);
543 533 }
544 534 return PyBytes_FromStringAndSize(data, self->entry_size);
545 535 }
546 536
547 537 /*
548 538 * Return the hash of node corresponding to the given rev.
549 539 */
550 540 static const char *index_node(indexObject *self, Py_ssize_t pos)
551 541 {
552 542 Py_ssize_t length = index_length(self);
553 543 const char *data;
554 544 const char *node_id;
555 545
556 546 if (pos == nullrev)
557 547 return nullid;
558 548
559 549 if (pos >= length)
560 550 return NULL;
561 551
562 552 data = index_deref(self, pos);
563 553
564 554 if (self->format_version == format_v1) {
565 555 node_id = data + entry_v1_offset_node_id;
566 556 } else if (self->format_version == format_v2) {
567 557 node_id = data + entry_v2_offset_node_id;
568 558 } else if (self->format_version == format_cl2) {
569 559 node_id = data + entry_cl2_offset_node_id;
570 560 } else {
571 561 raise_revlog_error();
572 562 return NULL;
573 563 }
574 564
575 565 return data ? node_id : NULL;
576 566 }
577 567
578 568 /*
579 569 * Return the hash of the node corresponding to the given rev. The
580 570 * rev is assumed to be existing. If not, an exception is set.
581 571 */
582 572 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
583 573 {
584 574 const char *node = index_node(self, pos);
585 575 if (node == NULL) {
586 576 PyErr_Format(PyExc_IndexError, "could not access rev %d",
587 577 (int)pos);
588 578 }
589 579 return node;
590 580 }
591 581
592 582 static int nt_insert(nodetree *self, const char *node, int rev);
593 583
594 584 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
595 585 {
596 586 Py_ssize_t thisnodelen;
597 587 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
598 588 return -1;
599 589 if (nodelen == thisnodelen)
600 590 return 0;
601 591 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
602 592 thisnodelen, nodelen);
603 593 return -1;
604 594 }
605 595
606 596 static PyObject *index_append(indexObject *self, PyObject *obj)
607 597 {
608 598 uint64_t offset_flags, sidedata_offset;
609 599 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
610 600 sidedata_comp_len, rank;
611 601 char data_comp_mode, sidedata_comp_mode;
612 602 Py_ssize_t c_node_id_len;
613 603 const char *c_node_id;
614 604 char comp_field;
615 605 char *data;
616 606
617 607 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
618 608 &uncomp_len, &base_rev, &link_rev, &parent_1,
619 609 &parent_2, &c_node_id, &c_node_id_len,
620 610 &sidedata_offset, &sidedata_comp_len,
621 611 &data_comp_mode, &sidedata_comp_mode, &rank)) {
622 612 PyErr_SetString(PyExc_TypeError, "12-tuple required");
623 613 return NULL;
624 614 }
625 615
626 616 if (c_node_id_len != self->nodelen) {
627 617 PyErr_SetString(PyExc_TypeError, "invalid node");
628 618 return NULL;
629 619 }
630 620 if (self->format_version == format_v1) {
631 621
632 622 if (data_comp_mode != comp_mode_inline) {
633 623 PyErr_Format(PyExc_ValueError,
634 624 "invalid data compression mode: %i",
635 625 data_comp_mode);
636 626 return NULL;
637 627 }
638 628 if (sidedata_comp_mode != comp_mode_inline) {
639 629 PyErr_Format(PyExc_ValueError,
640 630 "invalid sidedata compression mode: %i",
641 631 sidedata_comp_mode);
642 632 return NULL;
643 633 }
644 634 }
645 635
646 636 if (self->new_length == self->added_length) {
647 637 size_t new_added_length =
648 638 self->added_length ? self->added_length * 2 : 4096;
649 639 void *new_added = PyMem_Realloc(
650 640 self->added, new_added_length * self->entry_size);
651 641 if (!new_added)
652 642 return PyErr_NoMemory();
653 643 self->added = new_added;
654 644 self->added_length = new_added_length;
655 645 }
656 646 rev = self->length + self->new_length;
657 647 data = self->added + self->entry_size * self->new_length++;
658 648
659 649 memset(data, 0, self->entry_size);
660 650
661 651 if (self->format_version == format_v1) {
662 652 putbe32(offset_flags >> 32, data + entry_v1_offset_high);
663 653 putbe32(offset_flags & 0xffffffffU,
664 654 data + entry_v1_offset_offset_flags);
665 655 putbe32(comp_len, data + entry_v1_offset_comp_len);
666 656 putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
667 657 putbe32(base_rev, data + entry_v1_offset_base_rev);
668 658 putbe32(link_rev, data + entry_v1_offset_link_rev);
669 659 putbe32(parent_1, data + entry_v1_offset_parent_1);
670 660 putbe32(parent_2, data + entry_v1_offset_parent_2);
671 661 memcpy(data + entry_v1_offset_node_id, c_node_id,
672 662 c_node_id_len);
673 663 } else if (self->format_version == format_v2) {
674 664 putbe32(offset_flags >> 32, data + entry_v2_offset_high);
675 665 putbe32(offset_flags & 0xffffffffU,
676 666 data + entry_v2_offset_offset_flags);
677 667 putbe32(comp_len, data + entry_v2_offset_comp_len);
678 668 putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
679 669 putbe32(base_rev, data + entry_v2_offset_base_rev);
680 670 putbe32(link_rev, data + entry_v2_offset_link_rev);
681 671 putbe32(parent_1, data + entry_v2_offset_parent_1);
682 672 putbe32(parent_2, data + entry_v2_offset_parent_2);
683 673 memcpy(data + entry_v2_offset_node_id, c_node_id,
684 674 c_node_id_len);
685 675 putbe64(sidedata_offset,
686 676 data + entry_v2_offset_sidedata_offset);
687 677 putbe32(sidedata_comp_len,
688 678 data + entry_v2_offset_sidedata_comp_len);
689 679 comp_field = data_comp_mode & 3;
690 680 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
691 681 data[entry_v2_offset_all_comp_mode] = comp_field;
692 682 } else if (self->format_version == format_cl2) {
693 683 putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
694 684 putbe32(offset_flags & 0xffffffffU,
695 685 data + entry_cl2_offset_offset_flags);
696 686 putbe32(comp_len, data + entry_cl2_offset_comp_len);
697 687 putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
698 688 putbe32(parent_1, data + entry_cl2_offset_parent_1);
699 689 putbe32(parent_2, data + entry_cl2_offset_parent_2);
700 690 memcpy(data + entry_cl2_offset_node_id, c_node_id,
701 691 c_node_id_len);
702 692 putbe64(sidedata_offset,
703 693 data + entry_cl2_offset_sidedata_offset);
704 694 putbe32(sidedata_comp_len,
705 695 data + entry_cl2_offset_sidedata_comp_len);
706 696 comp_field = data_comp_mode & 3;
707 697 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
708 698 data[entry_cl2_offset_all_comp_mode] = comp_field;
709 699 putbe32(rank, data + entry_cl2_offset_rank);
710 700 } else {
711 701 raise_revlog_error();
712 702 return NULL;
713 703 }
714 704
715 705 if (self->ntinitialized)
716 706 nt_insert(&self->nt, c_node_id, rev);
717 707
718 708 Py_CLEAR(self->headrevs);
719 709 Py_RETURN_NONE;
720 710 }
721 711
722 712 /* Replace an existing index entry's sidedata offset and length with new ones.
723 713 This cannot be used outside of the context of sidedata rewriting,
724 714 inside the transaction that creates the given revision. */
725 715 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
726 716 {
727 717 uint64_t offset_flags, sidedata_offset;
728 718 Py_ssize_t rev;
729 719 int sidedata_comp_len;
730 720 char comp_mode;
731 721 char *data;
732 722 #if LONG_MAX == 0x7fffffffL
733 723 const char *const sidedata_format = PY23("nKiKB", "nKiKB");
734 724 #else
735 725 const char *const sidedata_format = PY23("nkikB", "nkikB");
736 726 #endif
737 727
738 728 if (self->entry_size == v1_entry_size || self->inlined) {
739 729 /*
740 730 There is a bug in the transaction handling when going from an
741 731 inline revlog to a separate index and data file. Turn it off until
742 732 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
743 733 See issue6485.
744 734 */
745 735 raise_revlog_error();
746 736 return NULL;
747 737 }
748 738
749 739 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
750 740 &sidedata_comp_len, &offset_flags, &comp_mode))
751 741 return NULL;
752 742
753 743 if (rev < 0 || rev >= index_length(self)) {
754 744 PyErr_SetString(PyExc_IndexError, "revision outside index");
755 745 return NULL;
756 746 }
757 747 if (rev < self->length) {
758 748 PyErr_SetString(
759 749 PyExc_IndexError,
760 750 "cannot rewrite entries outside of this transaction");
761 751 return NULL;
762 752 }
763 753
764 754 /* Find the newly added node, offset from the "already on-disk" length
765 755 */
766 756 data = self->added + self->entry_size * (rev - self->length);
767 757 if (self->format_version == format_v2) {
768 758 putbe64(offset_flags, data + entry_v2_offset_high);
769 759 putbe64(sidedata_offset,
770 760 data + entry_v2_offset_sidedata_offset);
771 761 putbe32(sidedata_comp_len,
772 762 data + entry_v2_offset_sidedata_comp_len);
773 763 data[entry_v2_offset_all_comp_mode] =
774 764 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
775 765 ((comp_mode & 3) << 2);
776 766 } else if (self->format_version == format_cl2) {
777 767 putbe64(offset_flags, data + entry_cl2_offset_high);
778 768 putbe64(sidedata_offset,
779 769 data + entry_cl2_offset_sidedata_offset);
780 770 putbe32(sidedata_comp_len,
781 771 data + entry_cl2_offset_sidedata_comp_len);
782 772 data[entry_cl2_offset_all_comp_mode] =
783 773 (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
784 774 ((comp_mode & 3) << 2);
785 775 } else {
786 776 raise_revlog_error();
787 777 return NULL;
788 778 }
789 779
790 780 Py_RETURN_NONE;
791 781 }
792 782
793 783 static PyObject *index_stats(indexObject *self)
794 784 {
795 785 PyObject *obj = PyDict_New();
796 786 PyObject *s = NULL;
797 787 PyObject *t = NULL;
798 788
799 789 if (obj == NULL)
800 790 return NULL;
801 791
802 792 #define istat(__n, __d) \
803 793 do { \
804 794 s = PyBytes_FromString(__d); \
805 t = PyInt_FromSsize_t(self->__n); \
795 t = PyLong_FromSsize_t(self->__n); \
806 796 if (!s || !t) \
807 797 goto bail; \
808 798 if (PyDict_SetItem(obj, s, t) == -1) \
809 799 goto bail; \
810 800 Py_CLEAR(s); \
811 801 Py_CLEAR(t); \
812 802 } while (0)
813 803
814 804 if (self->added_length)
815 805 istat(new_length, "index entries added");
816 806 istat(length, "revs in memory");
817 807 istat(ntlookups, "node trie lookups");
818 808 istat(ntmisses, "node trie misses");
819 809 istat(ntrev, "node trie last rev scanned");
820 810 if (self->ntinitialized) {
821 811 istat(nt.capacity, "node trie capacity");
822 812 istat(nt.depth, "node trie depth");
823 813 istat(nt.length, "node trie count");
824 814 istat(nt.splits, "node trie splits");
825 815 }
826 816
827 817 #undef istat
828 818
829 819 return obj;
830 820
831 821 bail:
832 822 Py_XDECREF(obj);
833 823 Py_XDECREF(s);
834 824 Py_XDECREF(t);
835 825 return NULL;
836 826 }
837 827
838 828 /*
839 829 * When we cache a list, we want to be sure the caller can't mutate
840 830 * the cached copy.
841 831 */
842 832 static PyObject *list_copy(PyObject *list)
843 833 {
844 834 Py_ssize_t len = PyList_GET_SIZE(list);
845 835 PyObject *newlist = PyList_New(len);
846 836 Py_ssize_t i;
847 837
848 838 if (newlist == NULL)
849 839 return NULL;
850 840
851 841 for (i = 0; i < len; i++) {
852 842 PyObject *obj = PyList_GET_ITEM(list, i);
853 843 Py_INCREF(obj);
854 844 PyList_SET_ITEM(newlist, i, obj);
855 845 }
856 846
857 847 return newlist;
858 848 }
859 849
860 850 static int check_filter(PyObject *filter, Py_ssize_t arg)
861 851 {
862 852 if (filter) {
863 853 PyObject *arglist, *result;
864 854 int isfiltered;
865 855
866 856 arglist = Py_BuildValue("(n)", arg);
867 857 if (!arglist) {
868 858 return -1;
869 859 }
870 860
871 861 result = PyObject_Call(filter, arglist, NULL);
872 862 Py_DECREF(arglist);
873 863 if (!result) {
874 864 return -1;
875 865 }
876 866
877 867 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
878 868 * same as this function, so we can just return it directly.*/
879 869 isfiltered = PyObject_IsTrue(result);
880 870 Py_DECREF(result);
881 871 return isfiltered;
882 872 } else {
883 873 return 0;
884 874 }
885 875 }
886 876
887 877 static inline void set_phase_from_parents(char *phases, int parent_1,
888 878 int parent_2, Py_ssize_t i)
889 879 {
890 880 if (parent_1 >= 0 && phases[parent_1] > phases[i])
891 881 phases[i] = phases[parent_1];
892 882 if (parent_2 >= 0 && phases[parent_2] > phases[i])
893 883 phases[i] = phases[parent_2];
894 884 }
895 885
896 886 static PyObject *reachableroots2(indexObject *self, PyObject *args)
897 887 {
898 888
899 889 /* Input */
900 890 long minroot;
901 891 PyObject *includepatharg = NULL;
902 892 int includepath = 0;
903 893 /* heads and roots are lists */
904 894 PyObject *heads = NULL;
905 895 PyObject *roots = NULL;
906 896 PyObject *reachable = NULL;
907 897
908 898 PyObject *val;
909 899 Py_ssize_t len = index_length(self);
910 900 long revnum;
911 901 Py_ssize_t k;
912 902 Py_ssize_t i;
913 903 Py_ssize_t l;
914 904 int r;
915 905 int parents[2];
916 906
917 907 /* Internal data structure:
918 908 * tovisit: array of length len+1 (all revs + nullrev), filled upto
919 909 * lentovisit
920 910 *
921 911 * revstates: array of length len+1 (all revs + nullrev) */
922 912 int *tovisit = NULL;
923 913 long lentovisit = 0;
924 914 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
925 915 char *revstates = NULL;
926 916
927 917 /* Get arguments */
928 918 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
929 919 &PyList_Type, &roots, &PyBool_Type,
930 920 &includepatharg))
931 921 goto bail;
932 922
933 923 if (includepatharg == Py_True)
934 924 includepath = 1;
935 925
936 926 /* Initialize return set */
937 927 reachable = PyList_New(0);
938 928 if (reachable == NULL)
939 929 goto bail;
940 930
941 931 /* Initialize internal datastructures */
942 932 tovisit = (int *)malloc((len + 1) * sizeof(int));
943 933 if (tovisit == NULL) {
944 934 PyErr_NoMemory();
945 935 goto bail;
946 936 }
947 937
948 938 revstates = (char *)calloc(len + 1, 1);
949 939 if (revstates == NULL) {
950 940 PyErr_NoMemory();
951 941 goto bail;
952 942 }
953 943
954 944 l = PyList_GET_SIZE(roots);
955 945 for (i = 0; i < l; i++) {
956 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
946 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
957 947 if (revnum == -1 && PyErr_Occurred())
958 948 goto bail;
959 949 /* If root is out of range, e.g. wdir(), it must be unreachable
960 950 * from heads. So we can just ignore it. */
961 951 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
962 952 continue;
963 953 revstates[revnum + 1] |= RS_ROOT;
964 954 }
965 955
966 956 /* Populate tovisit with all the heads */
967 957 l = PyList_GET_SIZE(heads);
968 958 for (i = 0; i < l; i++) {
969 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
959 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
970 960 if (revnum == -1 && PyErr_Occurred())
971 961 goto bail;
972 962 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
973 963 PyErr_SetString(PyExc_IndexError, "head out of range");
974 964 goto bail;
975 965 }
976 966 if (!(revstates[revnum + 1] & RS_SEEN)) {
977 967 tovisit[lentovisit++] = (int)revnum;
978 968 revstates[revnum + 1] |= RS_SEEN;
979 969 }
980 970 }
981 971
982 972 /* Visit the tovisit list and find the reachable roots */
983 973 k = 0;
984 974 while (k < lentovisit) {
985 975 /* Add the node to reachable if it is a root*/
986 976 revnum = tovisit[k++];
987 977 if (revstates[revnum + 1] & RS_ROOT) {
988 978 revstates[revnum + 1] |= RS_REACHABLE;
989 val = PyInt_FromLong(revnum);
979 val = PyLong_FromLong(revnum);
990 980 if (val == NULL)
991 981 goto bail;
992 982 r = PyList_Append(reachable, val);
993 983 Py_DECREF(val);
994 984 if (r < 0)
995 985 goto bail;
996 986 if (includepath == 0)
997 987 continue;
998 988 }
999 989
1000 990 /* Add its parents to the list of nodes to visit */
1001 991 if (revnum == nullrev)
1002 992 continue;
1003 993 r = index_get_parents(self, revnum, parents, (int)len - 1);
1004 994 if (r < 0)
1005 995 goto bail;
1006 996 for (i = 0; i < 2; i++) {
1007 997 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
1008 998 parents[i] >= minroot) {
1009 999 tovisit[lentovisit++] = parents[i];
1010 1000 revstates[parents[i] + 1] |= RS_SEEN;
1011 1001 }
1012 1002 }
1013 1003 }
1014 1004
1015 1005 /* Find all the nodes in between the roots we found and the heads
1016 1006 * and add them to the reachable set */
1017 1007 if (includepath == 1) {
1018 1008 long minidx = minroot;
1019 1009 if (minidx < 0)
1020 1010 minidx = 0;
1021 1011 for (i = minidx; i < len; i++) {
1022 1012 if (!(revstates[i + 1] & RS_SEEN))
1023 1013 continue;
1024 1014 r = index_get_parents(self, i, parents, (int)len - 1);
1025 1015 /* Corrupted index file, error is set from
1026 1016 * index_get_parents */
1027 1017 if (r < 0)
1028 1018 goto bail;
1029 1019 if (((revstates[parents[0] + 1] |
1030 1020 revstates[parents[1] + 1]) &
1031 1021 RS_REACHABLE) &&
1032 1022 !(revstates[i + 1] & RS_REACHABLE)) {
1033 1023 revstates[i + 1] |= RS_REACHABLE;
1034 val = PyInt_FromSsize_t(i);
1024 val = PyLong_FromSsize_t(i);
1035 1025 if (val == NULL)
1036 1026 goto bail;
1037 1027 r = PyList_Append(reachable, val);
1038 1028 Py_DECREF(val);
1039 1029 if (r < 0)
1040 1030 goto bail;
1041 1031 }
1042 1032 }
1043 1033 }
1044 1034
1045 1035 free(revstates);
1046 1036 free(tovisit);
1047 1037 return reachable;
1048 1038 bail:
1049 1039 Py_XDECREF(reachable);
1050 1040 free(revstates);
1051 1041 free(tovisit);
1052 1042 return NULL;
1053 1043 }
1054 1044
1055 1045 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1056 1046 char phase)
1057 1047 {
1058 1048 Py_ssize_t len = index_length(self);
1059 1049 PyObject *item;
1060 1050 PyObject *iterator;
1061 1051 int rev, minrev = -1;
1062 1052 char *node;
1063 1053
1064 1054 if (!PySet_Check(roots)) {
1065 1055 PyErr_SetString(PyExc_TypeError,
1066 1056 "roots must be a set of nodes");
1067 1057 return -2;
1068 1058 }
1069 1059 iterator = PyObject_GetIter(roots);
1070 1060 if (iterator == NULL)
1071 1061 return -2;
1072 1062 while ((item = PyIter_Next(iterator))) {
1073 1063 if (node_check(self->nodelen, item, &node) == -1)
1074 1064 goto failed;
1075 1065 rev = index_find_node(self, node);
1076 1066 /* null is implicitly public, so negative is invalid */
1077 1067 if (rev < 0 || rev >= len)
1078 1068 goto failed;
1079 1069 phases[rev] = phase;
1080 1070 if (minrev == -1 || minrev > rev)
1081 1071 minrev = rev;
1082 1072 Py_DECREF(item);
1083 1073 }
1084 1074 Py_DECREF(iterator);
1085 1075 return minrev;
1086 1076 failed:
1087 1077 Py_DECREF(iterator);
1088 1078 Py_DECREF(item);
1089 1079 return -2;
1090 1080 }
1091 1081
1092 1082 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1093 1083 {
1094 1084 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1095 1085 96: internal */
1096 1086 static const char trackedphases[] = {1, 2, 32, 96};
1097 1087 PyObject *roots = Py_None;
1098 1088 PyObject *phasesetsdict = NULL;
1099 1089 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1100 1090 Py_ssize_t len = index_length(self);
1101 1091 char *phases = NULL;
1102 1092 int minphaserev = -1, rev, i;
1103 1093 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1104 1094
1105 1095 if (!PyArg_ParseTuple(args, "O", &roots))
1106 1096 return NULL;
1107 1097 if (roots == NULL || !PyDict_Check(roots)) {
1108 1098 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1109 1099 return NULL;
1110 1100 }
1111 1101
1112 1102 phases = calloc(len, 1);
1113 1103 if (phases == NULL) {
1114 1104 PyErr_NoMemory();
1115 1105 return NULL;
1116 1106 }
1117 1107
1118 1108 for (i = 0; i < numphases; ++i) {
1119 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
1109 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1120 1110 PyObject *phaseroots = NULL;
1121 1111 if (pyphase == NULL)
1122 1112 goto release;
1123 1113 phaseroots = PyDict_GetItem(roots, pyphase);
1124 1114 Py_DECREF(pyphase);
1125 1115 if (phaseroots == NULL)
1126 1116 continue;
1127 1117 rev = add_roots_get_min(self, phaseroots, phases,
1128 1118 trackedphases[i]);
1129 1119 if (rev == -2)
1130 1120 goto release;
1131 1121 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1132 1122 minphaserev = rev;
1133 1123 }
1134 1124
1135 1125 for (i = 0; i < numphases; ++i) {
1136 1126 phasesets[i] = PySet_New(NULL);
1137 1127 if (phasesets[i] == NULL)
1138 1128 goto release;
1139 1129 }
1140 1130
1141 1131 if (minphaserev == -1)
1142 1132 minphaserev = len;
1143 1133 for (rev = minphaserev; rev < len; ++rev) {
1144 1134 PyObject *pyphase = NULL;
1145 1135 PyObject *pyrev = NULL;
1146 1136 int parents[2];
1147 1137 /*
1148 1138 * The parent lookup could be skipped for phaseroots, but
1149 1139 * phase --force would historically not recompute them
1150 1140 * correctly, leaving descendents with a lower phase around.
1151 1141 * As such, unconditionally recompute the phase.
1152 1142 */
1153 1143 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1154 1144 goto release;
1155 1145 set_phase_from_parents(phases, parents[0], parents[1], rev);
1156 1146 switch (phases[rev]) {
1157 1147 case 0:
1158 1148 continue;
1159 1149 case 1:
1160 1150 pyphase = phasesets[0];
1161 1151 break;
1162 1152 case 2:
1163 1153 pyphase = phasesets[1];
1164 1154 break;
1165 1155 case 32:
1166 1156 pyphase = phasesets[2];
1167 1157 break;
1168 1158 case 96:
1169 1159 pyphase = phasesets[3];
1170 1160 break;
1171 1161 default:
1172 1162 /* this should never happen since the phase number is
1173 1163 * specified by this function. */
1174 1164 PyErr_SetString(PyExc_SystemError,
1175 1165 "bad phase number in internal list");
1176 1166 goto release;
1177 1167 }
1178 pyrev = PyInt_FromLong(rev);
1168 pyrev = PyLong_FromLong(rev);
1179 1169 if (pyrev == NULL)
1180 1170 goto release;
1181 1171 if (PySet_Add(pyphase, pyrev) == -1) {
1182 1172 Py_DECREF(pyrev);
1183 1173 goto release;
1184 1174 }
1185 1175 Py_DECREF(pyrev);
1186 1176 }
1187 1177
1188 1178 phasesetsdict = _dict_new_presized(numphases);
1189 1179 if (phasesetsdict == NULL)
1190 1180 goto release;
1191 1181 for (i = 0; i < numphases; ++i) {
1192 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
1182 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1193 1183 if (pyphase == NULL)
1194 1184 goto release;
1195 1185 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1196 1186 -1) {
1197 1187 Py_DECREF(pyphase);
1198 1188 goto release;
1199 1189 }
1200 1190 Py_DECREF(phasesets[i]);
1201 1191 phasesets[i] = NULL;
1202 1192 }
1203 1193
1204 1194 free(phases);
1205 1195 return Py_BuildValue("nN", len, phasesetsdict);
1206 1196
1207 1197 release:
1208 1198 for (i = 0; i < numphases; ++i)
1209 1199 Py_XDECREF(phasesets[i]);
1210 1200 Py_XDECREF(phasesetsdict);
1211 1201
1212 1202 free(phases);
1213 1203 return NULL;
1214 1204 }
1215 1205
1216 1206 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1217 1207 {
1218 1208 Py_ssize_t i, j, len;
1219 1209 char *nothead = NULL;
1220 1210 PyObject *heads = NULL;
1221 1211 PyObject *filter = NULL;
1222 1212 PyObject *filteredrevs = Py_None;
1223 1213
1224 1214 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1225 1215 return NULL;
1226 1216 }
1227 1217
1228 1218 if (self->headrevs && filteredrevs == self->filteredrevs)
1229 1219 return list_copy(self->headrevs);
1230 1220
1231 1221 Py_DECREF(self->filteredrevs);
1232 1222 self->filteredrevs = filteredrevs;
1233 1223 Py_INCREF(filteredrevs);
1234 1224
1235 1225 if (filteredrevs != Py_None) {
1236 1226 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1237 1227 if (!filter) {
1238 1228 PyErr_SetString(
1239 1229 PyExc_TypeError,
1240 1230 "filteredrevs has no attribute __contains__");
1241 1231 goto bail;
1242 1232 }
1243 1233 }
1244 1234
1245 1235 len = index_length(self);
1246 1236 heads = PyList_New(0);
1247 1237 if (heads == NULL)
1248 1238 goto bail;
1249 1239 if (len == 0) {
1250 PyObject *nullid = PyInt_FromLong(-1);
1240 PyObject *nullid = PyLong_FromLong(-1);
1251 1241 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1252 1242 Py_XDECREF(nullid);
1253 1243 goto bail;
1254 1244 }
1255 1245 goto done;
1256 1246 }
1257 1247
1258 1248 nothead = calloc(len, 1);
1259 1249 if (nothead == NULL) {
1260 1250 PyErr_NoMemory();
1261 1251 goto bail;
1262 1252 }
1263 1253
1264 1254 for (i = len - 1; i >= 0; i--) {
1265 1255 int isfiltered;
1266 1256 int parents[2];
1267 1257
1268 1258 /* If nothead[i] == 1, it means we've seen an unfiltered child
1269 1259 * of this node already, and therefore this node is not
1270 1260 * filtered. So we can skip the expensive check_filter step.
1271 1261 */
1272 1262 if (nothead[i] != 1) {
1273 1263 isfiltered = check_filter(filter, i);
1274 1264 if (isfiltered == -1) {
1275 1265 PyErr_SetString(PyExc_TypeError,
1276 1266 "unable to check filter");
1277 1267 goto bail;
1278 1268 }
1279 1269
1280 1270 if (isfiltered) {
1281 1271 nothead[i] = 1;
1282 1272 continue;
1283 1273 }
1284 1274 }
1285 1275
1286 1276 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1287 1277 goto bail;
1288 1278 for (j = 0; j < 2; j++) {
1289 1279 if (parents[j] >= 0)
1290 1280 nothead[parents[j]] = 1;
1291 1281 }
1292 1282 }
1293 1283
1294 1284 for (i = 0; i < len; i++) {
1295 1285 PyObject *head;
1296 1286
1297 1287 if (nothead[i])
1298 1288 continue;
1299 head = PyInt_FromSsize_t(i);
1289 head = PyLong_FromSsize_t(i);
1300 1290 if (head == NULL || PyList_Append(heads, head) == -1) {
1301 1291 Py_XDECREF(head);
1302 1292 goto bail;
1303 1293 }
1304 1294 }
1305 1295
1306 1296 done:
1307 1297 self->headrevs = heads;
1308 1298 Py_XDECREF(filter);
1309 1299 free(nothead);
1310 1300 return list_copy(self->headrevs);
1311 1301 bail:
1312 1302 Py_XDECREF(filter);
1313 1303 Py_XDECREF(heads);
1314 1304 free(nothead);
1315 1305 return NULL;
1316 1306 }
1317 1307
1318 1308 /**
1319 1309 * Obtain the base revision index entry.
1320 1310 *
1321 1311 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1322 1312 */
1323 1313 static inline int index_baserev(indexObject *self, int rev)
1324 1314 {
1325 1315 const char *data;
1326 1316 int result;
1327 1317
1328 1318 data = index_deref(self, rev);
1329 1319 if (data == NULL)
1330 1320 return -2;
1331 1321
1332 1322 if (self->format_version == format_v1) {
1333 1323 result = getbe32(data + entry_v1_offset_base_rev);
1334 1324 } else if (self->format_version == format_v2) {
1335 1325 result = getbe32(data + entry_v2_offset_base_rev);
1336 1326 } else if (self->format_version == format_cl2) {
1337 1327 return rev;
1338 1328 } else {
1339 1329 raise_revlog_error();
1340 1330 return -1;
1341 1331 }
1342 1332
1343 1333 if (result > rev) {
1344 1334 PyErr_Format(
1345 1335 PyExc_ValueError,
1346 1336 "corrupted revlog, revision base above revision: %d, %d",
1347 1337 rev, result);
1348 1338 return -2;
1349 1339 }
1350 1340 if (result < -1) {
1351 1341 PyErr_Format(
1352 1342 PyExc_ValueError,
1353 1343 "corrupted revlog, revision base out of range: %d, %d", rev,
1354 1344 result);
1355 1345 return -2;
1356 1346 }
1357 1347 return result;
1358 1348 }
1359 1349
1360 1350 /**
1361 1351 * Find if a revision is a snapshot or not
1362 1352 *
1363 1353 * Only relevant for sparse-revlog case.
1364 1354 * Callers must ensure that rev is in a valid range.
1365 1355 */
1366 1356 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1367 1357 {
1368 1358 int ps[2];
1369 1359 Py_ssize_t base;
1370 1360 while (rev >= 0) {
1371 1361 base = (Py_ssize_t)index_baserev(self, rev);
1372 1362 if (base == rev) {
1373 1363 base = -1;
1374 1364 }
1375 1365 if (base == -2) {
1376 1366 assert(PyErr_Occurred());
1377 1367 return -1;
1378 1368 }
1379 1369 if (base == -1) {
1380 1370 return 1;
1381 1371 }
1382 1372 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1383 1373 assert(PyErr_Occurred());
1384 1374 return -1;
1385 1375 };
1386 1376 if (base == ps[0] || base == ps[1]) {
1387 1377 return 0;
1388 1378 }
1389 1379 rev = base;
1390 1380 }
1391 1381 return rev == -1;
1392 1382 }
1393 1383
1394 1384 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1395 1385 {
1396 1386 long rev;
1397 1387 int issnap;
1398 1388 Py_ssize_t length = index_length(self);
1399 1389
1400 1390 if (!pylong_to_long(value, &rev)) {
1401 1391 return NULL;
1402 1392 }
1403 1393 if (rev < -1 || rev >= length) {
1404 1394 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1405 1395 rev);
1406 1396 return NULL;
1407 1397 };
1408 1398 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1409 1399 if (issnap < 0) {
1410 1400 return NULL;
1411 1401 };
1412 1402 return PyBool_FromLong((long)issnap);
1413 1403 }
1414 1404
1415 1405 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1416 1406 {
1417 1407 Py_ssize_t start_rev;
1418 1408 PyObject *cache;
1419 1409 Py_ssize_t base;
1420 1410 Py_ssize_t rev;
1421 1411 PyObject *key = NULL;
1422 1412 PyObject *value = NULL;
1423 1413 const Py_ssize_t length = index_length(self);
1424 1414 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1425 1415 return NULL;
1426 1416 }
1427 1417 for (rev = start_rev; rev < length; rev++) {
1428 1418 int issnap;
1429 1419 PyObject *allvalues = NULL;
1430 1420 issnap = index_issnapshotrev(self, rev);
1431 1421 if (issnap < 0) {
1432 1422 goto bail;
1433 1423 }
1434 1424 if (issnap == 0) {
1435 1425 continue;
1436 1426 }
1437 1427 base = (Py_ssize_t)index_baserev(self, rev);
1438 1428 if (base == rev) {
1439 1429 base = -1;
1440 1430 }
1441 1431 if (base == -2) {
1442 1432 assert(PyErr_Occurred());
1443 1433 goto bail;
1444 1434 }
1445 key = PyInt_FromSsize_t(base);
1435 key = PyLong_FromSsize_t(base);
1446 1436 allvalues = PyDict_GetItem(cache, key);
1447 1437 if (allvalues == NULL && PyErr_Occurred()) {
1448 1438 goto bail;
1449 1439 }
1450 1440 if (allvalues == NULL) {
1451 1441 int r;
1452 1442 allvalues = PyList_New(0);
1453 1443 if (!allvalues) {
1454 1444 goto bail;
1455 1445 }
1456 1446 r = PyDict_SetItem(cache, key, allvalues);
1457 1447 Py_DECREF(allvalues);
1458 1448 if (r < 0) {
1459 1449 goto bail;
1460 1450 }
1461 1451 }
1462 value = PyInt_FromSsize_t(rev);
1452 value = PyLong_FromSsize_t(rev);
1463 1453 if (PyList_Append(allvalues, value)) {
1464 1454 goto bail;
1465 1455 }
1466 1456 Py_CLEAR(key);
1467 1457 Py_CLEAR(value);
1468 1458 }
1469 1459 Py_RETURN_NONE;
1470 1460 bail:
1471 1461 Py_XDECREF(key);
1472 1462 Py_XDECREF(value);
1473 1463 return NULL;
1474 1464 }
1475 1465
1476 1466 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1477 1467 {
1478 1468 int rev, generaldelta;
1479 1469 PyObject *stoparg;
1480 1470 int stoprev, iterrev, baserev = -1;
1481 1471 int stopped;
1482 1472 PyObject *chain = NULL, *result = NULL;
1483 1473 const Py_ssize_t length = index_length(self);
1484 1474
1485 1475 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1486 1476 return NULL;
1487 1477 }
1488 1478
1489 if (PyInt_Check(stoparg)) {
1490 stoprev = (int)PyInt_AsLong(stoparg);
1479 if (PyLong_Check(stoparg)) {
1480 stoprev = (int)PyLong_AsLong(stoparg);
1491 1481 if (stoprev == -1 && PyErr_Occurred()) {
1492 1482 return NULL;
1493 1483 }
1494 1484 } else if (stoparg == Py_None) {
1495 1485 stoprev = -2;
1496 1486 } else {
1497 1487 PyErr_SetString(PyExc_ValueError,
1498 1488 "stoprev must be integer or None");
1499 1489 return NULL;
1500 1490 }
1501 1491
1502 1492 if (rev < 0 || rev >= length) {
1503 1493 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1504 1494 return NULL;
1505 1495 }
1506 1496
1507 1497 chain = PyList_New(0);
1508 1498 if (chain == NULL) {
1509 1499 return NULL;
1510 1500 }
1511 1501
1512 1502 baserev = index_baserev(self, rev);
1513 1503
1514 1504 /* This should never happen. */
1515 1505 if (baserev <= -2) {
1516 1506 /* Error should be set by index_deref() */
1517 1507 assert(PyErr_Occurred());
1518 1508 goto bail;
1519 1509 }
1520 1510
1521 1511 iterrev = rev;
1522 1512
1523 1513 while (iterrev != baserev && iterrev != stoprev) {
1524 PyObject *value = PyInt_FromLong(iterrev);
1514 PyObject *value = PyLong_FromLong(iterrev);
1525 1515 if (value == NULL) {
1526 1516 goto bail;
1527 1517 }
1528 1518 if (PyList_Append(chain, value)) {
1529 1519 Py_DECREF(value);
1530 1520 goto bail;
1531 1521 }
1532 1522 Py_DECREF(value);
1533 1523
1534 1524 if (generaldelta) {
1535 1525 iterrev = baserev;
1536 1526 } else {
1537 1527 iterrev--;
1538 1528 }
1539 1529
1540 1530 if (iterrev < 0) {
1541 1531 break;
1542 1532 }
1543 1533
1544 1534 if (iterrev >= length) {
1545 1535 PyErr_SetString(PyExc_IndexError,
1546 1536 "revision outside index");
1547 1537 return NULL;
1548 1538 }
1549 1539
1550 1540 baserev = index_baserev(self, iterrev);
1551 1541
1552 1542 /* This should never happen. */
1553 1543 if (baserev <= -2) {
1554 1544 /* Error should be set by index_deref() */
1555 1545 assert(PyErr_Occurred());
1556 1546 goto bail;
1557 1547 }
1558 1548 }
1559 1549
1560 1550 if (iterrev == stoprev) {
1561 1551 stopped = 1;
1562 1552 } else {
1563 PyObject *value = PyInt_FromLong(iterrev);
1553 PyObject *value = PyLong_FromLong(iterrev);
1564 1554 if (value == NULL) {
1565 1555 goto bail;
1566 1556 }
1567 1557 if (PyList_Append(chain, value)) {
1568 1558 Py_DECREF(value);
1569 1559 goto bail;
1570 1560 }
1571 1561 Py_DECREF(value);
1572 1562
1573 1563 stopped = 0;
1574 1564 }
1575 1565
1576 1566 if (PyList_Reverse(chain)) {
1577 1567 goto bail;
1578 1568 }
1579 1569
1580 1570 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1581 1571 Py_DECREF(chain);
1582 1572 return result;
1583 1573
1584 1574 bail:
1585 1575 Py_DECREF(chain);
1586 1576 return NULL;
1587 1577 }
1588 1578
1589 1579 static inline int64_t
1590 1580 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1591 1581 {
1592 1582 int64_t start_offset;
1593 1583 int64_t end_offset;
1594 1584 int end_size;
1595 1585 start_offset = index_get_start(self, start_rev);
1596 1586 if (start_offset < 0) {
1597 1587 return -1;
1598 1588 }
1599 1589 end_offset = index_get_start(self, end_rev);
1600 1590 if (end_offset < 0) {
1601 1591 return -1;
1602 1592 }
1603 1593 end_size = index_get_length(self, end_rev);
1604 1594 if (end_size < 0) {
1605 1595 return -1;
1606 1596 }
1607 1597 if (end_offset < start_offset) {
1608 1598 PyErr_Format(PyExc_ValueError,
1609 1599 "corrupted revlog index: inconsistent offset "
1610 1600 "between revisions (%zd) and (%zd)",
1611 1601 start_rev, end_rev);
1612 1602 return -1;
1613 1603 }
1614 1604 return (end_offset - start_offset) + (int64_t)end_size;
1615 1605 }
1616 1606
1617 1607 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1618 1608 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1619 1609 Py_ssize_t startidx, Py_ssize_t endidx)
1620 1610 {
1621 1611 int length;
1622 1612 while (endidx > 1 && endidx > startidx) {
1623 1613 length = index_get_length(self, revs[endidx - 1]);
1624 1614 if (length < 0) {
1625 1615 return -1;
1626 1616 }
1627 1617 if (length != 0) {
1628 1618 break;
1629 1619 }
1630 1620 endidx -= 1;
1631 1621 }
1632 1622 return endidx;
1633 1623 }
1634 1624
1635 1625 struct Gap {
1636 1626 int64_t size;
1637 1627 Py_ssize_t idx;
1638 1628 };
1639 1629
1640 1630 static int gap_compare(const void *left, const void *right)
1641 1631 {
1642 1632 const struct Gap *l_left = ((const struct Gap *)left);
1643 1633 const struct Gap *l_right = ((const struct Gap *)right);
1644 1634 if (l_left->size < l_right->size) {
1645 1635 return -1;
1646 1636 } else if (l_left->size > l_right->size) {
1647 1637 return 1;
1648 1638 }
1649 1639 return 0;
1650 1640 }
1651 1641 static int Py_ssize_t_compare(const void *left, const void *right)
1652 1642 {
1653 1643 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1654 1644 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1655 1645 if (l_left < l_right) {
1656 1646 return -1;
1657 1647 } else if (l_left > l_right) {
1658 1648 return 1;
1659 1649 }
1660 1650 return 0;
1661 1651 }
1662 1652
1663 1653 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1664 1654 {
1665 1655 /* method arguments */
1666 1656 PyObject *list_revs = NULL; /* revisions in the chain */
1667 1657 double targetdensity = 0; /* min density to achieve */
1668 1658 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1669 1659
1670 1660 /* other core variables */
1671 1661 Py_ssize_t idxlen = index_length(self);
1672 1662 Py_ssize_t i; /* used for various iteration */
1673 1663 PyObject *result = NULL; /* the final return of the function */
1674 1664
1675 1665 /* generic information about the delta chain being slice */
1676 1666 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1677 1667 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1678 1668 int64_t chainpayload = 0; /* sum of all delta in the chain */
1679 1669 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1680 1670
1681 1671 /* variable used for slicing the delta chain */
1682 1672 int64_t readdata = 0; /* amount of data currently planned to be read */
1683 1673 double density = 0; /* ration of payload data compared to read ones */
1684 1674 int64_t previous_end;
1685 1675 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1686 1676 Py_ssize_t num_gaps =
1687 1677 0; /* total number of notable gap recorded so far */
1688 1678 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1689 1679 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1690 1680 PyObject *chunk = NULL; /* individual slice */
1691 1681 PyObject *allchunks = NULL; /* all slices */
1692 1682 Py_ssize_t previdx;
1693 1683
1694 1684 /* parsing argument */
1695 1685 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1696 1686 &targetdensity, &mingapsize)) {
1697 1687 goto bail;
1698 1688 }
1699 1689
1700 1690 /* If the delta chain contains a single element, we do not need slicing
1701 1691 */
1702 1692 num_revs = PyList_GET_SIZE(list_revs);
1703 1693 if (num_revs <= 1) {
1704 1694 result = PyTuple_Pack(1, list_revs);
1705 1695 goto done;
1706 1696 }
1707 1697
1708 1698 /* Turn the python list into a native integer array (for efficiency) */
1709 1699 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1710 1700 if (revs == NULL) {
1711 1701 PyErr_NoMemory();
1712 1702 goto bail;
1713 1703 }
1714 1704 for (i = 0; i < num_revs; i++) {
1715 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1705 Py_ssize_t revnum =
1706 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1716 1707 if (revnum == -1 && PyErr_Occurred()) {
1717 1708 goto bail;
1718 1709 }
1719 1710 if (revnum < nullrev || revnum >= idxlen) {
1720 1711 PyErr_Format(PyExc_IndexError,
1721 1712 "index out of range: %zd", revnum);
1722 1713 goto bail;
1723 1714 }
1724 1715 revs[i] = revnum;
1725 1716 }
1726 1717
1727 1718 /* Compute and check various property of the unsliced delta chain */
1728 1719 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1729 1720 if (deltachainspan < 0) {
1730 1721 goto bail;
1731 1722 }
1732 1723
1733 1724 if (deltachainspan <= mingapsize) {
1734 1725 result = PyTuple_Pack(1, list_revs);
1735 1726 goto done;
1736 1727 }
1737 1728 chainpayload = 0;
1738 1729 for (i = 0; i < num_revs; i++) {
1739 1730 int tmp = index_get_length(self, revs[i]);
1740 1731 if (tmp < 0) {
1741 1732 goto bail;
1742 1733 }
1743 1734 chainpayload += tmp;
1744 1735 }
1745 1736
1746 1737 readdata = deltachainspan;
1747 1738 density = 1.0;
1748 1739
1749 1740 if (0 < deltachainspan) {
1750 1741 density = (double)chainpayload / (double)deltachainspan;
1751 1742 }
1752 1743
1753 1744 if (density >= targetdensity) {
1754 1745 result = PyTuple_Pack(1, list_revs);
1755 1746 goto done;
1756 1747 }
1757 1748
1758 1749 /* if chain is too sparse, look for relevant gaps */
1759 1750 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1760 1751 if (gaps == NULL) {
1761 1752 PyErr_NoMemory();
1762 1753 goto bail;
1763 1754 }
1764 1755
1765 1756 previous_end = -1;
1766 1757 for (i = 0; i < num_revs; i++) {
1767 1758 int64_t revstart;
1768 1759 int revsize;
1769 1760 revstart = index_get_start(self, revs[i]);
1770 1761 if (revstart < 0) {
1771 1762 goto bail;
1772 1763 };
1773 1764 revsize = index_get_length(self, revs[i]);
1774 1765 if (revsize < 0) {
1775 1766 goto bail;
1776 1767 };
1777 1768 if (revsize == 0) {
1778 1769 continue;
1779 1770 }
1780 1771 if (previous_end >= 0) {
1781 1772 int64_t gapsize = revstart - previous_end;
1782 1773 if (gapsize > mingapsize) {
1783 1774 gaps[num_gaps].size = gapsize;
1784 1775 gaps[num_gaps].idx = i;
1785 1776 num_gaps += 1;
1786 1777 }
1787 1778 }
1788 1779 previous_end = revstart + revsize;
1789 1780 }
1790 1781 if (num_gaps == 0) {
1791 1782 result = PyTuple_Pack(1, list_revs);
1792 1783 goto done;
1793 1784 }
1794 1785 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1795 1786
1796 1787 /* Slice the largest gap first, they improve the density the most */
1797 1788 selected_indices =
1798 1789 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1799 1790 if (selected_indices == NULL) {
1800 1791 PyErr_NoMemory();
1801 1792 goto bail;
1802 1793 }
1803 1794
1804 1795 for (i = num_gaps - 1; i >= 0; i--) {
1805 1796 selected_indices[num_selected] = gaps[i].idx;
1806 1797 readdata -= gaps[i].size;
1807 1798 num_selected += 1;
1808 1799 if (readdata <= 0) {
1809 1800 density = 1.0;
1810 1801 } else {
1811 1802 density = (double)chainpayload / (double)readdata;
1812 1803 }
1813 1804 if (density >= targetdensity) {
1814 1805 break;
1815 1806 }
1816 1807 }
1817 1808 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1818 1809 &Py_ssize_t_compare);
1819 1810
1820 1811 /* create the resulting slice */
1821 1812 allchunks = PyList_New(0);
1822 1813 if (allchunks == NULL) {
1823 1814 goto bail;
1824 1815 }
1825 1816 previdx = 0;
1826 1817 selected_indices[num_selected] = num_revs;
1827 1818 for (i = 0; i <= num_selected; i++) {
1828 1819 Py_ssize_t idx = selected_indices[i];
1829 1820 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1830 1821 if (endidx < 0) {
1831 1822 goto bail;
1832 1823 }
1833 1824 if (previdx < endidx) {
1834 1825 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1835 1826 if (chunk == NULL) {
1836 1827 goto bail;
1837 1828 }
1838 1829 if (PyList_Append(allchunks, chunk) == -1) {
1839 1830 goto bail;
1840 1831 }
1841 1832 Py_DECREF(chunk);
1842 1833 chunk = NULL;
1843 1834 }
1844 1835 previdx = idx;
1845 1836 }
1846 1837 result = allchunks;
1847 1838 goto done;
1848 1839
1849 1840 bail:
1850 1841 Py_XDECREF(allchunks);
1851 1842 Py_XDECREF(chunk);
1852 1843 done:
1853 1844 free(revs);
1854 1845 free(gaps);
1855 1846 free(selected_indices);
1856 1847 return result;
1857 1848 }
1858 1849
1859 1850 static inline int nt_level(const char *node, Py_ssize_t level)
1860 1851 {
1861 1852 int v = node[level >> 1];
1862 1853 if (!(level & 1))
1863 1854 v >>= 4;
1864 1855 return v & 0xf;
1865 1856 }
1866 1857
1867 1858 /*
1868 1859 * Return values:
1869 1860 *
1870 1861 * -4: match is ambiguous (multiple candidates)
1871 1862 * -2: not found
1872 1863 * rest: valid rev
1873 1864 */
1874 1865 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1875 1866 int hex)
1876 1867 {
1877 1868 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1878 1869 int level, maxlevel, off;
1879 1870
1880 1871 /* If the input is binary, do a fast check for the nullid first. */
1881 1872 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1882 1873 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1883 1874 return -1;
1884 1875
1885 1876 if (hex)
1886 1877 maxlevel = nodelen;
1887 1878 else
1888 1879 maxlevel = 2 * nodelen;
1889 1880 if (maxlevel > 2 * self->nodelen)
1890 1881 maxlevel = 2 * self->nodelen;
1891 1882
1892 1883 for (level = off = 0; level < maxlevel; level++) {
1893 1884 int k = getnybble(node, level);
1894 1885 nodetreenode *n = &self->nodes[off];
1895 1886 int v = n->children[k];
1896 1887
1897 1888 if (v < 0) {
1898 1889 const char *n;
1899 1890 Py_ssize_t i;
1900 1891
1901 1892 v = -(v + 2);
1902 1893 n = index_node(self->index, v);
1903 1894 if (n == NULL)
1904 1895 return -2;
1905 1896 for (i = level; i < maxlevel; i++)
1906 1897 if (getnybble(node, i) != nt_level(n, i))
1907 1898 return -2;
1908 1899 return v;
1909 1900 }
1910 1901 if (v == 0)
1911 1902 return -2;
1912 1903 off = v;
1913 1904 }
1914 1905 /* multiple matches against an ambiguous prefix */
1915 1906 return -4;
1916 1907 }
1917 1908
1918 1909 static int nt_new(nodetree *self)
1919 1910 {
1920 1911 if (self->length == self->capacity) {
1921 1912 size_t newcapacity;
1922 1913 nodetreenode *newnodes;
1923 1914 newcapacity = self->capacity * 2;
1924 1915 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1925 1916 PyErr_SetString(PyExc_MemoryError,
1926 1917 "overflow in nt_new");
1927 1918 return -1;
1928 1919 }
1929 1920 newnodes =
1930 1921 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1931 1922 if (newnodes == NULL) {
1932 1923 PyErr_SetString(PyExc_MemoryError, "out of memory");
1933 1924 return -1;
1934 1925 }
1935 1926 self->capacity = newcapacity;
1936 1927 self->nodes = newnodes;
1937 1928 memset(&self->nodes[self->length], 0,
1938 1929 sizeof(nodetreenode) * (self->capacity - self->length));
1939 1930 }
1940 1931 return self->length++;
1941 1932 }
1942 1933
1943 1934 static int nt_insert(nodetree *self, const char *node, int rev)
1944 1935 {
1945 1936 int level = 0;
1946 1937 int off = 0;
1947 1938
1948 1939 while (level < 2 * self->nodelen) {
1949 1940 int k = nt_level(node, level);
1950 1941 nodetreenode *n;
1951 1942 int v;
1952 1943
1953 1944 n = &self->nodes[off];
1954 1945 v = n->children[k];
1955 1946
1956 1947 if (v == 0) {
1957 1948 n->children[k] = -rev - 2;
1958 1949 return 0;
1959 1950 }
1960 1951 if (v < 0) {
1961 1952 const char *oldnode =
1962 1953 index_node_existing(self->index, -(v + 2));
1963 1954 int noff;
1964 1955
1965 1956 if (oldnode == NULL)
1966 1957 return -1;
1967 1958 if (!memcmp(oldnode, node, self->nodelen)) {
1968 1959 n->children[k] = -rev - 2;
1969 1960 return 0;
1970 1961 }
1971 1962 noff = nt_new(self);
1972 1963 if (noff == -1)
1973 1964 return -1;
1974 1965 /* self->nodes may have been changed by realloc */
1975 1966 self->nodes[off].children[k] = noff;
1976 1967 off = noff;
1977 1968 n = &self->nodes[off];
1978 1969 n->children[nt_level(oldnode, ++level)] = v;
1979 1970 if (level > self->depth)
1980 1971 self->depth = level;
1981 1972 self->splits += 1;
1982 1973 } else {
1983 1974 level += 1;
1984 1975 off = v;
1985 1976 }
1986 1977 }
1987 1978
1988 1979 return -1;
1989 1980 }
1990 1981
1991 1982 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1992 1983 {
1993 1984 Py_ssize_t rev;
1994 1985 const char *node;
1995 1986 Py_ssize_t length;
1996 1987 if (!PyArg_ParseTuple(args, "n", &rev))
1997 1988 return NULL;
1998 1989 length = index_length(self->nt.index);
1999 1990 if (rev < 0 || rev >= length) {
2000 1991 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2001 1992 return NULL;
2002 1993 }
2003 1994 node = index_node_existing(self->nt.index, rev);
2004 1995 if (nt_insert(&self->nt, node, (int)rev) == -1)
2005 1996 return NULL;
2006 1997 Py_RETURN_NONE;
2007 1998 }
2008 1999
2009 2000 static int nt_delete_node(nodetree *self, const char *node)
2010 2001 {
2011 2002 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2012 2003 */
2013 2004 return nt_insert(self, node, -2);
2014 2005 }
2015 2006
2016 2007 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2017 2008 {
2018 2009 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2019 2010 self->nodes = NULL;
2020 2011
2021 2012 self->index = index;
2022 2013 /* The input capacity is in terms of revisions, while the field is in
2023 2014 * terms of nodetree nodes. */
2024 2015 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2025 2016 self->nodelen = index->nodelen;
2026 2017 self->depth = 0;
2027 2018 self->splits = 0;
2028 2019 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2029 2020 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2030 2021 return -1;
2031 2022 }
2032 2023 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2033 2024 if (self->nodes == NULL) {
2034 2025 PyErr_NoMemory();
2035 2026 return -1;
2036 2027 }
2037 2028 self->length = 1;
2038 2029 return 0;
2039 2030 }
2040 2031
2041 2032 static int ntobj_init(nodetreeObject *self, PyObject *args)
2042 2033 {
2043 2034 PyObject *index;
2044 2035 unsigned capacity;
2045 2036 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2046 2037 &capacity))
2047 2038 return -1;
2048 2039 Py_INCREF(index);
2049 2040 return nt_init(&self->nt, (indexObject *)index, capacity);
2050 2041 }
2051 2042
2052 2043 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2053 2044 {
2054 2045 return nt_find(self, node, nodelen, 1);
2055 2046 }
2056 2047
2057 2048 /*
2058 2049 * Find the length of the shortest unique prefix of node.
2059 2050 *
2060 2051 * Return values:
2061 2052 *
2062 2053 * -3: error (exception set)
2063 2054 * -2: not found (no exception set)
2064 2055 * rest: length of shortest prefix
2065 2056 */
2066 2057 static int nt_shortest(nodetree *self, const char *node)
2067 2058 {
2068 2059 int level, off;
2069 2060
2070 2061 for (level = off = 0; level < 2 * self->nodelen; level++) {
2071 2062 int k, v;
2072 2063 nodetreenode *n = &self->nodes[off];
2073 2064 k = nt_level(node, level);
2074 2065 v = n->children[k];
2075 2066 if (v < 0) {
2076 2067 const char *n;
2077 2068 v = -(v + 2);
2078 2069 n = index_node_existing(self->index, v);
2079 2070 if (n == NULL)
2080 2071 return -3;
2081 2072 if (memcmp(node, n, self->nodelen) != 0)
2082 2073 /*
2083 2074 * Found a unique prefix, but it wasn't for the
2084 2075 * requested node (i.e the requested node does
2085 2076 * not exist).
2086 2077 */
2087 2078 return -2;
2088 2079 return level + 1;
2089 2080 }
2090 2081 if (v == 0)
2091 2082 return -2;
2092 2083 off = v;
2093 2084 }
2094 2085 /*
2095 2086 * The node was still not unique after 40 hex digits, so this won't
2096 2087 * happen. Also, if we get here, then there's a programming error in
2097 2088 * this file that made us insert a node longer than 40 hex digits.
2098 2089 */
2099 2090 PyErr_SetString(PyExc_Exception, "broken node tree");
2100 2091 return -3;
2101 2092 }
2102 2093
2103 2094 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2104 2095 {
2105 2096 PyObject *val;
2106 2097 char *node;
2107 2098 int length;
2108 2099
2109 2100 if (!PyArg_ParseTuple(args, "O", &val))
2110 2101 return NULL;
2111 2102 if (node_check(self->nt.nodelen, val, &node) == -1)
2112 2103 return NULL;
2113 2104
2114 2105 length = nt_shortest(&self->nt, node);
2115 2106 if (length == -3)
2116 2107 return NULL;
2117 2108 if (length == -2) {
2118 2109 raise_revlog_error();
2119 2110 return NULL;
2120 2111 }
2121 return PyInt_FromLong(length);
2112 return PyLong_FromLong(length);
2122 2113 }
2123 2114
2124 2115 static void nt_dealloc(nodetree *self)
2125 2116 {
2126 2117 free(self->nodes);
2127 2118 self->nodes = NULL;
2128 2119 }
2129 2120
2130 2121 static void ntobj_dealloc(nodetreeObject *self)
2131 2122 {
2132 2123 Py_XDECREF(self->nt.index);
2133 2124 nt_dealloc(&self->nt);
2134 2125 PyObject_Del(self);
2135 2126 }
2136 2127
2137 2128 static PyMethodDef ntobj_methods[] = {
2138 2129 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2139 2130 "insert an index entry"},
2140 2131 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2141 2132 "find length of shortest hex nodeid of a binary ID"},
2142 2133 {NULL} /* Sentinel */
2143 2134 };
2144 2135
2145 2136 static PyTypeObject nodetreeType = {
2146 2137 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2147 2138 "parsers.nodetree", /* tp_name */
2148 2139 sizeof(nodetreeObject), /* tp_basicsize */
2149 2140 0, /* tp_itemsize */
2150 2141 (destructor)ntobj_dealloc, /* tp_dealloc */
2151 2142 0, /* tp_print */
2152 2143 0, /* tp_getattr */
2153 2144 0, /* tp_setattr */
2154 2145 0, /* tp_compare */
2155 2146 0, /* tp_repr */
2156 2147 0, /* tp_as_number */
2157 2148 0, /* tp_as_sequence */
2158 2149 0, /* tp_as_mapping */
2159 2150 0, /* tp_hash */
2160 2151 0, /* tp_call */
2161 2152 0, /* tp_str */
2162 2153 0, /* tp_getattro */
2163 2154 0, /* tp_setattro */
2164 2155 0, /* tp_as_buffer */
2165 2156 Py_TPFLAGS_DEFAULT, /* tp_flags */
2166 2157 "nodetree", /* tp_doc */
2167 2158 0, /* tp_traverse */
2168 2159 0, /* tp_clear */
2169 2160 0, /* tp_richcompare */
2170 2161 0, /* tp_weaklistoffset */
2171 2162 0, /* tp_iter */
2172 2163 0, /* tp_iternext */
2173 2164 ntobj_methods, /* tp_methods */
2174 2165 0, /* tp_members */
2175 2166 0, /* tp_getset */
2176 2167 0, /* tp_base */
2177 2168 0, /* tp_dict */
2178 2169 0, /* tp_descr_get */
2179 2170 0, /* tp_descr_set */
2180 2171 0, /* tp_dictoffset */
2181 2172 (initproc)ntobj_init, /* tp_init */
2182 2173 0, /* tp_alloc */
2183 2174 };
2184 2175
2185 2176 static int index_init_nt(indexObject *self)
2186 2177 {
2187 2178 if (!self->ntinitialized) {
2188 2179 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2189 2180 nt_dealloc(&self->nt);
2190 2181 return -1;
2191 2182 }
2192 2183 if (nt_insert(&self->nt, nullid, -1) == -1) {
2193 2184 nt_dealloc(&self->nt);
2194 2185 return -1;
2195 2186 }
2196 2187 self->ntinitialized = 1;
2197 2188 self->ntrev = (int)index_length(self);
2198 2189 self->ntlookups = 1;
2199 2190 self->ntmisses = 0;
2200 2191 }
2201 2192 return 0;
2202 2193 }
2203 2194
2204 2195 /*
2205 2196 * Return values:
2206 2197 *
2207 2198 * -3: error (exception set)
2208 2199 * -2: not found (no exception set)
2209 2200 * rest: valid rev
2210 2201 */
2211 2202 static int index_find_node(indexObject *self, const char *node)
2212 2203 {
2213 2204 int rev;
2214 2205
2215 2206 if (index_init_nt(self) == -1)
2216 2207 return -3;
2217 2208
2218 2209 self->ntlookups++;
2219 2210 rev = nt_find(&self->nt, node, self->nodelen, 0);
2220 2211 if (rev >= -1)
2221 2212 return rev;
2222 2213
2223 2214 /*
2224 2215 * For the first handful of lookups, we scan the entire index,
2225 2216 * and cache only the matching nodes. This optimizes for cases
2226 2217 * like "hg tip", where only a few nodes are accessed.
2227 2218 *
2228 2219 * After that, we cache every node we visit, using a single
2229 2220 * scan amortized over multiple lookups. This gives the best
2230 2221 * bulk performance, e.g. for "hg log".
2231 2222 */
2232 2223 if (self->ntmisses++ < 4) {
2233 2224 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2234 2225 const char *n = index_node_existing(self, rev);
2235 2226 if (n == NULL)
2236 2227 return -3;
2237 2228 if (memcmp(node, n, self->nodelen) == 0) {
2238 2229 if (nt_insert(&self->nt, n, rev) == -1)
2239 2230 return -3;
2240 2231 break;
2241 2232 }
2242 2233 }
2243 2234 } else {
2244 2235 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2245 2236 const char *n = index_node_existing(self, rev);
2246 2237 if (n == NULL)
2247 2238 return -3;
2248 2239 if (nt_insert(&self->nt, n, rev) == -1) {
2249 2240 self->ntrev = rev + 1;
2250 2241 return -3;
2251 2242 }
2252 2243 if (memcmp(node, n, self->nodelen) == 0) {
2253 2244 break;
2254 2245 }
2255 2246 }
2256 2247 self->ntrev = rev;
2257 2248 }
2258 2249
2259 2250 if (rev >= 0)
2260 2251 return rev;
2261 2252 return -2;
2262 2253 }
2263 2254
2264 2255 static PyObject *index_getitem(indexObject *self, PyObject *value)
2265 2256 {
2266 2257 char *node;
2267 2258 int rev;
2268 2259
2269 if (PyInt_Check(value)) {
2260 if (PyLong_Check(value)) {
2270 2261 long idx;
2271 2262 if (!pylong_to_long(value, &idx)) {
2272 2263 return NULL;
2273 2264 }
2274 2265 return index_get(self, idx);
2275 2266 }
2276 2267
2277 2268 if (node_check(self->nodelen, value, &node) == -1)
2278 2269 return NULL;
2279 2270 rev = index_find_node(self, node);
2280 2271 if (rev >= -1)
2281 return PyInt_FromLong(rev);
2272 return PyLong_FromLong(rev);
2282 2273 if (rev == -2)
2283 2274 raise_revlog_error();
2284 2275 return NULL;
2285 2276 }
2286 2277
2287 2278 /*
2288 2279 * Fully populate the radix tree.
2289 2280 */
2290 2281 static int index_populate_nt(indexObject *self)
2291 2282 {
2292 2283 int rev;
2293 2284 if (self->ntrev > 0) {
2294 2285 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2295 2286 const char *n = index_node_existing(self, rev);
2296 2287 if (n == NULL)
2297 2288 return -1;
2298 2289 if (nt_insert(&self->nt, n, rev) == -1)
2299 2290 return -1;
2300 2291 }
2301 2292 self->ntrev = -1;
2302 2293 }
2303 2294 return 0;
2304 2295 }
2305 2296
2306 2297 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2307 2298 {
2308 2299 const char *fullnode;
2309 2300 Py_ssize_t nodelen;
2310 2301 char *node;
2311 2302 int rev, i;
2312 2303
2313 2304 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2314 2305 return NULL;
2315 2306
2316 2307 if (nodelen < 1) {
2317 2308 PyErr_SetString(PyExc_ValueError, "key too short");
2318 2309 return NULL;
2319 2310 }
2320 2311
2321 2312 if (nodelen > 2 * self->nodelen) {
2322 2313 PyErr_SetString(PyExc_ValueError, "key too long");
2323 2314 return NULL;
2324 2315 }
2325 2316
2326 2317 for (i = 0; i < nodelen; i++)
2327 2318 hexdigit(node, i);
2328 2319 if (PyErr_Occurred()) {
2329 2320 /* input contains non-hex characters */
2330 2321 PyErr_Clear();
2331 2322 Py_RETURN_NONE;
2332 2323 }
2333 2324
2334 2325 if (index_init_nt(self) == -1)
2335 2326 return NULL;
2336 2327 if (index_populate_nt(self) == -1)
2337 2328 return NULL;
2338 2329 rev = nt_partialmatch(&self->nt, node, nodelen);
2339 2330
2340 2331 switch (rev) {
2341 2332 case -4:
2342 2333 raise_revlog_error();
2343 2334 return NULL;
2344 2335 case -2:
2345 2336 Py_RETURN_NONE;
2346 2337 case -1:
2347 2338 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2348 2339 }
2349 2340
2350 2341 fullnode = index_node_existing(self, rev);
2351 2342 if (fullnode == NULL) {
2352 2343 return NULL;
2353 2344 }
2354 2345 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2355 2346 }
2356 2347
2357 2348 static PyObject *index_shortest(indexObject *self, PyObject *args)
2358 2349 {
2359 2350 PyObject *val;
2360 2351 char *node;
2361 2352 int length;
2362 2353
2363 2354 if (!PyArg_ParseTuple(args, "O", &val))
2364 2355 return NULL;
2365 2356 if (node_check(self->nodelen, val, &node) == -1)
2366 2357 return NULL;
2367 2358
2368 2359 self->ntlookups++;
2369 2360 if (index_init_nt(self) == -1)
2370 2361 return NULL;
2371 2362 if (index_populate_nt(self) == -1)
2372 2363 return NULL;
2373 2364 length = nt_shortest(&self->nt, node);
2374 2365 if (length == -3)
2375 2366 return NULL;
2376 2367 if (length == -2) {
2377 2368 raise_revlog_error();
2378 2369 return NULL;
2379 2370 }
2380 return PyInt_FromLong(length);
2371 return PyLong_FromLong(length);
2381 2372 }
2382 2373
2383 2374 static PyObject *index_m_get(indexObject *self, PyObject *args)
2384 2375 {
2385 2376 PyObject *val;
2386 2377 char *node;
2387 2378 int rev;
2388 2379
2389 2380 if (!PyArg_ParseTuple(args, "O", &val))
2390 2381 return NULL;
2391 2382 if (node_check(self->nodelen, val, &node) == -1)
2392 2383 return NULL;
2393 2384 rev = index_find_node(self, node);
2394 2385 if (rev == -3)
2395 2386 return NULL;
2396 2387 if (rev == -2)
2397 2388 Py_RETURN_NONE;
2398 return PyInt_FromLong(rev);
2389 return PyLong_FromLong(rev);
2399 2390 }
2400 2391
2401 2392 static int index_contains(indexObject *self, PyObject *value)
2402 2393 {
2403 2394 char *node;
2404 2395
2405 if (PyInt_Check(value)) {
2396 if (PyLong_Check(value)) {
2406 2397 long rev;
2407 2398 if (!pylong_to_long(value, &rev)) {
2408 2399 return -1;
2409 2400 }
2410 2401 return rev >= -1 && rev < index_length(self);
2411 2402 }
2412 2403
2413 2404 if (node_check(self->nodelen, value, &node) == -1)
2414 2405 return -1;
2415 2406
2416 2407 switch (index_find_node(self, node)) {
2417 2408 case -3:
2418 2409 return -1;
2419 2410 case -2:
2420 2411 return 0;
2421 2412 default:
2422 2413 return 1;
2423 2414 }
2424 2415 }
2425 2416
2426 2417 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2427 2418 {
2428 2419 int ret = index_contains(self, args);
2429 2420 if (ret < 0)
2430 2421 return NULL;
2431 2422 return PyBool_FromLong((long)ret);
2432 2423 }
2433 2424
2434 2425 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2435 2426 {
2436 2427 char *node;
2437 2428 int rev;
2438 2429
2439 2430 if (node_check(self->nodelen, val, &node) == -1)
2440 2431 return NULL;
2441 2432 rev = index_find_node(self, node);
2442 2433 if (rev >= -1)
2443 return PyInt_FromLong(rev);
2434 return PyLong_FromLong(rev);
2444 2435 if (rev == -2)
2445 2436 raise_revlog_error();
2446 2437 return NULL;
2447 2438 }
2448 2439
2449 2440 typedef uint64_t bitmask;
2450 2441
2451 2442 /*
2452 2443 * Given a disjoint set of revs, return all candidates for the
2453 2444 * greatest common ancestor. In revset notation, this is the set
2454 2445 * "heads(::a and ::b and ...)"
2455 2446 */
2456 2447 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2457 2448 int revcount)
2458 2449 {
2459 2450 const bitmask allseen = (1ull << revcount) - 1;
2460 2451 const bitmask poison = 1ull << revcount;
2461 2452 PyObject *gca = PyList_New(0);
2462 2453 int i, v, interesting;
2463 2454 int maxrev = -1;
2464 2455 bitmask sp;
2465 2456 bitmask *seen;
2466 2457
2467 2458 if (gca == NULL)
2468 2459 return PyErr_NoMemory();
2469 2460
2470 2461 for (i = 0; i < revcount; i++) {
2471 2462 if (revs[i] > maxrev)
2472 2463 maxrev = revs[i];
2473 2464 }
2474 2465
2475 2466 seen = calloc(sizeof(*seen), maxrev + 1);
2476 2467 if (seen == NULL) {
2477 2468 Py_DECREF(gca);
2478 2469 return PyErr_NoMemory();
2479 2470 }
2480 2471
2481 2472 for (i = 0; i < revcount; i++)
2482 2473 seen[revs[i]] = 1ull << i;
2483 2474
2484 2475 interesting = revcount;
2485 2476
2486 2477 for (v = maxrev; v >= 0 && interesting; v--) {
2487 2478 bitmask sv = seen[v];
2488 2479 int parents[2];
2489 2480
2490 2481 if (!sv)
2491 2482 continue;
2492 2483
2493 2484 if (sv < poison) {
2494 2485 interesting -= 1;
2495 2486 if (sv == allseen) {
2496 PyObject *obj = PyInt_FromLong(v);
2487 PyObject *obj = PyLong_FromLong(v);
2497 2488 if (obj == NULL)
2498 2489 goto bail;
2499 2490 if (PyList_Append(gca, obj) == -1) {
2500 2491 Py_DECREF(obj);
2501 2492 goto bail;
2502 2493 }
2503 2494 sv |= poison;
2504 2495 for (i = 0; i < revcount; i++) {
2505 2496 if (revs[i] == v)
2506 2497 goto done;
2507 2498 }
2508 2499 }
2509 2500 }
2510 2501 if (index_get_parents(self, v, parents, maxrev) < 0)
2511 2502 goto bail;
2512 2503
2513 2504 for (i = 0; i < 2; i++) {
2514 2505 int p = parents[i];
2515 2506 if (p == -1)
2516 2507 continue;
2517 2508 sp = seen[p];
2518 2509 if (sv < poison) {
2519 2510 if (sp == 0) {
2520 2511 seen[p] = sv;
2521 2512 interesting++;
2522 2513 } else if (sp != sv)
2523 2514 seen[p] |= sv;
2524 2515 } else {
2525 2516 if (sp && sp < poison)
2526 2517 interesting--;
2527 2518 seen[p] = sv;
2528 2519 }
2529 2520 }
2530 2521 }
2531 2522
2532 2523 done:
2533 2524 free(seen);
2534 2525 return gca;
2535 2526 bail:
2536 2527 free(seen);
2537 2528 Py_XDECREF(gca);
2538 2529 return NULL;
2539 2530 }
2540 2531
2541 2532 /*
2542 2533 * Given a disjoint set of revs, return the subset with the longest
2543 2534 * path to the root.
2544 2535 */
2545 2536 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2546 2537 {
2547 2538 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2548 2539 static const Py_ssize_t capacity = 24;
2549 2540 int *depth, *interesting = NULL;
2550 2541 int i, j, v, ninteresting;
2551 2542 PyObject *dict = NULL, *keys = NULL;
2552 2543 long *seen = NULL;
2553 2544 int maxrev = -1;
2554 2545 long final;
2555 2546
2556 2547 if (revcount > capacity) {
2557 2548 PyErr_Format(PyExc_OverflowError,
2558 2549 "bitset size (%ld) > capacity (%ld)",
2559 2550 (long)revcount, (long)capacity);
2560 2551 return NULL;
2561 2552 }
2562 2553
2563 2554 for (i = 0; i < revcount; i++) {
2564 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2555 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2565 2556 if (n > maxrev)
2566 2557 maxrev = n;
2567 2558 }
2568 2559
2569 2560 depth = calloc(sizeof(*depth), maxrev + 1);
2570 2561 if (depth == NULL)
2571 2562 return PyErr_NoMemory();
2572 2563
2573 2564 seen = calloc(sizeof(*seen), maxrev + 1);
2574 2565 if (seen == NULL) {
2575 2566 PyErr_NoMemory();
2576 2567 goto bail;
2577 2568 }
2578 2569
2579 2570 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2580 2571 if (interesting == NULL) {
2581 2572 PyErr_NoMemory();
2582 2573 goto bail;
2583 2574 }
2584 2575
2585 2576 if (PyList_Sort(revs) == -1)
2586 2577 goto bail;
2587 2578
2588 2579 for (i = 0; i < revcount; i++) {
2589 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2580 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2590 2581 long b = 1l << i;
2591 2582 depth[n] = 1;
2592 2583 seen[n] = b;
2593 2584 interesting[b] = 1;
2594 2585 }
2595 2586
2596 2587 /* invariant: ninteresting is the number of non-zero entries in
2597 2588 * interesting. */
2598 2589 ninteresting = (int)revcount;
2599 2590
2600 2591 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2601 2592 int dv = depth[v];
2602 2593 int parents[2];
2603 2594 long sv;
2604 2595
2605 2596 if (dv == 0)
2606 2597 continue;
2607 2598
2608 2599 sv = seen[v];
2609 2600 if (index_get_parents(self, v, parents, maxrev) < 0)
2610 2601 goto bail;
2611 2602
2612 2603 for (i = 0; i < 2; i++) {
2613 2604 int p = parents[i];
2614 2605 long sp;
2615 2606 int dp;
2616 2607
2617 2608 if (p == -1)
2618 2609 continue;
2619 2610
2620 2611 dp = depth[p];
2621 2612 sp = seen[p];
2622 2613 if (dp <= dv) {
2623 2614 depth[p] = dv + 1;
2624 2615 if (sp != sv) {
2625 2616 interesting[sv] += 1;
2626 2617 seen[p] = sv;
2627 2618 if (sp) {
2628 2619 interesting[sp] -= 1;
2629 2620 if (interesting[sp] == 0)
2630 2621 ninteresting -= 1;
2631 2622 }
2632 2623 }
2633 2624 } else if (dv == dp - 1) {
2634 2625 long nsp = sp | sv;
2635 2626 if (nsp == sp)
2636 2627 continue;
2637 2628 seen[p] = nsp;
2638 2629 interesting[sp] -= 1;
2639 2630 if (interesting[sp] == 0)
2640 2631 ninteresting -= 1;
2641 2632 if (interesting[nsp] == 0)
2642 2633 ninteresting += 1;
2643 2634 interesting[nsp] += 1;
2644 2635 }
2645 2636 }
2646 2637 interesting[sv] -= 1;
2647 2638 if (interesting[sv] == 0)
2648 2639 ninteresting -= 1;
2649 2640 }
2650 2641
2651 2642 final = 0;
2652 2643 j = ninteresting;
2653 2644 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2654 2645 if (interesting[i] == 0)
2655 2646 continue;
2656 2647 final |= i;
2657 2648 j -= 1;
2658 2649 }
2659 2650 if (final == 0) {
2660 2651 keys = PyList_New(0);
2661 2652 goto bail;
2662 2653 }
2663 2654
2664 2655 dict = PyDict_New();
2665 2656 if (dict == NULL)
2666 2657 goto bail;
2667 2658
2668 2659 for (i = 0; i < revcount; i++) {
2669 2660 PyObject *key;
2670 2661
2671 2662 if ((final & (1 << i)) == 0)
2672 2663 continue;
2673 2664
2674 2665 key = PyList_GET_ITEM(revs, i);
2675 2666 Py_INCREF(key);
2676 2667 Py_INCREF(Py_None);
2677 2668 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2678 2669 Py_DECREF(key);
2679 2670 Py_DECREF(Py_None);
2680 2671 goto bail;
2681 2672 }
2682 2673 }
2683 2674
2684 2675 keys = PyDict_Keys(dict);
2685 2676
2686 2677 bail:
2687 2678 free(depth);
2688 2679 free(seen);
2689 2680 free(interesting);
2690 2681 Py_XDECREF(dict);
2691 2682
2692 2683 return keys;
2693 2684 }
2694 2685
2695 2686 /*
2696 2687 * Given a (possibly overlapping) set of revs, return all the
2697 2688 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2698 2689 */
2699 2690 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2700 2691 {
2701 2692 PyObject *ret = NULL;
2702 2693 Py_ssize_t argcount, i, len;
2703 2694 bitmask repeat = 0;
2704 2695 int revcount = 0;
2705 2696 int *revs;
2706 2697
2707 2698 argcount = PySequence_Length(args);
2708 2699 revs = PyMem_Malloc(argcount * sizeof(*revs));
2709 2700 if (argcount > 0 && revs == NULL)
2710 2701 return PyErr_NoMemory();
2711 2702 len = index_length(self);
2712 2703
2713 2704 for (i = 0; i < argcount; i++) {
2714 2705 static const int capacity = 24;
2715 2706 PyObject *obj = PySequence_GetItem(args, i);
2716 2707 bitmask x;
2717 2708 long val;
2718 2709
2719 if (!PyInt_Check(obj)) {
2710 if (!PyLong_Check(obj)) {
2720 2711 PyErr_SetString(PyExc_TypeError,
2721 2712 "arguments must all be ints");
2722 2713 Py_DECREF(obj);
2723 2714 goto bail;
2724 2715 }
2725 val = PyInt_AsLong(obj);
2716 val = PyLong_AsLong(obj);
2726 2717 Py_DECREF(obj);
2727 2718 if (val == -1) {
2728 2719 ret = PyList_New(0);
2729 2720 goto done;
2730 2721 }
2731 2722 if (val < 0 || val >= len) {
2732 2723 PyErr_SetString(PyExc_IndexError, "index out of range");
2733 2724 goto bail;
2734 2725 }
2735 2726 /* this cheesy bloom filter lets us avoid some more
2736 2727 * expensive duplicate checks in the common set-is-disjoint
2737 2728 * case */
2738 2729 x = 1ull << (val & 0x3f);
2739 2730 if (repeat & x) {
2740 2731 int k;
2741 2732 for (k = 0; k < revcount; k++) {
2742 2733 if (val == revs[k])
2743 2734 goto duplicate;
2744 2735 }
2745 2736 } else
2746 2737 repeat |= x;
2747 2738 if (revcount >= capacity) {
2748 2739 PyErr_Format(PyExc_OverflowError,
2749 2740 "bitset size (%d) > capacity (%d)",
2750 2741 revcount, capacity);
2751 2742 goto bail;
2752 2743 }
2753 2744 revs[revcount++] = (int)val;
2754 2745 duplicate:;
2755 2746 }
2756 2747
2757 2748 if (revcount == 0) {
2758 2749 ret = PyList_New(0);
2759 2750 goto done;
2760 2751 }
2761 2752 if (revcount == 1) {
2762 2753 PyObject *obj;
2763 2754 ret = PyList_New(1);
2764 2755 if (ret == NULL)
2765 2756 goto bail;
2766 obj = PyInt_FromLong(revs[0]);
2757 obj = PyLong_FromLong(revs[0]);
2767 2758 if (obj == NULL)
2768 2759 goto bail;
2769 2760 PyList_SET_ITEM(ret, 0, obj);
2770 2761 goto done;
2771 2762 }
2772 2763
2773 2764 ret = find_gca_candidates(self, revs, revcount);
2774 2765 if (ret == NULL)
2775 2766 goto bail;
2776 2767
2777 2768 done:
2778 2769 PyMem_Free(revs);
2779 2770 return ret;
2780 2771
2781 2772 bail:
2782 2773 PyMem_Free(revs);
2783 2774 Py_XDECREF(ret);
2784 2775 return NULL;
2785 2776 }
2786 2777
2787 2778 /*
2788 2779 * Given a (possibly overlapping) set of revs, return the greatest
2789 2780 * common ancestors: those with the longest path to the root.
2790 2781 */
2791 2782 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2792 2783 {
2793 2784 PyObject *ret;
2794 2785 PyObject *gca = index_commonancestorsheads(self, args);
2795 2786 if (gca == NULL)
2796 2787 return NULL;
2797 2788
2798 2789 if (PyList_GET_SIZE(gca) <= 1) {
2799 2790 return gca;
2800 2791 }
2801 2792
2802 2793 ret = find_deepest(self, gca);
2803 2794 Py_DECREF(gca);
2804 2795 return ret;
2805 2796 }
2806 2797
2807 2798 /*
2808 2799 * Invalidate any trie entries introduced by added revs.
2809 2800 */
2810 2801 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2811 2802 {
2812 2803 Py_ssize_t i, len;
2813 2804
2814 2805 len = self->length + self->new_length;
2815 2806 i = start - self->length;
2816 2807 if (i < 0)
2817 2808 return;
2818 2809
2819 2810 for (i = start; i < len; i++) {
2820 2811 const char *node = index_node(self, i);
2821 2812 nt_delete_node(&self->nt, node);
2822 2813 }
2823 2814
2824 2815 self->new_length = start - self->length;
2825 2816 }
2826 2817
2827 2818 /*
2828 2819 * Delete a numeric range of revs, which must be at the end of the
2829 2820 * range.
2830 2821 */
2831 2822 static int index_slice_del(indexObject *self, PyObject *item)
2832 2823 {
2833 2824 Py_ssize_t start, stop, step, slicelength;
2834 2825 Py_ssize_t length = index_length(self) + 1;
2835 2826 int ret = 0;
2836 2827
2837 2828 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2838 2829 #ifdef IS_PY3K
2839 2830 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2840 2831 &slicelength) < 0)
2841 2832 #else
2842 2833 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2843 2834 &step, &slicelength) < 0)
2844 2835 #endif
2845 2836 return -1;
2846 2837
2847 2838 if (slicelength <= 0)
2848 2839 return 0;
2849 2840
2850 2841 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2851 2842 stop = start;
2852 2843
2853 2844 if (step < 0) {
2854 2845 stop = start + 1;
2855 2846 start = stop + step * (slicelength - 1) - 1;
2856 2847 step = -step;
2857 2848 }
2858 2849
2859 2850 if (step != 1) {
2860 2851 PyErr_SetString(PyExc_ValueError,
2861 2852 "revlog index delete requires step size of 1");
2862 2853 return -1;
2863 2854 }
2864 2855
2865 2856 if (stop != length - 1) {
2866 2857 PyErr_SetString(PyExc_IndexError,
2867 2858 "revlog index deletion indices are invalid");
2868 2859 return -1;
2869 2860 }
2870 2861
2871 2862 if (start < self->length) {
2872 2863 if (self->ntinitialized) {
2873 2864 Py_ssize_t i;
2874 2865
2875 2866 for (i = start; i < self->length; i++) {
2876 2867 const char *node = index_node_existing(self, i);
2877 2868 if (node == NULL)
2878 2869 return -1;
2879 2870
2880 2871 nt_delete_node(&self->nt, node);
2881 2872 }
2882 2873 if (self->new_length)
2883 2874 index_invalidate_added(self, self->length);
2884 2875 if (self->ntrev > start)
2885 2876 self->ntrev = (int)start;
2886 2877 } else if (self->new_length) {
2887 2878 self->new_length = 0;
2888 2879 }
2889 2880
2890 2881 self->length = start;
2891 2882 goto done;
2892 2883 }
2893 2884
2894 2885 if (self->ntinitialized) {
2895 2886 index_invalidate_added(self, start);
2896 2887 if (self->ntrev > start)
2897 2888 self->ntrev = (int)start;
2898 2889 } else {
2899 2890 self->new_length = start - self->length;
2900 2891 }
2901 2892 done:
2902 2893 Py_CLEAR(self->headrevs);
2903 2894 return ret;
2904 2895 }
2905 2896
2906 2897 /*
2907 2898 * Supported ops:
2908 2899 *
2909 2900 * slice deletion
2910 2901 * string assignment (extend node->rev mapping)
2911 2902 * string deletion (shrink node->rev mapping)
2912 2903 */
2913 2904 static int index_assign_subscript(indexObject *self, PyObject *item,
2914 2905 PyObject *value)
2915 2906 {
2916 2907 char *node;
2917 2908 long rev;
2918 2909
2919 2910 if (PySlice_Check(item) && value == NULL)
2920 2911 return index_slice_del(self, item);
2921 2912
2922 2913 if (node_check(self->nodelen, item, &node) == -1)
2923 2914 return -1;
2924 2915
2925 2916 if (value == NULL)
2926 2917 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2927 2918 : 0;
2928 rev = PyInt_AsLong(value);
2919 rev = PyLong_AsLong(value);
2929 2920 if (rev > INT_MAX || rev < 0) {
2930 2921 if (!PyErr_Occurred())
2931 2922 PyErr_SetString(PyExc_ValueError, "rev out of range");
2932 2923 return -1;
2933 2924 }
2934 2925
2935 2926 if (index_init_nt(self) == -1)
2936 2927 return -1;
2937 2928 return nt_insert(&self->nt, node, (int)rev);
2938 2929 }
2939 2930
2940 2931 /*
2941 2932 * Find all RevlogNG entries in an index that has inline data. Update
2942 2933 * the optional "offsets" table with those entries.
2943 2934 */
2944 2935 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2945 2936 {
2946 2937 const char *data = (const char *)self->buf.buf;
2947 2938 Py_ssize_t pos = 0;
2948 2939 Py_ssize_t end = self->buf.len;
2949 2940 long incr = self->entry_size;
2950 2941 Py_ssize_t len = 0;
2951 2942
2952 2943 while (pos + self->entry_size <= end && pos >= 0) {
2953 2944 uint32_t comp_len, sidedata_comp_len = 0;
2954 2945 /* 3rd element of header is length of compressed inline data */
2955 2946 if (self->format_version == format_v1) {
2956 2947 comp_len =
2957 2948 getbe32(data + pos + entry_v1_offset_comp_len);
2958 2949 sidedata_comp_len = 0;
2959 2950 } else if (self->format_version == format_v2) {
2960 2951 comp_len =
2961 2952 getbe32(data + pos + entry_v2_offset_comp_len);
2962 2953 sidedata_comp_len = getbe32(
2963 2954 data + pos + entry_v2_offset_sidedata_comp_len);
2964 2955 } else {
2965 2956 raise_revlog_error();
2966 2957 return -1;
2967 2958 }
2968 2959 incr = self->entry_size + comp_len + sidedata_comp_len;
2969 2960 if (offsets)
2970 2961 offsets[len] = data + pos;
2971 2962 len++;
2972 2963 pos += incr;
2973 2964 }
2974 2965
2975 2966 if (pos != end) {
2976 2967 if (!PyErr_Occurred())
2977 2968 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2978 2969 return -1;
2979 2970 }
2980 2971
2981 2972 return len;
2982 2973 }
2983 2974
2984 2975 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2985 2976 {
2986 2977 PyObject *data_obj, *inlined_obj;
2987 2978 Py_ssize_t size;
2988 2979
2989 2980 static char *kwlist[] = {"data", "inlined", "format", NULL};
2990 2981
2991 2982 /* Initialize before argument-checking to avoid index_dealloc() crash.
2992 2983 */
2993 2984 self->added = NULL;
2994 2985 self->new_length = 0;
2995 2986 self->added_length = 0;
2996 2987 self->data = NULL;
2997 2988 memset(&self->buf, 0, sizeof(self->buf));
2998 2989 self->headrevs = NULL;
2999 2990 self->filteredrevs = Py_None;
3000 2991 Py_INCREF(Py_None);
3001 2992 self->ntinitialized = 0;
3002 2993 self->offsets = NULL;
3003 2994 self->nodelen = 20;
3004 2995 self->nullentry = NULL;
3005 2996 self->rust_ext_compat = 1;
3006 2997 self->format_version = format_v1;
3007 2998
3008 2999 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3009 3000 &data_obj, &inlined_obj,
3010 3001 &(self->format_version)))
3011 3002 return -1;
3012 3003 if (!PyObject_CheckBuffer(data_obj)) {
3013 3004 PyErr_SetString(PyExc_TypeError,
3014 3005 "data does not support buffer interface");
3015 3006 return -1;
3016 3007 }
3017 3008 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3018 3009 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3019 3010 return -1;
3020 3011 }
3021 3012
3022 3013 if (self->format_version == format_v1) {
3023 3014 self->entry_size = v1_entry_size;
3024 3015 } else if (self->format_version == format_v2) {
3025 3016 self->entry_size = v2_entry_size;
3026 3017 } else if (self->format_version == format_cl2) {
3027 3018 self->entry_size = cl2_entry_size;
3028 3019 }
3029 3020
3030 3021 self->nullentry =
3031 3022 Py_BuildValue(PY23("iiiiiiis#iiBBi", "iiiiiiiy#iiBBi"), 0, 0, 0, -1,
3032 3023 -1, -1, -1, nullid, self->nodelen, 0, 0,
3033 3024 comp_mode_inline, comp_mode_inline, rank_unknown);
3034 3025
3035 3026 if (!self->nullentry)
3036 3027 return -1;
3037 3028 PyObject_GC_UnTrack(self->nullentry);
3038 3029
3039 3030 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3040 3031 return -1;
3041 3032 size = self->buf.len;
3042 3033
3043 3034 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3044 3035 self->data = data_obj;
3045 3036
3046 3037 self->ntlookups = self->ntmisses = 0;
3047 3038 self->ntrev = -1;
3048 3039 Py_INCREF(self->data);
3049 3040
3050 3041 if (self->inlined) {
3051 3042 Py_ssize_t len = inline_scan(self, NULL);
3052 3043 if (len == -1)
3053 3044 goto bail;
3054 3045 self->length = len;
3055 3046 } else {
3056 3047 if (size % self->entry_size) {
3057 3048 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3058 3049 goto bail;
3059 3050 }
3060 3051 self->length = size / self->entry_size;
3061 3052 }
3062 3053
3063 3054 return 0;
3064 3055 bail:
3065 3056 return -1;
3066 3057 }
3067 3058
3068 3059 static PyObject *index_nodemap(indexObject *self)
3069 3060 {
3070 3061 Py_INCREF(self);
3071 3062 return (PyObject *)self;
3072 3063 }
3073 3064
3074 3065 static void _index_clearcaches(indexObject *self)
3075 3066 {
3076 3067 if (self->offsets) {
3077 3068 PyMem_Free((void *)self->offsets);
3078 3069 self->offsets = NULL;
3079 3070 }
3080 3071 if (self->ntinitialized) {
3081 3072 nt_dealloc(&self->nt);
3082 3073 }
3083 3074 self->ntinitialized = 0;
3084 3075 Py_CLEAR(self->headrevs);
3085 3076 }
3086 3077
3087 3078 static PyObject *index_clearcaches(indexObject *self)
3088 3079 {
3089 3080 _index_clearcaches(self);
3090 3081 self->ntrev = -1;
3091 3082 self->ntlookups = self->ntmisses = 0;
3092 3083 Py_RETURN_NONE;
3093 3084 }
3094 3085
3095 3086 static void index_dealloc(indexObject *self)
3096 3087 {
3097 3088 _index_clearcaches(self);
3098 3089 Py_XDECREF(self->filteredrevs);
3099 3090 if (self->buf.buf) {
3100 3091 PyBuffer_Release(&self->buf);
3101 3092 memset(&self->buf, 0, sizeof(self->buf));
3102 3093 }
3103 3094 Py_XDECREF(self->data);
3104 3095 PyMem_Free(self->added);
3105 3096 Py_XDECREF(self->nullentry);
3106 3097 PyObject_Del(self);
3107 3098 }
3108 3099
3109 3100 static PySequenceMethods index_sequence_methods = {
3110 3101 (lenfunc)index_length, /* sq_length */
3111 3102 0, /* sq_concat */
3112 3103 0, /* sq_repeat */
3113 3104 (ssizeargfunc)index_get, /* sq_item */
3114 3105 0, /* sq_slice */
3115 3106 0, /* sq_ass_item */
3116 3107 0, /* sq_ass_slice */
3117 3108 (objobjproc)index_contains, /* sq_contains */
3118 3109 };
3119 3110
3120 3111 static PyMappingMethods index_mapping_methods = {
3121 3112 (lenfunc)index_length, /* mp_length */
3122 3113 (binaryfunc)index_getitem, /* mp_subscript */
3123 3114 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
3124 3115 };
3125 3116
3126 3117 static PyMethodDef index_methods[] = {
3127 3118 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3128 3119 "return the gca set of the given revs"},
3129 3120 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3130 3121 METH_VARARGS,
3131 3122 "return the heads of the common ancestors of the given revs"},
3132 3123 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3133 3124 "clear the index caches"},
3134 3125 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3135 3126 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3136 3127 "return `rev` associated with a node or None"},
3137 3128 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3138 3129 "return True if the node exist in the index"},
3139 3130 {"rev", (PyCFunction)index_m_rev, METH_O,
3140 3131 "return `rev` associated with a node or raise RevlogError"},
3141 3132 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3142 3133 "compute phases"},
3143 3134 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3144 3135 "reachableroots"},
3145 3136 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3146 3137 METH_VARARGS, "replace an existing index entry with a new value"},
3147 3138 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3148 3139 "get head revisions"}, /* Can do filtering since 3.2 */
3149 3140 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3150 3141 "get filtered head revisions"}, /* Can always do filtering */
3151 3142 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3152 3143 "True if the object is a snapshot"},
3153 3144 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3154 3145 "Gather snapshot data in a cache dict"},
3155 3146 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3156 3147 "determine revisions with deltas to reconstruct fulltext"},
3157 3148 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3158 3149 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
3159 3150 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3160 3151 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3161 3152 "match a potentially ambiguous node ID"},
3162 3153 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3163 3154 "find length of shortest hex nodeid of a binary ID"},
3164 3155 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3165 3156 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3166 3157 "return an entry in binary form"},
3167 3158 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3168 3159 "pack the revlog header information into binary"},
3169 3160 {NULL} /* Sentinel */
3170 3161 };
3171 3162
3172 3163 static PyGetSetDef index_getset[] = {
3173 3164 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
3174 3165 {NULL} /* Sentinel */
3175 3166 };
3176 3167
3177 3168 static PyMemberDef index_members[] = {
3178 3169 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3179 3170 "size of an index entry"},
3180 3171 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3181 3172 "size of an index entry"},
3182 3173 {NULL} /* Sentinel */
3183 3174 };
3184 3175
3185 3176 PyTypeObject HgRevlogIndex_Type = {
3186 3177 PyVarObject_HEAD_INIT(NULL, 0) /* header */
3187 3178 "parsers.index", /* tp_name */
3188 3179 sizeof(indexObject), /* tp_basicsize */
3189 3180 0, /* tp_itemsize */
3190 3181 (destructor)index_dealloc, /* tp_dealloc */
3191 3182 0, /* tp_print */
3192 3183 0, /* tp_getattr */
3193 3184 0, /* tp_setattr */
3194 3185 0, /* tp_compare */
3195 3186 0, /* tp_repr */
3196 3187 0, /* tp_as_number */
3197 3188 &index_sequence_methods, /* tp_as_sequence */
3198 3189 &index_mapping_methods, /* tp_as_mapping */
3199 3190 0, /* tp_hash */
3200 3191 0, /* tp_call */
3201 3192 0, /* tp_str */
3202 3193 0, /* tp_getattro */
3203 3194 0, /* tp_setattro */
3204 3195 0, /* tp_as_buffer */
3205 3196 Py_TPFLAGS_DEFAULT, /* tp_flags */
3206 3197 "revlog index", /* tp_doc */
3207 3198 0, /* tp_traverse */
3208 3199 0, /* tp_clear */
3209 3200 0, /* tp_richcompare */
3210 3201 0, /* tp_weaklistoffset */
3211 3202 0, /* tp_iter */
3212 3203 0, /* tp_iternext */
3213 3204 index_methods, /* tp_methods */
3214 3205 index_members, /* tp_members */
3215 3206 index_getset, /* tp_getset */
3216 3207 0, /* tp_base */
3217 3208 0, /* tp_dict */
3218 3209 0, /* tp_descr_get */
3219 3210 0, /* tp_descr_set */
3220 3211 0, /* tp_dictoffset */
3221 3212 (initproc)index_init, /* tp_init */
3222 3213 0, /* tp_alloc */
3223 3214 };
3224 3215
3225 3216 /*
3226 3217 * returns a tuple of the form (index, cache) with elements as
3227 3218 * follows:
3228 3219 *
3229 3220 * index: an index object that lazily parses Revlog (v1 or v2) records
3230 3221 * cache: if data is inlined, a tuple (0, index_file_content), else None
3231 3222 * index_file_content could be a string, or a buffer
3232 3223 *
3233 3224 * added complications are for backwards compatibility
3234 3225 */
3235 3226 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3236 3227 {
3237 3228 PyObject *cache = NULL;
3238 3229 indexObject *idx;
3239 3230 int ret;
3240 3231
3241 3232 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3242 3233 if (idx == NULL)
3243 3234 goto bail;
3244 3235
3245 3236 ret = index_init(idx, args, kwargs);
3246 3237 if (ret == -1)
3247 3238 goto bail;
3248 3239
3249 3240 if (idx->inlined) {
3250 3241 cache = Py_BuildValue("iO", 0, idx->data);
3251 3242 if (cache == NULL)
3252 3243 goto bail;
3253 3244 } else {
3254 3245 cache = Py_None;
3255 3246 Py_INCREF(cache);
3256 3247 }
3257 3248
3258 3249 return Py_BuildValue("NN", idx, cache);
3259 3250
3260 3251 bail:
3261 3252 Py_XDECREF(idx);
3262 3253 Py_XDECREF(cache);
3263 3254 return NULL;
3264 3255 }
3265 3256
3266 3257 static Revlog_CAPI CAPI = {
3267 3258 /* increment the abi_version field upon each change in the Revlog_CAPI
3268 3259 struct or in the ABI of the listed functions */
3269 3260 2,
3270 3261 index_length,
3271 3262 index_node,
3272 3263 HgRevlogIndex_GetParents,
3273 3264 };
3274 3265
3275 3266 void revlog_module_init(PyObject *mod)
3276 3267 {
3277 3268 PyObject *caps = NULL;
3278 3269 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3279 3270 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3280 3271 return;
3281 3272 Py_INCREF(&HgRevlogIndex_Type);
3282 3273 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3283 3274
3284 3275 nodetreeType.tp_new = PyType_GenericNew;
3285 3276 if (PyType_Ready(&nodetreeType) < 0)
3286 3277 return;
3287 3278 Py_INCREF(&nodetreeType);
3288 3279 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3289 3280
3290 3281 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3291 3282 if (caps != NULL)
3292 3283 PyModule_AddObject(mod, "revlog_CAPI", caps);
3293 3284 }
General Comments 0
You need to be logged in to leave comments. Login now