merge: with stable
Augie Fackler
r48195:8a6e6b62 merge default
@@ -1,3060 +1,3061 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 #ifdef IS_PY3K
27 27 /* The mapping of Python types is meant to be temporary to get Python
28 28 * 3 to compile. We should remove this once Python 3 is fully
29 29 * supported and proper types are used in the extensions themselves. */
30 30 #define PyInt_Check PyLong_Check
31 31 #define PyInt_FromLong PyLong_FromLong
32 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 33 #define PyInt_AsLong PyLong_AsLong
34 34 #endif
35 35
36 36 typedef struct indexObjectStruct indexObject;
37 37
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetreenode;
41 41
42 42 typedef struct {
43 43 int abi_version;
44 44 Py_ssize_t (*index_length)(const indexObject *);
45 45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 46 int (*index_parents)(PyObject *, int, int *);
47 47 } Revlog_CAPI;
48 48
49 49 /*
50 50 * A base-16 trie for fast node->rev mapping.
51 51 *
52 52 * Positive value is index of the next node in the trie
53 53 * Negative value is a leaf: -(rev + 2)
54 54 * Zero is empty
55 55 */
56 56 typedef struct {
57 57 indexObject *index;
58 58 nodetreenode *nodes;
59 59 Py_ssize_t nodelen;
60 60 size_t length; /* # nodes in use */
61 61 size_t capacity; /* # nodes allocated */
62 62 int depth; /* maximum depth of tree */
63 63 int splits; /* # splits performed */
64 64 } nodetree;
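/*
 * Editorial sketch (not part of the original file): the leaf encoding
 * described in the comment above stores a revision r as -(r + 2), so that
 * 0 can still mean "empty slot" and positive values can index another
 * trie node. The helper names below are hypothetical and exist only to
 * make the encoding concrete.
 */
static inline int nt_sketch_encode_leaf(int rev)
{
        return -rev - 2; /* rev 0 -> -2, rev 1 -> -3, ... */
}

static inline int nt_sketch_decode_leaf(int v)
{
        return -(v + 2); /* -2 -> rev 0, -3 -> rev 1, ... */
}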
65 65
66 66 typedef struct {
67 67 PyObject_HEAD /* ; */
68 68 nodetree nt;
69 69 } nodetreeObject;
70 70
71 71 /*
72 72 * This class has two behaviors.
73 73 *
74 74 * When used in a list-like way (with integer keys), we decode an
75 75 * entry in a RevlogNG index file on demand. We have limited support for
76 76 * integer-keyed insert and delete, only at elements right before the
77 77 * end.
78 78 *
79 79 * With string keys, we lazily perform a reverse mapping from node to
80 80 * rev, using a base-16 trie.
81 81 */
82 82 struct indexObjectStruct {
83 83 PyObject_HEAD
84 84 /* Type-specific fields go here. */
85 85 PyObject *data; /* raw bytes of index */
86 86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 87 PyObject *nullentry; /* fast path for references to null */
88 88 Py_buffer buf; /* buffer of data */
89 89 const char **offsets; /* populated on demand */
90 90 Py_ssize_t length; /* current on-disk number of elements */
91 91 unsigned new_length; /* number of added elements */
92 92 unsigned added_length; /* space reserved for added elements */
93 93 char *added; /* populated on demand */
94 94 PyObject *headrevs; /* cache, invalidated on changes */
95 95 PyObject *filteredrevs; /* filtered revs set */
96 96 nodetree nt; /* base-16 trie */
97 97 int ntinitialized; /* 0 or 1 */
98 98 int ntrev; /* last rev scanned */
99 99 int ntlookups; /* # lookups */
100 100 int ntmisses; /* # lookups that miss the cache */
101 101 int inlined;
102 102 long entry_size; /* size of an index entry; differs between v1
103 103 and v2 formats */
104 104 long rust_ext_compat; /* compatibility with being used in rust
105 105 extensions */
106 106 char format_version; /* format version of the index: format_v1
107 107 or format_v2 */
108 108 };
109 109
110 110 static Py_ssize_t index_length(const indexObject *self)
111 111 {
112 112 return self->length + self->new_length;
113 113 }
114 114
115 115 static const char nullid[32] = {0};
116 116 static const Py_ssize_t nullrev = -1;
117 117
118 118 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
119 119
120 120 static int index_find_node(indexObject *self, const char *node);
121 121
122 122 #if LONG_MAX == 0x7fffffffL
123 123 static const char *const tuple_format = PY23("Kiiiiiis#KiBB", "Kiiiiiiy#KiBB");
124 124 #else
125 125 static const char *const tuple_format = PY23("kiiiiiis#kiBB", "kiiiiiiy#kiBB");
126 126 #endif
127 127
128 128 /* A RevlogNG v1 index entry is 64 bytes long. */
129 129 static const long v1_entry_size = 64;
130 130
131 131 /* A Revlogv2 index entry is 96 bytes long. */
132 132 static const long v2_entry_size = 96;
133 133
134 134 static const long format_v1 = 1; /* Internal only, could be any number */
135 135 static const long format_v2 = 2; /* Internal only, could be any number */
136 136
137 137 static const char comp_mode_inline = 2;
138 138
139 139 static void raise_revlog_error(void)
140 140 {
141 141 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
142 142
143 143 mod = PyImport_ImportModule("mercurial.error");
144 144 if (mod == NULL) {
145 145 goto cleanup;
146 146 }
147 147
148 148 dict = PyModule_GetDict(mod);
149 149 if (dict == NULL) {
150 150 goto cleanup;
151 151 }
152 152 Py_INCREF(dict);
153 153
154 154 errclass = PyDict_GetItemString(dict, "RevlogError");
155 155 if (errclass == NULL) {
156 156 PyErr_SetString(PyExc_SystemError,
157 157 "could not find RevlogError");
158 158 goto cleanup;
159 159 }
160 160
161 161 /* value of exception is ignored by callers */
162 162 PyErr_SetString(errclass, "RevlogError");
163 163
164 164 cleanup:
165 165 Py_XDECREF(dict);
166 166 Py_XDECREF(mod);
167 167 }
168 168
169 169 /*
170 170 * Return a pointer to the beginning of a RevlogNG record.
171 171 */
172 172 static const char *index_deref(indexObject *self, Py_ssize_t pos)
173 173 {
174 174 if (pos >= self->length)
175 175 return self->added + (pos - self->length) * self->entry_size;
176 176
177 177 if (self->inlined && pos > 0) {
178 178 if (self->offsets == NULL) {
179 179 Py_ssize_t ret;
180 180 self->offsets =
181 181 PyMem_Malloc(self->length * sizeof(*self->offsets));
182 182 if (self->offsets == NULL)
183 183 return (const char *)PyErr_NoMemory();
184 184 ret = inline_scan(self, self->offsets);
185 185 if (ret == -1) {
186 186 return NULL;
187 187 };
188 188 }
189 189 return self->offsets[pos];
190 190 }
191 191
192 192 return (const char *)(self->buf.buf) + pos * self->entry_size;
193 193 }
194 194
195 195 /*
196 196 * Get parents of the given rev.
197 197 *
198 198 * The specified rev must be valid and must not be nullrev. A returned
199 199 * parent revision may be nullrev, but is guaranteed to be in valid range.
200 200 */
201 201 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
202 202 int maxrev)
203 203 {
204 204 const char *data = index_deref(self, rev);
205 205
206 206 ps[0] = getbe32(data + 24);
207 207 ps[1] = getbe32(data + 28);
208 208
209 209 /* If the index file is corrupted, ps[] may point to invalid revisions,
210 210 * so trusting them unconditionally risks a buffer overflow. */
211 211 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
212 212 PyErr_SetString(PyExc_ValueError, "parent out of range");
213 213 return -1;
214 214 }
215 215 return 0;
216 216 }
217 217
218 218 /*
219 219 * Get parents of the given rev.
220 220 *
221 221 * If the specified rev is out of range, IndexError will be raised. If the
222 222 * revlog entry is corrupted, ValueError may be raised.
223 223 *
224 224 * Returns 0 on success or -1 on failure.
225 225 */
226 226 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
227 227 {
228 228 int tiprev;
229 229 if (!op || !HgRevlogIndex_Check(op) || !ps) {
230 230 PyErr_BadInternalCall();
231 231 return -1;
232 232 }
233 233 tiprev = (int)index_length((indexObject *)op) - 1;
234 234 if (rev < -1 || rev > tiprev) {
235 235 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
236 236 return -1;
237 237 } else if (rev == -1) {
238 238 ps[0] = ps[1] = -1;
239 239 return 0;
240 240 } else {
241 241 return index_get_parents((indexObject *)op, rev, ps, tiprev);
242 242 }
243 243 }
244 244
245 245 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
246 246 {
247 247 const char *data;
248 248 uint64_t offset;
249 249
250 250 if (rev == nullrev)
251 251 return 0;
252 252
253 253 data = index_deref(self, rev);
254 254 offset = getbe32(data + 4);
255 255 if (rev == 0) {
256 256 /* mask out version number for the first entry */
257 257 offset &= 0xFFFF;
258 258 } else {
259 259 uint32_t offset_high = getbe32(data);
260 260 offset |= ((uint64_t)offset_high) << 32;
261 261 }
262 262 return (int64_t)(offset >> 16);
263 263 }
264 264
265 265 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
266 266 {
267 267 const char *data;
268 268 int tmp;
269 269
270 270 if (rev == nullrev)
271 271 return 0;
272 272
273 273 data = index_deref(self, rev);
274 274
275 275 tmp = (int)getbe32(data + 8);
276 276 if (tmp < 0) {
277 277 PyErr_Format(PyExc_OverflowError,
278 278 "revlog entry size out of bound (%d)", tmp);
279 279 return -1;
280 280 }
281 281 return tmp;
282 282 }
283 283
284 284 /*
285 285 * RevlogNG format (all in big endian, data may be inlined):
286 286 * 6 bytes: offset
287 287 * 2 bytes: flags
288 288 * 4 bytes: compressed length
289 289 * 4 bytes: uncompressed length
290 290 * 4 bytes: base revision
291 291 * 4 bytes: link revision
292 292 * 4 bytes: parent 1 revision
293 293 * 4 bytes: parent 2 revision
294 294 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
295 295 */
296 296 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
297 297 {
298 298 uint64_t offset_flags, sidedata_offset;
299 299 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
300 300 sidedata_comp_len;
301 301 char data_comp_mode, sidedata_comp_mode;
302 302 const char *c_node_id;
303 303 const char *data;
304 304 Py_ssize_t length = index_length(self);
305 305
306 306 if (pos == nullrev) {
307 307 Py_INCREF(self->nullentry);
308 308 return self->nullentry;
309 309 }
310 310
311 311 if (pos < 0 || pos >= length) {
312 312 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
313 313 return NULL;
314 314 }
315 315
316 316 data = index_deref(self, pos);
317 317 if (data == NULL)
318 318 return NULL;
319 319
320 320 offset_flags = getbe32(data + 4);
321 321 /*
322 322 * The first entry on-disk needs the version number masked out,
323 323 * but this doesn't apply if entries are added to an empty index.
324 324 */
325 325 if (self->length && pos == 0)
326 326 offset_flags &= 0xFFFF;
327 327 else {
328 328 uint32_t offset_high = getbe32(data);
329 329 offset_flags |= ((uint64_t)offset_high) << 32;
330 330 }
331 331
332 332 comp_len = getbe32(data + 8);
333 333 uncomp_len = getbe32(data + 12);
334 334 base_rev = getbe32(data + 16);
335 335 link_rev = getbe32(data + 20);
336 336 parent_1 = getbe32(data + 24);
337 337 parent_2 = getbe32(data + 28);
338 338 c_node_id = data + 32;
339 339
340 340 if (self->format_version == format_v1) {
341 341 sidedata_offset = 0;
342 342 sidedata_comp_len = 0;
343 343 data_comp_mode = comp_mode_inline;
344 344 sidedata_comp_mode = comp_mode_inline;
345 345 } else {
346 346 sidedata_offset = getbe64(data + 64);
347 347 sidedata_comp_len = getbe32(data + 72);
348 348 data_comp_mode = data[76] & 3;
349 349 sidedata_comp_mode = ((data[76] >> 2) & 3);
350 350 }
351 351
352 352 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
353 353 base_rev, link_rev, parent_1, parent_2, c_node_id,
354 354 self->nodelen, sidedata_offset, sidedata_comp_len,
355 355 data_comp_mode, sidedata_comp_mode);
356 356 }
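/*
 * Editorial sketch: how the leading 8 bytes of an entry decompose. The
 * byte offset occupies the high 48 bits of offset_flags and the flags the
 * low 16 bits, which is why index_get_start() shifts by 16 and why the
 * first on-disk entry is masked with 0xFFFF: its offset bits overlap the
 * revlog version header and its data always starts at offset 0. The
 * helper names below are hypothetical, for illustration only.
 */
static inline uint64_t sketch_offset_part(uint64_t offset_flags)
{
        return offset_flags >> 16; /* byte offset into the data file */
}

static inline int sketch_flags_part(uint64_t offset_flags)
{
        return (int)(offset_flags & 0xFFFF); /* per-revision flag bits */
}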
357 357 /*
358 358 * Pack header information in binary
359 359 */
360 360 static PyObject *index_pack_header(indexObject *self, PyObject *args)
361 361 {
362 362 int header;
363 363 char out[4];
364 364 if (!PyArg_ParseTuple(args, "I", &header)) {
365 365 return NULL;
366 366 }
367 367 if (self->format_version != format_v1) {
368 368 PyErr_Format(PyExc_RuntimeError,
369 369 "version header should go in the docket, not the "
370 370 "index: %lu",
371 371 header);
372 372 return NULL;
373 373 }
374 374 putbe32(header, out);
375 375 return PyBytes_FromStringAndSize(out, 4);
376 376 }
377 377 /*
378 378 * Return the raw binary string representing a revision
379 379 */
380 380 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
381 381 {
382 382 long rev;
383 383 const char *data;
384 384 Py_ssize_t length = index_length(self);
385 385
386 386 if (!pylong_to_long(value, &rev)) {
387 387 return NULL;
388 388 }
389 389 if (rev < 0 || rev >= length) {
390 390 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
391 391 rev);
392 392 return NULL;
393 393 };
394 394
395 395 data = index_deref(self, rev);
396 396 if (data == NULL)
397 397 return NULL;
398 398 if (rev == 0 && self->format_version == format_v1) {
399 399 /* the header overlaps the start of the first entry */
400 400 return PyBytes_FromStringAndSize(data + 4,
401 401 self->entry_size - 4);
402 402 }
403 403 return PyBytes_FromStringAndSize(data, self->entry_size);
404 404 }
405 405
406 406 /*
407 407 * Return the hash of node corresponding to the given rev.
408 408 */
409 409 static const char *index_node(indexObject *self, Py_ssize_t pos)
410 410 {
411 411 Py_ssize_t length = index_length(self);
412 412 const char *data;
413 413
414 414 if (pos == nullrev)
415 415 return nullid;
416 416
417 417 if (pos >= length)
418 418 return NULL;
419 419
420 420 data = index_deref(self, pos);
421 421 return data ? data + 32 : NULL;
422 422 }
423 423
424 424 /*
425 425 * Return the hash of the node corresponding to the given rev. The
426 426 * rev is assumed to exist. If not, an exception is set.
427 427 */
428 428 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
429 429 {
430 430 const char *node = index_node(self, pos);
431 431 if (node == NULL) {
432 432 PyErr_Format(PyExc_IndexError, "could not access rev %d",
433 433 (int)pos);
434 434 }
435 435 return node;
436 436 }
437 437
438 438 static int nt_insert(nodetree *self, const char *node, int rev);
439 439
440 440 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
441 441 {
442 442 Py_ssize_t thisnodelen;
443 443 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
444 444 return -1;
445 445 if (nodelen == thisnodelen)
446 446 return 0;
447 447 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
448 448 thisnodelen, nodelen);
449 449 return -1;
450 450 }
451 451
452 452 static PyObject *index_append(indexObject *self, PyObject *obj)
453 453 {
454 454 uint64_t offset_flags, sidedata_offset;
455 455 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
456 456 char data_comp_mode, sidedata_comp_mode;
457 457 Py_ssize_t c_node_id_len, sidedata_comp_len;
458 458 const char *c_node_id;
459 459 char comp_field;
460 460 char *data;
461 461
462 462 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
463 463 &uncomp_len, &base_rev, &link_rev, &parent_1,
464 464 &parent_2, &c_node_id, &c_node_id_len,
465 465 &sidedata_offset, &sidedata_comp_len,
466 466 &data_comp_mode, &sidedata_comp_mode)) {
467 467 PyErr_SetString(PyExc_TypeError, "12-tuple required");
468 468 return NULL;
469 469 }
470 470
471 471 if (c_node_id_len != self->nodelen) {
472 472 PyErr_SetString(PyExc_TypeError, "invalid node");
473 473 return NULL;
474 474 }
475 475 if (self->format_version == format_v1) {
476 476
477 477 if (data_comp_mode != comp_mode_inline) {
478 478 PyErr_Format(PyExc_ValueError,
479 479 "invalid data compression mode: %i",
480 480 data_comp_mode);
481 481 return NULL;
482 482 }
483 483 if (sidedata_comp_mode != comp_mode_inline) {
484 484 PyErr_Format(PyExc_ValueError,
485 485 "invalid sidedata compression mode: %i",
486 486 sidedata_comp_mode);
487 487 return NULL;
488 488 }
489 489 }
490 490
491 491 if (self->new_length == self->added_length) {
492 492 size_t new_added_length =
493 493 self->added_length ? self->added_length * 2 : 4096;
494 494 void *new_added = PyMem_Realloc(
495 495 self->added, new_added_length * self->entry_size);
496 496 if (!new_added)
497 497 return PyErr_NoMemory();
498 498 self->added = new_added;
499 499 self->added_length = new_added_length;
500 500 }
501 501 rev = self->length + self->new_length;
502 502 data = self->added + self->entry_size * self->new_length++;
503 503 putbe32(offset_flags >> 32, data);
504 504 putbe32(offset_flags & 0xffffffffU, data + 4);
505 505 putbe32(comp_len, data + 8);
506 506 putbe32(uncomp_len, data + 12);
507 507 putbe32(base_rev, data + 16);
508 508 putbe32(link_rev, data + 20);
509 509 putbe32(parent_1, data + 24);
510 510 putbe32(parent_2, data + 28);
511 511 memcpy(data + 32, c_node_id, c_node_id_len);
512 512 /* Padding since SHA-1 is only 20 bytes for now */
513 513 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
514 514 if (self->format_version == format_v2) {
515 515 putbe64(sidedata_offset, data + 64);
516 516 putbe32(sidedata_comp_len, data + 72);
517 517 comp_field = data_comp_mode & 3;
518 518 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
519 519 data[76] = comp_field;
520 520 /* Padding for 96-byte alignment */
521 521 memset(data + 77, 0, self->entry_size - 77);
522 522 }
523 523
524 524 if (self->ntinitialized)
525 525 nt_insert(&self->nt, c_node_id, rev);
526 526
527 527 Py_CLEAR(self->headrevs);
528 528 Py_RETURN_NONE;
529 529 }
530 530
531 531 /* Replace an existing index entry's sidedata offset and length with new ones.
532 532 This cannot be used outside of the context of sidedata rewriting,
533 533 inside the transaction that creates the given revision. */
534 534 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
535 535 {
536 536 uint64_t offset_flags, sidedata_offset;
537 537 int rev;
538 538 char comp_mode;
539 539 Py_ssize_t sidedata_comp_len;
540 540 char *data;
541 541 #if LONG_MAX == 0x7fffffffL
542 542 const char *const sidedata_format = PY23("nKiKB", "nKiKB");
543 543 #else
544 544 const char *const sidedata_format = PY23("nkikB", "nkikB");
545 545 #endif
546 546
547 547 if (self->entry_size == v1_entry_size || self->inlined) {
548 548 /*
549 549 There is a bug in the transaction handling when going from an
550 550 inline revlog to a separate index and data file. Turn it off until
551 551 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
552 552 See issue6485.
553 553 */
554 554 raise_revlog_error();
555 555 return NULL;
556 556 }
557 557
558 558 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
559 559 &sidedata_comp_len, &offset_flags, &comp_mode))
560 560 return NULL;
561 561
562 562 if (rev < 0 || rev >= index_length(self)) {
563 563 PyErr_SetString(PyExc_IndexError, "revision outside index");
564 564 return NULL;
565 565 }
566 566 if (rev < self->length) {
567 567 PyErr_SetString(
568 568 PyExc_IndexError,
569 569 "cannot rewrite entries outside of this transaction");
570 570 return NULL;
571 571 }
572 572
573 573 /* Find the newly added node, offset from the "already on-disk" length
574 574 */
575 575 data = self->added + self->entry_size * (rev - self->length);
576 576 putbe64(offset_flags, data);
577 577 putbe64(sidedata_offset, data + 64);
578 578 putbe32(sidedata_comp_len, data + 72);
579 579 data[76] = (data[76] & ~(3 << 2)) | ((comp_mode & 3) << 2);
580 580
581 581 Py_RETURN_NONE;
582 582 }
583 583
584 584 static PyObject *index_stats(indexObject *self)
585 585 {
586 586 PyObject *obj = PyDict_New();
587 587 PyObject *s = NULL;
588 588 PyObject *t = NULL;
589 589
590 590 if (obj == NULL)
591 591 return NULL;
592 592
593 593 #define istat(__n, __d) \
594 594 do { \
595 595 s = PyBytes_FromString(__d); \
596 596 t = PyInt_FromSsize_t(self->__n); \
597 597 if (!s || !t) \
598 598 goto bail; \
599 599 if (PyDict_SetItem(obj, s, t) == -1) \
600 600 goto bail; \
601 601 Py_CLEAR(s); \
602 602 Py_CLEAR(t); \
603 603 } while (0)
604 604
605 605 if (self->added_length)
606 606 istat(new_length, "index entries added");
607 607 istat(length, "revs in memory");
608 608 istat(ntlookups, "node trie lookups");
609 609 istat(ntmisses, "node trie misses");
610 610 istat(ntrev, "node trie last rev scanned");
611 611 if (self->ntinitialized) {
612 612 istat(nt.capacity, "node trie capacity");
613 613 istat(nt.depth, "node trie depth");
614 614 istat(nt.length, "node trie count");
615 615 istat(nt.splits, "node trie splits");
616 616 }
617 617
618 618 #undef istat
619 619
620 620 return obj;
621 621
622 622 bail:
623 623 Py_XDECREF(obj);
624 624 Py_XDECREF(s);
625 625 Py_XDECREF(t);
626 626 return NULL;
627 627 }
628 628
629 629 /*
630 630 * When we cache a list, we want to be sure the caller can't mutate
631 631 * the cached copy.
632 632 */
633 633 static PyObject *list_copy(PyObject *list)
634 634 {
635 635 Py_ssize_t len = PyList_GET_SIZE(list);
636 636 PyObject *newlist = PyList_New(len);
637 637 Py_ssize_t i;
638 638
639 639 if (newlist == NULL)
640 640 return NULL;
641 641
642 642 for (i = 0; i < len; i++) {
643 643 PyObject *obj = PyList_GET_ITEM(list, i);
644 644 Py_INCREF(obj);
645 645 PyList_SET_ITEM(newlist, i, obj);
646 646 }
647 647
648 648 return newlist;
649 649 }
650 650
651 651 static int check_filter(PyObject *filter, Py_ssize_t arg)
652 652 {
653 653 if (filter) {
654 654 PyObject *arglist, *result;
655 655 int isfiltered;
656 656
657 657 arglist = Py_BuildValue("(n)", arg);
658 658 if (!arglist) {
659 659 return -1;
660 660 }
661 661
662 662 result = PyObject_Call(filter, arglist, NULL);
663 663 Py_DECREF(arglist);
664 664 if (!result) {
665 665 return -1;
666 666 }
667 667
668 668 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
669 669 * same as this function, so we can just return it directly.*/
670 670 isfiltered = PyObject_IsTrue(result);
671 671 Py_DECREF(result);
672 672 return isfiltered;
673 673 } else {
674 674 return 0;
675 675 }
676 676 }
677 677
678 678 static inline void set_phase_from_parents(char *phases, int parent_1,
679 679 int parent_2, Py_ssize_t i)
680 680 {
681 681 if (parent_1 >= 0 && phases[parent_1] > phases[i])
682 682 phases[i] = phases[parent_1];
683 683 if (parent_2 >= 0 && phases[parent_2] > phases[i])
684 684 phases[i] = phases[parent_2];
685 685 }
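/*
 * Editorial sketch: a child revision can never have a lower (more public)
 * phase than either of its parents, so the helper above propagates the
 * maximum of the parents' phases. For example, with a draft (1) parent
 * and a secret (2) parent the child ends up secret. Illustrative, unused
 * function with a hypothetical name.
 */
static inline void sketch_phase_demo(void)
{
        char phases[3] = {1, 2, 0}; /* rev 0 draft, rev 1 secret, rev 2 unset */
        set_phase_from_parents(phases, 0, 1, 2);
        assert(phases[2] == 2); /* rev 2 inherits the higher (secret) phase */
}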
686 686
687 687 static PyObject *reachableroots2(indexObject *self, PyObject *args)
688 688 {
689 689
690 690 /* Input */
691 691 long minroot;
692 692 PyObject *includepatharg = NULL;
693 693 int includepath = 0;
694 694 /* heads and roots are lists */
695 695 PyObject *heads = NULL;
696 696 PyObject *roots = NULL;
697 697 PyObject *reachable = NULL;
698 698
699 699 PyObject *val;
700 700 Py_ssize_t len = index_length(self);
701 701 long revnum;
702 702 Py_ssize_t k;
703 703 Py_ssize_t i;
704 704 Py_ssize_t l;
705 705 int r;
706 706 int parents[2];
707 707
708 708 /* Internal data structure:
709 709 * tovisit: array of length len+1 (all revs + nullrev), filled up to
710 710 * lentovisit
711 711 *
712 712 * revstates: array of length len+1 (all revs + nullrev) */
713 713 int *tovisit = NULL;
714 714 long lentovisit = 0;
715 715 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
716 716 char *revstates = NULL;
717 717
718 718 /* Get arguments */
719 719 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
720 720 &PyList_Type, &roots, &PyBool_Type,
721 721 &includepatharg))
722 722 goto bail;
723 723
724 724 if (includepatharg == Py_True)
725 725 includepath = 1;
726 726
727 727 /* Initialize return set */
728 728 reachable = PyList_New(0);
729 729 if (reachable == NULL)
730 730 goto bail;
731 731
732 732 /* Initialize internal data structures */
733 733 tovisit = (int *)malloc((len + 1) * sizeof(int));
734 734 if (tovisit == NULL) {
735 735 PyErr_NoMemory();
736 736 goto bail;
737 737 }
738 738
739 739 revstates = (char *)calloc(len + 1, 1);
740 740 if (revstates == NULL) {
741 741 PyErr_NoMemory();
742 742 goto bail;
743 743 }
744 744
745 745 l = PyList_GET_SIZE(roots);
746 746 for (i = 0; i < l; i++) {
747 747 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
748 748 if (revnum == -1 && PyErr_Occurred())
749 749 goto bail;
750 750 /* If root is out of range, e.g. wdir(), it must be unreachable
751 751 * from heads. So we can just ignore it. */
752 752 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
753 753 continue;
754 754 revstates[revnum + 1] |= RS_ROOT;
755 755 }
756 756
757 757 /* Populate tovisit with all the heads */
758 758 l = PyList_GET_SIZE(heads);
759 759 for (i = 0; i < l; i++) {
760 760 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
761 761 if (revnum == -1 && PyErr_Occurred())
762 762 goto bail;
763 763 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
764 764 PyErr_SetString(PyExc_IndexError, "head out of range");
765 765 goto bail;
766 766 }
767 767 if (!(revstates[revnum + 1] & RS_SEEN)) {
768 768 tovisit[lentovisit++] = (int)revnum;
769 769 revstates[revnum + 1] |= RS_SEEN;
770 770 }
771 771 }
772 772
773 773 /* Visit the tovisit list and find the reachable roots */
774 774 k = 0;
775 775 while (k < lentovisit) {
776 776 /* Add the node to reachable if it is a root */
777 777 revnum = tovisit[k++];
778 778 if (revstates[revnum + 1] & RS_ROOT) {
779 779 revstates[revnum + 1] |= RS_REACHABLE;
780 780 val = PyInt_FromLong(revnum);
781 781 if (val == NULL)
782 782 goto bail;
783 783 r = PyList_Append(reachable, val);
784 784 Py_DECREF(val);
785 785 if (r < 0)
786 786 goto bail;
787 787 if (includepath == 0)
788 788 continue;
789 789 }
790 790
791 791 /* Add its parents to the list of nodes to visit */
792 792 if (revnum == nullrev)
793 793 continue;
794 794 r = index_get_parents(self, revnum, parents, (int)len - 1);
795 795 if (r < 0)
796 796 goto bail;
797 797 for (i = 0; i < 2; i++) {
798 798 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
799 799 parents[i] >= minroot) {
800 800 tovisit[lentovisit++] = parents[i];
801 801 revstates[parents[i] + 1] |= RS_SEEN;
802 802 }
803 803 }
804 804 }
805 805
806 806 /* Find all the nodes in between the roots we found and the heads
807 807 * and add them to the reachable set */
808 808 if (includepath == 1) {
809 809 long minidx = minroot;
810 810 if (minidx < 0)
811 811 minidx = 0;
812 812 for (i = minidx; i < len; i++) {
813 813 if (!(revstates[i + 1] & RS_SEEN))
814 814 continue;
815 815 r = index_get_parents(self, i, parents, (int)len - 1);
816 816 /* Corrupted index file, error is set from
817 817 * index_get_parents */
818 818 if (r < 0)
819 819 goto bail;
820 820 if (((revstates[parents[0] + 1] |
821 821 revstates[parents[1] + 1]) &
822 822 RS_REACHABLE) &&
823 823 !(revstates[i + 1] & RS_REACHABLE)) {
824 824 revstates[i + 1] |= RS_REACHABLE;
825 825 val = PyInt_FromSsize_t(i);
826 826 if (val == NULL)
827 827 goto bail;
828 828 r = PyList_Append(reachable, val);
829 829 Py_DECREF(val);
830 830 if (r < 0)
831 831 goto bail;
832 832 }
833 833 }
834 834 }
835 835
836 836 free(revstates);
837 837 free(tovisit);
838 838 return reachable;
839 839 bail:
840 840 Py_XDECREF(reachable);
841 841 free(revstates);
842 842 free(tovisit);
843 843 return NULL;
844 844 }
845 845
846 846 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
847 847 char phase)
848 848 {
849 849 Py_ssize_t len = index_length(self);
850 850 PyObject *item;
851 851 PyObject *iterator;
852 852 int rev, minrev = -1;
853 853 char *node;
854 854
855 855 if (!PySet_Check(roots)) {
856 856 PyErr_SetString(PyExc_TypeError,
857 857 "roots must be a set of nodes");
858 858 return -2;
859 859 }
860 860 iterator = PyObject_GetIter(roots);
861 861 if (iterator == NULL)
862 862 return -2;
863 863 while ((item = PyIter_Next(iterator))) {
864 864 if (node_check(self->nodelen, item, &node) == -1)
865 865 goto failed;
866 866 rev = index_find_node(self, node);
867 867 /* null is implicitly public, so negative is invalid */
868 868 if (rev < 0 || rev >= len)
869 869 goto failed;
870 870 phases[rev] = phase;
871 871 if (minrev == -1 || minrev > rev)
872 872 minrev = rev;
873 873 Py_DECREF(item);
874 874 }
875 875 Py_DECREF(iterator);
876 876 return minrev;
877 877 failed:
878 878 Py_DECREF(iterator);
879 879 Py_DECREF(item);
880 880 return -2;
881 881 }
882 882
883 883 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
884 884 {
885 885 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
886 886 96: internal */
887 887 static const char trackedphases[] = {1, 2, 32, 96};
888 888 PyObject *roots = Py_None;
889 889 PyObject *phasesetsdict = NULL;
890 890 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
891 891 Py_ssize_t len = index_length(self);
892 892 char *phases = NULL;
893 893 int minphaserev = -1, rev, i;
894 894 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
895 895
896 896 if (!PyArg_ParseTuple(args, "O", &roots))
897 897 return NULL;
898 898 if (roots == NULL || !PyDict_Check(roots)) {
899 899 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
900 900 return NULL;
901 901 }
902 902
903 903 phases = calloc(len, 1);
904 904 if (phases == NULL) {
905 905 PyErr_NoMemory();
906 906 return NULL;
907 907 }
908 908
909 909 for (i = 0; i < numphases; ++i) {
910 910 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
911 911 PyObject *phaseroots = NULL;
912 912 if (pyphase == NULL)
913 913 goto release;
914 914 phaseroots = PyDict_GetItem(roots, pyphase);
915 915 Py_DECREF(pyphase);
916 916 if (phaseroots == NULL)
917 917 continue;
918 918 rev = add_roots_get_min(self, phaseroots, phases,
919 919 trackedphases[i]);
920 920 if (rev == -2)
921 921 goto release;
922 922 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
923 923 minphaserev = rev;
924 924 }
925 925
926 926 for (i = 0; i < numphases; ++i) {
927 927 phasesets[i] = PySet_New(NULL);
928 928 if (phasesets[i] == NULL)
929 929 goto release;
930 930 }
931 931
932 932 if (minphaserev == -1)
933 933 minphaserev = len;
934 934 for (rev = minphaserev; rev < len; ++rev) {
935 935 PyObject *pyphase = NULL;
936 936 PyObject *pyrev = NULL;
937 937 int parents[2];
938 938 /*
939 939 * The parent lookup could be skipped for phaseroots, but
940 940 * phase --force would historically not recompute them
941 941 * correctly, leaving descendants with a lower phase around.
942 942 * As such, unconditionally recompute the phase.
943 943 */
944 944 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
945 945 goto release;
946 946 set_phase_from_parents(phases, parents[0], parents[1], rev);
947 947 switch (phases[rev]) {
948 948 case 0:
949 949 continue;
950 950 case 1:
951 951 pyphase = phasesets[0];
952 952 break;
953 953 case 2:
954 954 pyphase = phasesets[1];
955 955 break;
956 956 case 32:
957 957 pyphase = phasesets[2];
958 958 break;
959 959 case 96:
960 960 pyphase = phasesets[3];
961 961 break;
962 962 default:
963 963 /* this should never happen since the phase number is
964 964 * specified by this function. */
965 965 PyErr_SetString(PyExc_SystemError,
966 966 "bad phase number in internal list");
967 967 goto release;
968 968 }
969 969 pyrev = PyInt_FromLong(rev);
970 970 if (pyrev == NULL)
971 971 goto release;
972 972 if (PySet_Add(pyphase, pyrev) == -1) {
973 973 Py_DECREF(pyrev);
974 974 goto release;
975 975 }
976 976 Py_DECREF(pyrev);
977 977 }
978 978
979 979 phasesetsdict = _dict_new_presized(numphases);
980 980 if (phasesetsdict == NULL)
981 981 goto release;
982 982 for (i = 0; i < numphases; ++i) {
983 983 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
984 984 if (pyphase == NULL)
985 985 goto release;
986 986 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
987 987 -1) {
988 988 Py_DECREF(pyphase);
989 989 goto release;
990 990 }
991 991 Py_DECREF(phasesets[i]);
992 992 phasesets[i] = NULL;
993 993 }
994 994
995 free(phases);
995 996 return Py_BuildValue("nN", len, phasesetsdict);
996 997
997 998 release:
998 999 for (i = 0; i < numphases; ++i)
999 1000 Py_XDECREF(phasesets[i]);
1000 1001 Py_XDECREF(phasesetsdict);
1001 1002
1002 1003 free(phases);
1003 1004 return NULL;
1004 1005 }
1005 1006
1006 1007 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1007 1008 {
1008 1009 Py_ssize_t i, j, len;
1009 1010 char *nothead = NULL;
1010 1011 PyObject *heads = NULL;
1011 1012 PyObject *filter = NULL;
1012 1013 PyObject *filteredrevs = Py_None;
1013 1014
1014 1015 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1015 1016 return NULL;
1016 1017 }
1017 1018
1018 1019 if (self->headrevs && filteredrevs == self->filteredrevs)
1019 1020 return list_copy(self->headrevs);
1020 1021
1021 1022 Py_DECREF(self->filteredrevs);
1022 1023 self->filteredrevs = filteredrevs;
1023 1024 Py_INCREF(filteredrevs);
1024 1025
1025 1026 if (filteredrevs != Py_None) {
1026 1027 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1027 1028 if (!filter) {
1028 1029 PyErr_SetString(
1029 1030 PyExc_TypeError,
1030 1031 "filteredrevs has no attribute __contains__");
1031 1032 goto bail;
1032 1033 }
1033 1034 }
1034 1035
1035 1036 len = index_length(self);
1036 1037 heads = PyList_New(0);
1037 1038 if (heads == NULL)
1038 1039 goto bail;
1039 1040 if (len == 0) {
1040 1041 PyObject *nullid = PyInt_FromLong(-1);
1041 1042 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1042 1043 Py_XDECREF(nullid);
1043 1044 goto bail;
1044 1045 }
1045 1046 goto done;
1046 1047 }
1047 1048
1048 1049 nothead = calloc(len, 1);
1049 1050 if (nothead == NULL) {
1050 1051 PyErr_NoMemory();
1051 1052 goto bail;
1052 1053 }
1053 1054
1054 1055 for (i = len - 1; i >= 0; i--) {
1055 1056 int isfiltered;
1056 1057 int parents[2];
1057 1058
1058 1059 /* If nothead[i] == 1, it means we've seen an unfiltered child
1059 1060 * of this node already, and therefore this node is not
1060 1061 * filtered. So we can skip the expensive check_filter step.
1061 1062 */
1062 1063 if (nothead[i] != 1) {
1063 1064 isfiltered = check_filter(filter, i);
1064 1065 if (isfiltered == -1) {
1065 1066 PyErr_SetString(PyExc_TypeError,
1066 1067 "unable to check filter");
1067 1068 goto bail;
1068 1069 }
1069 1070
1070 1071 if (isfiltered) {
1071 1072 nothead[i] = 1;
1072 1073 continue;
1073 1074 }
1074 1075 }
1075 1076
1076 1077 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1077 1078 goto bail;
1078 1079 for (j = 0; j < 2; j++) {
1079 1080 if (parents[j] >= 0)
1080 1081 nothead[parents[j]] = 1;
1081 1082 }
1082 1083 }
1083 1084
1084 1085 for (i = 0; i < len; i++) {
1085 1086 PyObject *head;
1086 1087
1087 1088 if (nothead[i])
1088 1089 continue;
1089 1090 head = PyInt_FromSsize_t(i);
1090 1091 if (head == NULL || PyList_Append(heads, head) == -1) {
1091 1092 Py_XDECREF(head);
1092 1093 goto bail;
1093 1094 }
1094 1095 }
1095 1096
1096 1097 done:
1097 1098 self->headrevs = heads;
1098 1099 Py_XDECREF(filter);
1099 1100 free(nothead);
1100 1101 return list_copy(self->headrevs);
1101 1102 bail:
1102 1103 Py_XDECREF(filter);
1103 1104 Py_XDECREF(heads);
1104 1105 free(nothead);
1105 1106 return NULL;
1106 1107 }
1107 1108
1108 1109 /**
1109 1110 * Obtain the base revision index entry.
1110 1111 *
1111 1112 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1112 1113 */
1113 1114 static inline int index_baserev(indexObject *self, int rev)
1114 1115 {
1115 1116 const char *data;
1116 1117 int result;
1117 1118
1118 1119 data = index_deref(self, rev);
1119 1120 if (data == NULL)
1120 1121 return -2;
1121 1122 result = getbe32(data + 16);
1122 1123
1123 1124 if (result > rev) {
1124 1125 PyErr_Format(
1125 1126 PyExc_ValueError,
1126 1127 "corrupted revlog, revision base above revision: %d, %d",
1127 1128 rev, result);
1128 1129 return -2;
1129 1130 }
1130 1131 if (result < -1) {
1131 1132 PyErr_Format(
1132 1133 PyExc_ValueError,
1133 1134 "corrupted revlog, revision base out of range: %d, %d", rev,
1134 1135 result);
1135 1136 return -2;
1136 1137 }
1137 1138 return result;
1138 1139 }
1139 1140
1140 1141 /**
1141 1142 * Find if a revision is a snapshot or not
1142 1143 *
1143 1144 * Only relevant for sparse-revlog case.
1144 1145 * Callers must ensure that rev is in a valid range.
1145 1146 */
1146 1147 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1147 1148 {
1148 1149 int ps[2];
1149 1150 Py_ssize_t base;
1150 1151 while (rev >= 0) {
1151 1152 base = (Py_ssize_t)index_baserev(self, rev);
1152 1153 if (base == rev) {
1153 1154 base = -1;
1154 1155 }
1155 1156 if (base == -2) {
1156 1157 assert(PyErr_Occurred());
1157 1158 return -1;
1158 1159 }
1159 1160 if (base == -1) {
1160 1161 return 1;
1161 1162 }
1162 1163 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1163 1164 assert(PyErr_Occurred());
1164 1165 return -1;
1165 1166 };
1166 1167 if (base == ps[0] || base == ps[1]) {
1167 1168 return 0;
1168 1169 }
1169 1170 rev = base;
1170 1171 }
1171 1172 return rev == -1;
1172 1173 }
1173 1174
1174 1175 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1175 1176 {
1176 1177 long rev;
1177 1178 int issnap;
1178 1179 Py_ssize_t length = index_length(self);
1179 1180
1180 1181 if (!pylong_to_long(value, &rev)) {
1181 1182 return NULL;
1182 1183 }
1183 1184 if (rev < -1 || rev >= length) {
1184 1185 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1185 1186 rev);
1186 1187 return NULL;
1187 1188 };
1188 1189 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1189 1190 if (issnap < 0) {
1190 1191 return NULL;
1191 1192 };
1192 1193 return PyBool_FromLong((long)issnap);
1193 1194 }
1194 1195
1195 1196 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1196 1197 {
1197 1198 Py_ssize_t start_rev;
1198 1199 PyObject *cache;
1199 1200 Py_ssize_t base;
1200 1201 Py_ssize_t rev;
1201 1202 PyObject *key = NULL;
1202 1203 PyObject *value = NULL;
1203 1204 const Py_ssize_t length = index_length(self);
1204 1205 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1205 1206 return NULL;
1206 1207 }
1207 1208 for (rev = start_rev; rev < length; rev++) {
1208 1209 int issnap;
1209 1210 PyObject *allvalues = NULL;
1210 1211 issnap = index_issnapshotrev(self, rev);
1211 1212 if (issnap < 0) {
1212 1213 goto bail;
1213 1214 }
1214 1215 if (issnap == 0) {
1215 1216 continue;
1216 1217 }
1217 1218 base = (Py_ssize_t)index_baserev(self, rev);
1218 1219 if (base == rev) {
1219 1220 base = -1;
1220 1221 }
1221 1222 if (base == -2) {
1222 1223 assert(PyErr_Occurred());
1223 1224 goto bail;
1224 1225 }
1225 1226 key = PyInt_FromSsize_t(base);
1226 1227 allvalues = PyDict_GetItem(cache, key);
1227 1228 if (allvalues == NULL && PyErr_Occurred()) {
1228 1229 goto bail;
1229 1230 }
1230 1231 if (allvalues == NULL) {
1231 1232 int r;
1232 1233 allvalues = PyList_New(0);
1233 1234 if (!allvalues) {
1234 1235 goto bail;
1235 1236 }
1236 1237 r = PyDict_SetItem(cache, key, allvalues);
1237 1238 Py_DECREF(allvalues);
1238 1239 if (r < 0) {
1239 1240 goto bail;
1240 1241 }
1241 1242 }
1242 1243 value = PyInt_FromSsize_t(rev);
1243 1244 if (PyList_Append(allvalues, value)) {
1244 1245 goto bail;
1245 1246 }
1246 1247 Py_CLEAR(key);
1247 1248 Py_CLEAR(value);
1248 1249 }
1249 1250 Py_RETURN_NONE;
1250 1251 bail:
1251 1252 Py_XDECREF(key);
1252 1253 Py_XDECREF(value);
1253 1254 return NULL;
1254 1255 }
1255 1256
1256 1257 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1257 1258 {
1258 1259 int rev, generaldelta;
1259 1260 PyObject *stoparg;
1260 1261 int stoprev, iterrev, baserev = -1;
1261 1262 int stopped;
1262 1263 PyObject *chain = NULL, *result = NULL;
1263 1264 const Py_ssize_t length = index_length(self);
1264 1265
1265 1266 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1266 1267 return NULL;
1267 1268 }
1268 1269
1269 1270 if (PyInt_Check(stoparg)) {
1270 1271 stoprev = (int)PyInt_AsLong(stoparg);
1271 1272 if (stoprev == -1 && PyErr_Occurred()) {
1272 1273 return NULL;
1273 1274 }
1274 1275 } else if (stoparg == Py_None) {
1275 1276 stoprev = -2;
1276 1277 } else {
1277 1278 PyErr_SetString(PyExc_ValueError,
1278 1279 "stoprev must be integer or None");
1279 1280 return NULL;
1280 1281 }
1281 1282
1282 1283 if (rev < 0 || rev >= length) {
1283 1284 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1284 1285 return NULL;
1285 1286 }
1286 1287
1287 1288 chain = PyList_New(0);
1288 1289 if (chain == NULL) {
1289 1290 return NULL;
1290 1291 }
1291 1292
1292 1293 baserev = index_baserev(self, rev);
1293 1294
1294 1295 /* This should never happen. */
1295 1296 if (baserev <= -2) {
1296 1297 /* Error should be set by index_deref() */
1297 1298 assert(PyErr_Occurred());
1298 1299 goto bail;
1299 1300 }
1300 1301
1301 1302 iterrev = rev;
1302 1303
1303 1304 while (iterrev != baserev && iterrev != stoprev) {
1304 1305 PyObject *value = PyInt_FromLong(iterrev);
1305 1306 if (value == NULL) {
1306 1307 goto bail;
1307 1308 }
1308 1309 if (PyList_Append(chain, value)) {
1309 1310 Py_DECREF(value);
1310 1311 goto bail;
1311 1312 }
1312 1313 Py_DECREF(value);
1313 1314
1314 1315 if (generaldelta) {
1315 1316 iterrev = baserev;
1316 1317 } else {
1317 1318 iterrev--;
1318 1319 }
1319 1320
1320 1321 if (iterrev < 0) {
1321 1322 break;
1322 1323 }
1323 1324
1324 1325 if (iterrev >= length) {
1325 1326 PyErr_SetString(PyExc_IndexError,
1326 1327 "revision outside index");
1327 1328 return NULL;
1328 1329 }
1329 1330
1330 1331 baserev = index_baserev(self, iterrev);
1331 1332
1332 1333 /* This should never happen. */
1333 1334 if (baserev <= -2) {
1334 1335 /* Error should be set by index_deref() */
1335 1336 assert(PyErr_Occurred());
1336 1337 goto bail;
1337 1338 }
1338 1339 }
1339 1340
1340 1341 if (iterrev == stoprev) {
1341 1342 stopped = 1;
1342 1343 } else {
1343 1344 PyObject *value = PyInt_FromLong(iterrev);
1344 1345 if (value == NULL) {
1345 1346 goto bail;
1346 1347 }
1347 1348 if (PyList_Append(chain, value)) {
1348 1349 Py_DECREF(value);
1349 1350 goto bail;
1350 1351 }
1351 1352 Py_DECREF(value);
1352 1353
1353 1354 stopped = 0;
1354 1355 }
1355 1356
1356 1357 if (PyList_Reverse(chain)) {
1357 1358 goto bail;
1358 1359 }
1359 1360
1360 1361 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1361 1362 Py_DECREF(chain);
1362 1363 return result;
1363 1364
1364 1365 bail:
1365 1366 Py_DECREF(chain);
1366 1367 return NULL;
1367 1368 }
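/*
 * Editorial sketch: the step taken in the walk above. With generaldelta a
 * revision deltas against its stored base revision, which may be any
 * earlier rev; without it, a revision always deltas against the
 * immediately preceding rev. Hypothetical helper mirroring the two
 * branches of the loop.
 */
static inline int sketch_delta_predecessor(int rev, int baserev,
                                           int generaldelta)
{
        return generaldelta ? baserev : rev - 1;
}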
1368 1369
1369 1370 static inline int64_t
1370 1371 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1371 1372 {
1372 1373 int64_t start_offset;
1373 1374 int64_t end_offset;
1374 1375 int end_size;
1375 1376 start_offset = index_get_start(self, start_rev);
1376 1377 if (start_offset < 0) {
1377 1378 return -1;
1378 1379 }
1379 1380 end_offset = index_get_start(self, end_rev);
1380 1381 if (end_offset < 0) {
1381 1382 return -1;
1382 1383 }
1383 1384 end_size = index_get_length(self, end_rev);
1384 1385 if (end_size < 0) {
1385 1386 return -1;
1386 1387 }
1387 1388 if (end_offset < start_offset) {
1388 1389 PyErr_Format(PyExc_ValueError,
1389 1390 "corrupted revlog index: inconsistent offset "
1390 1391 "between revisions (%zd) and (%zd)",
1391 1392 start_rev, end_rev);
1392 1393 return -1;
1393 1394 }
1394 1395 return (end_offset - start_offset) + (int64_t)end_size;
1395 1396 }
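/*
 * Editorial sketch: the span formula used above, with plain integers. The
 * span of revisions [start_rev, end_rev] runs from the first byte of
 * start_rev to the last byte of end_rev, i.e.
 * (start(end_rev) - start(start_rev)) + length(end_rev). A slice whose
 * first revision starts at byte 100 and whose last revision starts at
 * byte 400 and is 50 bytes long therefore spans 350 bytes. Hypothetical
 * helper, for illustration only.
 */
static inline int64_t sketch_segment_span(int64_t first_start,
                                          int64_t last_start, int last_size)
{
        return (last_start - first_start) + (int64_t)last_size;
}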
1396 1397
1397 1398 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1398 1399 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1399 1400 Py_ssize_t startidx, Py_ssize_t endidx)
1400 1401 {
1401 1402 int length;
1402 1403 while (endidx > 1 && endidx > startidx) {
1403 1404 length = index_get_length(self, revs[endidx - 1]);
1404 1405 if (length < 0) {
1405 1406 return -1;
1406 1407 }
1407 1408 if (length != 0) {
1408 1409 break;
1409 1410 }
1410 1411 endidx -= 1;
1411 1412 }
1412 1413 return endidx;
1413 1414 }
1414 1415
1415 1416 struct Gap {
1416 1417 int64_t size;
1417 1418 Py_ssize_t idx;
1418 1419 };
1419 1420
1420 1421 static int gap_compare(const void *left, const void *right)
1421 1422 {
1422 1423 const struct Gap *l_left = ((const struct Gap *)left);
1423 1424 const struct Gap *l_right = ((const struct Gap *)right);
1424 1425 if (l_left->size < l_right->size) {
1425 1426 return -1;
1426 1427 } else if (l_left->size > l_right->size) {
1427 1428 return 1;
1428 1429 }
1429 1430 return 0;
1430 1431 }
1431 1432 static int Py_ssize_t_compare(const void *left, const void *right)
1432 1433 {
1433 1434 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1434 1435 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1435 1436 if (l_left < l_right) {
1436 1437 return -1;
1437 1438 } else if (l_left > l_right) {
1438 1439 return 1;
1439 1440 }
1440 1441 return 0;
1441 1442 }
1442 1443
1443 1444 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1444 1445 {
1445 1446 /* method arguments */
1446 1447 PyObject *list_revs = NULL; /* revisions in the chain */
1447 1448 double targetdensity = 0; /* min density to achieve */
1448 1449 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1449 1450
1450 1451 /* other core variables */
1451 1452 Py_ssize_t idxlen = index_length(self);
1452 1453 Py_ssize_t i; /* used for various iteration */
1453 1454 PyObject *result = NULL; /* the final return of the function */
1454 1455
1455 1456 /* generic information about the delta chain being sliced */
1456 1457 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1457 1458 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1458 1459 int64_t chainpayload = 0; /* sum of all delta in the chain */
1459 1460 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1460 1461
1461 1462 /* variables used for slicing the delta chain */
1462 1463 int64_t readdata = 0; /* amount of data currently planned to be read */
1463 1464 double density = 0; /* ratio of payload data to data actually read */
1464 1465 int64_t previous_end;
1465 1466 struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1466 1467 Py_ssize_t num_gaps =
1467 1468 0; /* total number of notable gaps recorded so far */
1468 1469 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1469 1470 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1470 1471 PyObject *chunk = NULL; /* individual slice */
1471 1472 PyObject *allchunks = NULL; /* all slices */
1472 1473 Py_ssize_t previdx;
1473 1474
1474 1475 /* parsing argument */
1475 1476 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1476 1477 &targetdensity, &mingapsize)) {
1477 1478 goto bail;
1478 1479 }
1479 1480
1480 1481 /* If the delta chain contains a single element, we do not need slicing
1481 1482 */
1482 1483 num_revs = PyList_GET_SIZE(list_revs);
1483 1484 if (num_revs <= 1) {
1484 1485 result = PyTuple_Pack(1, list_revs);
1485 1486 goto done;
1486 1487 }
1487 1488
1488 1489 /* Turn the python list into a native integer array (for efficiency) */
1489 1490 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1490 1491 if (revs == NULL) {
1491 1492 PyErr_NoMemory();
1492 1493 goto bail;
1493 1494 }
1494 1495 for (i = 0; i < num_revs; i++) {
1495 1496 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1496 1497 if (revnum == -1 && PyErr_Occurred()) {
1497 1498 goto bail;
1498 1499 }
1499 1500 if (revnum < nullrev || revnum >= idxlen) {
1500 1501 PyErr_Format(PyExc_IndexError,
1501 1502 "index out of range: %zd", revnum);
1502 1503 goto bail;
1503 1504 }
1504 1505 revs[i] = revnum;
1505 1506 }
1506 1507
1507 1508 /* Compute and check various properties of the unsliced delta chain */
1508 1509 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1509 1510 if (deltachainspan < 0) {
1510 1511 goto bail;
1511 1512 }
1512 1513
1513 1514 if (deltachainspan <= mingapsize) {
1514 1515 result = PyTuple_Pack(1, list_revs);
1515 1516 goto done;
1516 1517 }
1517 1518 chainpayload = 0;
1518 1519 for (i = 0; i < num_revs; i++) {
1519 1520 int tmp = index_get_length(self, revs[i]);
1520 1521 if (tmp < 0) {
1521 1522 goto bail;
1522 1523 }
1523 1524 chainpayload += tmp;
1524 1525 }
1525 1526
1526 1527 readdata = deltachainspan;
1527 1528 density = 1.0;
1528 1529
1529 1530 if (0 < deltachainspan) {
1530 1531 density = (double)chainpayload / (double)deltachainspan;
1531 1532 }
1532 1533
1533 1534 if (density >= targetdensity) {
1534 1535 result = PyTuple_Pack(1, list_revs);
1535 1536 goto done;
1536 1537 }
1537 1538
1538 1539 /* if chain is too sparse, look for relevant gaps */
1539 1540 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1540 1541 if (gaps == NULL) {
1541 1542 PyErr_NoMemory();
1542 1543 goto bail;
1543 1544 }
1544 1545
1545 1546 previous_end = -1;
1546 1547 for (i = 0; i < num_revs; i++) {
1547 1548 int64_t revstart;
1548 1549 int revsize;
1549 1550 revstart = index_get_start(self, revs[i]);
1550 1551 if (revstart < 0) {
1551 1552 goto bail;
1552 1553 };
1553 1554 revsize = index_get_length(self, revs[i]);
1554 1555 if (revsize < 0) {
1555 1556 goto bail;
1556 1557 };
1557 1558 if (revsize == 0) {
1558 1559 continue;
1559 1560 }
1560 1561 if (previous_end >= 0) {
1561 1562 int64_t gapsize = revstart - previous_end;
1562 1563 if (gapsize > mingapsize) {
1563 1564 gaps[num_gaps].size = gapsize;
1564 1565 gaps[num_gaps].idx = i;
1565 1566 num_gaps += 1;
1566 1567 }
1567 1568 }
1568 1569 previous_end = revstart + revsize;
1569 1570 }
1570 1571 if (num_gaps == 0) {
1571 1572 result = PyTuple_Pack(1, list_revs);
1572 1573 goto done;
1573 1574 }
1574 1575 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1575 1576
1576 1577 /* Slice the largest gaps first; they improve the density the most */
1577 1578 selected_indices =
1578 1579 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1579 1580 if (selected_indices == NULL) {
1580 1581 PyErr_NoMemory();
1581 1582 goto bail;
1582 1583 }
1583 1584
1584 1585 for (i = num_gaps - 1; i >= 0; i--) {
1585 1586 selected_indices[num_selected] = gaps[i].idx;
1586 1587 readdata -= gaps[i].size;
1587 1588 num_selected += 1;
1588 1589 if (readdata <= 0) {
1589 1590 density = 1.0;
1590 1591 } else {
1591 1592 density = (double)chainpayload / (double)readdata;
1592 1593 }
1593 1594 if (density >= targetdensity) {
1594 1595 break;
1595 1596 }
1596 1597 }
1597 1598 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1598 1599 &Py_ssize_t_compare);
1599 1600
1600 1601 /* create the resulting slice */
1601 1602 allchunks = PyList_New(0);
1602 1603 if (allchunks == NULL) {
1603 1604 goto bail;
1604 1605 }
1605 1606 previdx = 0;
1606 1607 selected_indices[num_selected] = num_revs;
1607 1608 for (i = 0; i <= num_selected; i++) {
1608 1609 Py_ssize_t idx = selected_indices[i];
1609 1610 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1610 1611 if (endidx < 0) {
1611 1612 goto bail;
1612 1613 }
1613 1614 if (previdx < endidx) {
1614 1615 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1615 1616 if (chunk == NULL) {
1616 1617 goto bail;
1617 1618 }
1618 1619 if (PyList_Append(allchunks, chunk) == -1) {
1619 1620 goto bail;
1620 1621 }
1621 1622 Py_DECREF(chunk);
1622 1623 chunk = NULL;
1623 1624 }
1624 1625 previdx = idx;
1625 1626 }
1626 1627 result = allchunks;
1627 1628 goto done;
1628 1629
1629 1630 bail:
1630 1631 Py_XDECREF(allchunks);
1631 1632 Py_XDECREF(chunk);
1632 1633 done:
1633 1634 free(revs);
1634 1635 free(gaps);
1635 1636 free(selected_indices);
1636 1637 return result;
1637 1638 }
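/*
 * Editorial sketch: the density test driving the slicing above. A chain is
 * dense enough once chainpayload / readdata >= targetdensity; removing the
 * largest gaps first shrinks readdata fastest. Hypothetical helper
 * mirroring the computation in the function above.
 */
static inline int sketch_dense_enough(int64_t chainpayload, int64_t readdata,
                                      double targetdensity)
{
        double density = 1.0;
        if (readdata > 0)
                density = (double)chainpayload / (double)readdata;
        return density >= targetdensity;
}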
1638 1639
1639 1640 static inline int nt_level(const char *node, Py_ssize_t level)
1640 1641 {
1641 1642 int v = node[level >> 1];
1642 1643 if (!(level & 1))
1643 1644 v >>= 4;
1644 1645 return v & 0xf;
1645 1646 }
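/*
 * Editorial sketch: nt_level() reads one hex digit (nybble) of a binary
 * node per trie level; even levels take the high nybble of byte level/2,
 * odd levels the low nybble. Illustrative, unused check with a
 * hypothetical name.
 */
static inline void sketch_nt_level_demo(void)
{
        const char node[1] = {(char)0xab};
        assert(nt_level(node, 0) == 0xa); /* high nybble of byte 0 */
        assert(nt_level(node, 1) == 0xb); /* low nybble of byte 0 */
}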
1646 1647
1647 1648 /*
1648 1649 * Return values:
1649 1650 *
1650 1651 * -4: match is ambiguous (multiple candidates)
1651 1652 * -2: not found
1652 1653 * rest: valid rev
1653 1654 */
1654 1655 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1655 1656 int hex)
1656 1657 {
1657 1658 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1658 1659 int level, maxlevel, off;
1659 1660
1660 1661 /* If the input is binary, do a fast check for the nullid first. */
1661 1662 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1662 1663 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1663 1664 return -1;
1664 1665
1665 1666 if (hex)
1666 1667 maxlevel = nodelen;
1667 1668 else
1668 1669 maxlevel = 2 * nodelen;
1669 1670 if (maxlevel > 2 * self->nodelen)
1670 1671 maxlevel = 2 * self->nodelen;
1671 1672
1672 1673 for (level = off = 0; level < maxlevel; level++) {
1673 1674 int k = getnybble(node, level);
1674 1675 nodetreenode *n = &self->nodes[off];
1675 1676 int v = n->children[k];
1676 1677
1677 1678 if (v < 0) {
1678 1679 const char *n;
1679 1680 Py_ssize_t i;
1680 1681
1681 1682 v = -(v + 2);
1682 1683 n = index_node(self->index, v);
1683 1684 if (n == NULL)
1684 1685 return -2;
1685 1686 for (i = level; i < maxlevel; i++)
1686 1687 if (getnybble(node, i) != nt_level(n, i))
1687 1688 return -2;
1688 1689 return v;
1689 1690 }
1690 1691 if (v == 0)
1691 1692 return -2;
1692 1693 off = v;
1693 1694 }
1694 1695 /* multiple matches against an ambiguous prefix */
1695 1696 return -4;
1696 1697 }
1697 1698
1698 1699 static int nt_new(nodetree *self)
1699 1700 {
1700 1701 if (self->length == self->capacity) {
1701 1702 size_t newcapacity;
1702 1703 nodetreenode *newnodes;
1703 1704 newcapacity = self->capacity * 2;
1704 1705 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1705 1706 PyErr_SetString(PyExc_MemoryError,
1706 1707 "overflow in nt_new");
1707 1708 return -1;
1708 1709 }
1709 1710 newnodes =
1710 1711 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1711 1712 if (newnodes == NULL) {
1712 1713 PyErr_SetString(PyExc_MemoryError, "out of memory");
1713 1714 return -1;
1714 1715 }
1715 1716 self->capacity = newcapacity;
1716 1717 self->nodes = newnodes;
1717 1718 memset(&self->nodes[self->length], 0,
1718 1719 sizeof(nodetreenode) * (self->capacity - self->length));
1719 1720 }
1720 1721 return self->length++;
1721 1722 }
1722 1723
1723 1724 static int nt_insert(nodetree *self, const char *node, int rev)
1724 1725 {
1725 1726 int level = 0;
1726 1727 int off = 0;
1727 1728
1728 1729 while (level < 2 * self->nodelen) {
1729 1730 int k = nt_level(node, level);
1730 1731 nodetreenode *n;
1731 1732 int v;
1732 1733
1733 1734 n = &self->nodes[off];
1734 1735 v = n->children[k];
1735 1736
1736 1737 if (v == 0) {
1737 1738 n->children[k] = -rev - 2;
1738 1739 return 0;
1739 1740 }
1740 1741 if (v < 0) {
1741 1742 const char *oldnode =
1742 1743 index_node_existing(self->index, -(v + 2));
1743 1744 int noff;
1744 1745
1745 1746 if (oldnode == NULL)
1746 1747 return -1;
1747 1748 if (!memcmp(oldnode, node, self->nodelen)) {
1748 1749 n->children[k] = -rev - 2;
1749 1750 return 0;
1750 1751 }
1751 1752 noff = nt_new(self);
1752 1753 if (noff == -1)
1753 1754 return -1;
1754 1755 /* self->nodes may have been changed by realloc */
1755 1756 self->nodes[off].children[k] = noff;
1756 1757 off = noff;
1757 1758 n = &self->nodes[off];
1758 1759 n->children[nt_level(oldnode, ++level)] = v;
1759 1760 if (level > self->depth)
1760 1761 self->depth = level;
1761 1762 self->splits += 1;
1762 1763 } else {
1763 1764 level += 1;
1764 1765 off = v;
1765 1766 }
1766 1767 }
1767 1768
1768 1769 return -1;
1769 1770 }
1770 1771
1771 1772 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1772 1773 {
1773 1774 Py_ssize_t rev;
1774 1775 const char *node;
1775 1776 Py_ssize_t length;
1776 1777 if (!PyArg_ParseTuple(args, "n", &rev))
1777 1778 return NULL;
1778 1779 length = index_length(self->nt.index);
1779 1780 if (rev < 0 || rev >= length) {
1780 1781 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1781 1782 return NULL;
1782 1783 }
1783 1784 node = index_node_existing(self->nt.index, rev);
1784 1785 if (nt_insert(&self->nt, node, (int)rev) == -1)
1785 1786 return NULL;
1786 1787 Py_RETURN_NONE;
1787 1788 }
1788 1789
1789 1790 static int nt_delete_node(nodetree *self, const char *node)
1790 1791 {
1791 1792 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1792 1793 */
1793 1794 return nt_insert(self, node, -2);
1794 1795 }
1795 1796
1796 1797 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1797 1798 {
1798 1799 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1799 1800 self->nodes = NULL;
1800 1801
1801 1802 self->index = index;
1802 1803 /* The input capacity is in terms of revisions, while the field is in
1803 1804 * terms of nodetree nodes. */
1804 1805 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1805 1806 self->nodelen = index->nodelen;
1806 1807 self->depth = 0;
1807 1808 self->splits = 0;
1808 1809 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1809 1810 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1810 1811 return -1;
1811 1812 }
1812 1813 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1813 1814 if (self->nodes == NULL) {
1814 1815 PyErr_NoMemory();
1815 1816 return -1;
1816 1817 }
1817 1818 self->length = 1;
1818 1819 return 0;
1819 1820 }
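/*
 * nt_init sizing note: the caller passes a capacity in revisions.  An
 * input below 4 is replaced by 4; otherwise roughly one nodetree node
 * is reserved per two revisions.  Slot 0 serves as the root (nt_insert
 * and nt_find both start walking at off == 0), which is why length
 * starts at 1.
 */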
1820 1821
1821 1822 static int ntobj_init(nodetreeObject *self, PyObject *args)
1822 1823 {
1823 1824 PyObject *index;
1824 1825 unsigned capacity;
1825 1826 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1826 1827 &capacity))
1827 1828 return -1;
1828 1829 Py_INCREF(index);
1829 1830 return nt_init(&self->nt, (indexObject *)index, capacity);
1830 1831 }
1831 1832
1832 1833 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1833 1834 {
1834 1835 return nt_find(self, node, nodelen, 1);
1835 1836 }
1836 1837
1837 1838 /*
1838 1839 * Find the length of the shortest unique prefix of node.
1839 1840 *
1840 1841 * Return values:
1841 1842 *
1842 1843 * -3: error (exception set)
1843 1844 * -2: not found (no exception set)
1844 1845 * rest: length of shortest prefix
1845 1846 */
1846 1847 static int nt_shortest(nodetree *self, const char *node)
1847 1848 {
1848 1849 int level, off;
1849 1850
1850 1851 for (level = off = 0; level < 2 * self->nodelen; level++) {
1851 1852 int k, v;
1852 1853 nodetreenode *n = &self->nodes[off];
1853 1854 k = nt_level(node, level);
1854 1855 v = n->children[k];
1855 1856 if (v < 0) {
1856 1857 const char *n;
1857 1858 v = -(v + 2);
1858 1859 n = index_node_existing(self->index, v);
1859 1860 if (n == NULL)
1860 1861 return -3;
1861 1862 if (memcmp(node, n, self->nodelen) != 0)
1862 1863 /*
1863 1864 * Found a unique prefix, but it wasn't for the
1864 1865 			 * requested node (i.e. the requested node does
1865 1866 * not exist).
1866 1867 */
1867 1868 return -2;
1868 1869 return level + 1;
1869 1870 }
1870 1871 if (v == 0)
1871 1872 return -2;
1872 1873 off = v;
1873 1874 }
1874 1875 /*
1875 1876 	 * The node was still not unique after 2 * nodelen hex digits, which
1876 1877 	 * cannot happen: reaching this point means a programming error in
1877 1878 	 * this file inserted a node longer than 2 * nodelen hex digits.
1878 1879 */
1879 1880 PyErr_SetString(PyExc_Exception, "broken node tree");
1880 1881 return -3;
1881 1882 }
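/*
 * Example for nt_shortest, with hypothetical nodes: if the index holds
 * two nodes that agree on their first three nibbles and differ on the
 * fourth, the walk above follows interior links for the first three
 * nibbles and finds a leaf at level 3, returning 3 + 1 == 4 -- four hex
 * digits are needed to name either node unambiguously.
 */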
1882 1883
1883 1884 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1884 1885 {
1885 1886 PyObject *val;
1886 1887 char *node;
1887 1888 int length;
1888 1889
1889 1890 if (!PyArg_ParseTuple(args, "O", &val))
1890 1891 return NULL;
1891 1892 if (node_check(self->nt.nodelen, val, &node) == -1)
1892 1893 return NULL;
1893 1894
1894 1895 length = nt_shortest(&self->nt, node);
1895 1896 if (length == -3)
1896 1897 return NULL;
1897 1898 if (length == -2) {
1898 1899 raise_revlog_error();
1899 1900 return NULL;
1900 1901 }
1901 1902 return PyInt_FromLong(length);
1902 1903 }
1903 1904
1904 1905 static void nt_dealloc(nodetree *self)
1905 1906 {
1906 1907 free(self->nodes);
1907 1908 self->nodes = NULL;
1908 1909 }
1909 1910
1910 1911 static void ntobj_dealloc(nodetreeObject *self)
1911 1912 {
1912 1913 Py_XDECREF(self->nt.index);
1913 1914 nt_dealloc(&self->nt);
1914 1915 PyObject_Del(self);
1915 1916 }
1916 1917
1917 1918 static PyMethodDef ntobj_methods[] = {
1918 1919 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1919 1920 "insert an index entry"},
1920 1921 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1921 1922 "find length of shortest hex nodeid of a binary ID"},
1922 1923 {NULL} /* Sentinel */
1923 1924 };
1924 1925
1925 1926 static PyTypeObject nodetreeType = {
1926 1927 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1927 1928 "parsers.nodetree", /* tp_name */
1928 1929 sizeof(nodetreeObject), /* tp_basicsize */
1929 1930 0, /* tp_itemsize */
1930 1931 (destructor)ntobj_dealloc, /* tp_dealloc */
1931 1932 0, /* tp_print */
1932 1933 0, /* tp_getattr */
1933 1934 0, /* tp_setattr */
1934 1935 0, /* tp_compare */
1935 1936 0, /* tp_repr */
1936 1937 0, /* tp_as_number */
1937 1938 0, /* tp_as_sequence */
1938 1939 0, /* tp_as_mapping */
1939 1940 0, /* tp_hash */
1940 1941 0, /* tp_call */
1941 1942 0, /* tp_str */
1942 1943 0, /* tp_getattro */
1943 1944 0, /* tp_setattro */
1944 1945 0, /* tp_as_buffer */
1945 1946 Py_TPFLAGS_DEFAULT, /* tp_flags */
1946 1947 "nodetree", /* tp_doc */
1947 1948 0, /* tp_traverse */
1948 1949 0, /* tp_clear */
1949 1950 0, /* tp_richcompare */
1950 1951 0, /* tp_weaklistoffset */
1951 1952 0, /* tp_iter */
1952 1953 0, /* tp_iternext */
1953 1954 ntobj_methods, /* tp_methods */
1954 1955 0, /* tp_members */
1955 1956 0, /* tp_getset */
1956 1957 0, /* tp_base */
1957 1958 0, /* tp_dict */
1958 1959 0, /* tp_descr_get */
1959 1960 0, /* tp_descr_set */
1960 1961 0, /* tp_dictoffset */
1961 1962 (initproc)ntobj_init, /* tp_init */
1962 1963 0, /* tp_alloc */
1963 1964 };
1964 1965
1965 1966 static int index_init_nt(indexObject *self)
1966 1967 {
1967 1968 if (!self->ntinitialized) {
1968 1969 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1969 1970 nt_dealloc(&self->nt);
1970 1971 return -1;
1971 1972 }
1972 1973 if (nt_insert(&self->nt, nullid, -1) == -1) {
1973 1974 nt_dealloc(&self->nt);
1974 1975 return -1;
1975 1976 }
1976 1977 self->ntinitialized = 1;
1977 1978 self->ntrev = (int)index_length(self);
1978 1979 self->ntlookups = 1;
1979 1980 self->ntmisses = 0;
1980 1981 }
1981 1982 return 0;
1982 1983 }
1983 1984
1984 1985 /*
1985 1986 * Return values:
1986 1987 *
1987 1988 * -3: error (exception set)
1988 1989 * -2: not found (no exception set)
1989 1990 * rest: valid rev
1990 1991 */
1991 1992 static int index_find_node(indexObject *self, const char *node)
1992 1993 {
1993 1994 int rev;
1994 1995
1995 1996 if (index_init_nt(self) == -1)
1996 1997 return -3;
1997 1998
1998 1999 self->ntlookups++;
1999 2000 rev = nt_find(&self->nt, node, self->nodelen, 0);
2000 2001 if (rev >= -1)
2001 2002 return rev;
2002 2003
2003 2004 /*
2004 2005 * For the first handful of lookups, we scan the entire index,
2005 2006 * and cache only the matching nodes. This optimizes for cases
2006 2007 * like "hg tip", where only a few nodes are accessed.
2007 2008 *
2008 2009 * After that, we cache every node we visit, using a single
2009 2010 * scan amortized over multiple lookups. This gives the best
2010 2011 * bulk performance, e.g. for "hg log".
2011 2012 */
2012 2013 if (self->ntmisses++ < 4) {
2013 2014 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2014 2015 const char *n = index_node_existing(self, rev);
2015 2016 if (n == NULL)
2016 2017 return -3;
2017 2018 if (memcmp(node, n, self->nodelen) == 0) {
2018 2019 if (nt_insert(&self->nt, n, rev) == -1)
2019 2020 return -3;
2020 2021 break;
2021 2022 }
2022 2023 }
2023 2024 } else {
2024 2025 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2025 2026 const char *n = index_node_existing(self, rev);
2026 2027 if (n == NULL)
2027 2028 return -3;
2028 2029 if (nt_insert(&self->nt, n, rev) == -1) {
2029 2030 self->ntrev = rev + 1;
2030 2031 return -3;
2031 2032 }
2032 2033 if (memcmp(node, n, self->nodelen) == 0) {
2033 2034 break;
2034 2035 }
2035 2036 }
2036 2037 self->ntrev = rev;
2037 2038 }
2038 2039
2039 2040 if (rev >= 0)
2040 2041 return rev;
2041 2042 return -2;
2042 2043 }
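/*
 * Caching behaviour of index_find_node in practice: during the first
 * four misses the index is scanned, but only the matching node (if any)
 * is inserted into the trie and ntrev stays put, so a short-lived
 * command pays for a scan but not for a full trie build.  From the
 * fifth miss on, every node visited by the scan is inserted and ntrev
 * is lowered to where the scan stopped, so later lookups resume from
 * there instead of rescanning the same revisions.
 */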
2043 2044
2044 2045 static PyObject *index_getitem(indexObject *self, PyObject *value)
2045 2046 {
2046 2047 char *node;
2047 2048 int rev;
2048 2049
2049 2050 if (PyInt_Check(value)) {
2050 2051 long idx;
2051 2052 if (!pylong_to_long(value, &idx)) {
2052 2053 return NULL;
2053 2054 }
2054 2055 return index_get(self, idx);
2055 2056 }
2056 2057
2057 2058 if (node_check(self->nodelen, value, &node) == -1)
2058 2059 return NULL;
2059 2060 rev = index_find_node(self, node);
2060 2061 if (rev >= -1)
2061 2062 return PyInt_FromLong(rev);
2062 2063 if (rev == -2)
2063 2064 raise_revlog_error();
2064 2065 return NULL;
2065 2066 }
2066 2067
2067 2068 /*
2068 2069 * Fully populate the radix tree.
2069 2070 */
2070 2071 static int index_populate_nt(indexObject *self)
2071 2072 {
2072 2073 int rev;
2073 2074 if (self->ntrev > 0) {
2074 2075 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2075 2076 const char *n = index_node_existing(self, rev);
2076 2077 if (n == NULL)
2077 2078 return -1;
2078 2079 if (nt_insert(&self->nt, n, rev) == -1)
2079 2080 return -1;
2080 2081 }
2081 2082 self->ntrev = -1;
2082 2083 }
2083 2084 return 0;
2084 2085 }
2085 2086
2086 2087 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2087 2088 {
2088 2089 const char *fullnode;
2089 2090 Py_ssize_t nodelen;
2090 2091 char *node;
2091 2092 int rev, i;
2092 2093
2093 2094 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2094 2095 return NULL;
2095 2096
2096 2097 if (nodelen < 1) {
2097 2098 PyErr_SetString(PyExc_ValueError, "key too short");
2098 2099 return NULL;
2099 2100 }
2100 2101
2101 2102 if (nodelen > 2 * self->nodelen) {
2102 2103 PyErr_SetString(PyExc_ValueError, "key too long");
2103 2104 return NULL;
2104 2105 }
2105 2106
2106 2107 for (i = 0; i < nodelen; i++)
2107 2108 hexdigit(node, i);
2108 2109 if (PyErr_Occurred()) {
2109 2110 /* input contains non-hex characters */
2110 2111 PyErr_Clear();
2111 2112 Py_RETURN_NONE;
2112 2113 }
2113 2114
2114 2115 if (index_init_nt(self) == -1)
2115 2116 return NULL;
2116 2117 if (index_populate_nt(self) == -1)
2117 2118 return NULL;
2118 2119 rev = nt_partialmatch(&self->nt, node, nodelen);
2119 2120
2120 2121 switch (rev) {
2121 2122 case -4:
2122 2123 raise_revlog_error();
2123 2124 return NULL;
2124 2125 case -2:
2125 2126 Py_RETURN_NONE;
2126 2127 case -1:
2127 2128 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2128 2129 }
2129 2130
2130 2131 fullnode = index_node_existing(self, rev);
2131 2132 if (fullnode == NULL) {
2132 2133 return NULL;
2133 2134 }
2134 2135 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2135 2136 }
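/*
 * From Python, index.partialmatch() therefore behaves as follows: an
 * ambiguous prefix (-4) raises RevlogError, an unknown prefix (-2)
 * returns None, a prefix of the null node (-1) returns nullid, and any
 * other rev returns the full binary node.
 */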
2136 2137
2137 2138 static PyObject *index_shortest(indexObject *self, PyObject *args)
2138 2139 {
2139 2140 PyObject *val;
2140 2141 char *node;
2141 2142 int length;
2142 2143
2143 2144 if (!PyArg_ParseTuple(args, "O", &val))
2144 2145 return NULL;
2145 2146 if (node_check(self->nodelen, val, &node) == -1)
2146 2147 return NULL;
2147 2148
2148 2149 self->ntlookups++;
2149 2150 if (index_init_nt(self) == -1)
2150 2151 return NULL;
2151 2152 if (index_populate_nt(self) == -1)
2152 2153 return NULL;
2153 2154 length = nt_shortest(&self->nt, node);
2154 2155 if (length == -3)
2155 2156 return NULL;
2156 2157 if (length == -2) {
2157 2158 raise_revlog_error();
2158 2159 return NULL;
2159 2160 }
2160 2161 return PyInt_FromLong(length);
2161 2162 }
2162 2163
2163 2164 static PyObject *index_m_get(indexObject *self, PyObject *args)
2164 2165 {
2165 2166 PyObject *val;
2166 2167 char *node;
2167 2168 int rev;
2168 2169
2169 2170 if (!PyArg_ParseTuple(args, "O", &val))
2170 2171 return NULL;
2171 2172 if (node_check(self->nodelen, val, &node) == -1)
2172 2173 return NULL;
2173 2174 rev = index_find_node(self, node);
2174 2175 if (rev == -3)
2175 2176 return NULL;
2176 2177 if (rev == -2)
2177 2178 Py_RETURN_NONE;
2178 2179 return PyInt_FromLong(rev);
2179 2180 }
2180 2181
2181 2182 static int index_contains(indexObject *self, PyObject *value)
2182 2183 {
2183 2184 char *node;
2184 2185
2185 2186 if (PyInt_Check(value)) {
2186 2187 long rev;
2187 2188 if (!pylong_to_long(value, &rev)) {
2188 2189 return -1;
2189 2190 }
2190 2191 return rev >= -1 && rev < index_length(self);
2191 2192 }
2192 2193
2193 2194 if (node_check(self->nodelen, value, &node) == -1)
2194 2195 return -1;
2195 2196
2196 2197 switch (index_find_node(self, node)) {
2197 2198 case -3:
2198 2199 return -1;
2199 2200 case -2:
2200 2201 return 0;
2201 2202 default:
2202 2203 return 1;
2203 2204 }
2204 2205 }
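/*
 * Membership semantics: `rev in index` accepts the virtual null
 * revision, hence the `rev >= -1` test above, while `node in index`
 * funnels through index_find_node and maps error/missing/found to
 * -1/0/1 for the sq_contains slot.
 */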
2205 2206
2206 2207 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2207 2208 {
2208 2209 int ret = index_contains(self, args);
2209 2210 if (ret < 0)
2210 2211 return NULL;
2211 2212 return PyBool_FromLong((long)ret);
2212 2213 }
2213 2214
2214 2215 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2215 2216 {
2216 2217 char *node;
2217 2218 int rev;
2218 2219
2219 2220 if (node_check(self->nodelen, val, &node) == -1)
2220 2221 return NULL;
2221 2222 rev = index_find_node(self, node);
2222 2223 if (rev >= -1)
2223 2224 return PyInt_FromLong(rev);
2224 2225 if (rev == -2)
2225 2226 raise_revlog_error();
2226 2227 return NULL;
2227 2228 }
2228 2229
2229 2230 typedef uint64_t bitmask;
2230 2231
2231 2232 /*
2232 2233 * Given a disjoint set of revs, return all candidates for the
2233 2234 * greatest common ancestor. In revset notation, this is the set
2234 2235 * "heads(::a and ::b and ...)"
2235 2236 */
2236 2237 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2237 2238 int revcount)
2238 2239 {
2239 2240 const bitmask allseen = (1ull << revcount) - 1;
2240 2241 const bitmask poison = 1ull << revcount;
2241 2242 PyObject *gca = PyList_New(0);
2242 2243 int i, v, interesting;
2243 2244 int maxrev = -1;
2244 2245 bitmask sp;
2245 2246 bitmask *seen;
2246 2247
2247 2248 if (gca == NULL)
2248 2249 return PyErr_NoMemory();
2249 2250
2250 2251 for (i = 0; i < revcount; i++) {
2251 2252 if (revs[i] > maxrev)
2252 2253 maxrev = revs[i];
2253 2254 }
2254 2255
2255 2256 seen = calloc(sizeof(*seen), maxrev + 1);
2256 2257 if (seen == NULL) {
2257 2258 Py_DECREF(gca);
2258 2259 return PyErr_NoMemory();
2259 2260 }
2260 2261
2261 2262 for (i = 0; i < revcount; i++)
2262 2263 seen[revs[i]] = 1ull << i;
2263 2264
2264 2265 interesting = revcount;
2265 2266
2266 2267 for (v = maxrev; v >= 0 && interesting; v--) {
2267 2268 bitmask sv = seen[v];
2268 2269 int parents[2];
2269 2270
2270 2271 if (!sv)
2271 2272 continue;
2272 2273
2273 2274 if (sv < poison) {
2274 2275 interesting -= 1;
2275 2276 if (sv == allseen) {
2276 2277 PyObject *obj = PyInt_FromLong(v);
2277 2278 if (obj == NULL)
2278 2279 goto bail;
2279 2280 if (PyList_Append(gca, obj) == -1) {
2280 2281 Py_DECREF(obj);
2281 2282 goto bail;
2282 2283 }
2283 2284 sv |= poison;
2284 2285 for (i = 0; i < revcount; i++) {
2285 2286 if (revs[i] == v)
2286 2287 goto done;
2287 2288 }
2288 2289 }
2289 2290 }
2290 2291 if (index_get_parents(self, v, parents, maxrev) < 0)
2291 2292 goto bail;
2292 2293
2293 2294 for (i = 0; i < 2; i++) {
2294 2295 int p = parents[i];
2295 2296 if (p == -1)
2296 2297 continue;
2297 2298 sp = seen[p];
2298 2299 if (sv < poison) {
2299 2300 if (sp == 0) {
2300 2301 seen[p] = sv;
2301 2302 interesting++;
2302 2303 } else if (sp != sv)
2303 2304 seen[p] |= sv;
2304 2305 } else {
2305 2306 if (sp && sp < poison)
2306 2307 interesting--;
2307 2308 seen[p] = sv;
2308 2309 }
2309 2310 }
2310 2311 }
2311 2312
2312 2313 done:
2313 2314 free(seen);
2314 2315 return gca;
2315 2316 bail:
2316 2317 free(seen);
2317 2318 Py_XDECREF(gca);
2318 2319 return NULL;
2319 2320 }
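/*
 * Sketch of the bitmask sweep above, for two input revs: allseen is
 * 0b11 and poison is 0b100.  Walking from maxrev down, each rev's seen
 * mask records which inputs can reach it; any rev whose not-yet-poisoned
 * mask equals 0b11 is reachable from both inputs, is appended to gca,
 * and then propagates the poison bit to its ancestors so that deeper
 * common ancestors are not reported as additional candidates.
 */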
2320 2321
2321 2322 /*
2322 2323 * Given a disjoint set of revs, return the subset with the longest
2323 2324 * path to the root.
2324 2325 */
2325 2326 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2326 2327 {
2327 2328 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2328 2329 static const Py_ssize_t capacity = 24;
2329 2330 int *depth, *interesting = NULL;
2330 2331 int i, j, v, ninteresting;
2331 2332 PyObject *dict = NULL, *keys = NULL;
2332 2333 long *seen = NULL;
2333 2334 int maxrev = -1;
2334 2335 long final;
2335 2336
2336 2337 if (revcount > capacity) {
2337 2338 PyErr_Format(PyExc_OverflowError,
2338 2339 "bitset size (%ld) > capacity (%ld)",
2339 2340 (long)revcount, (long)capacity);
2340 2341 return NULL;
2341 2342 }
2342 2343
2343 2344 for (i = 0; i < revcount; i++) {
2344 2345 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2345 2346 if (n > maxrev)
2346 2347 maxrev = n;
2347 2348 }
2348 2349
2349 2350 depth = calloc(sizeof(*depth), maxrev + 1);
2350 2351 if (depth == NULL)
2351 2352 return PyErr_NoMemory();
2352 2353
2353 2354 seen = calloc(sizeof(*seen), maxrev + 1);
2354 2355 if (seen == NULL) {
2355 2356 PyErr_NoMemory();
2356 2357 goto bail;
2357 2358 }
2358 2359
2359 2360 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2360 2361 if (interesting == NULL) {
2361 2362 PyErr_NoMemory();
2362 2363 goto bail;
2363 2364 }
2364 2365
2365 2366 if (PyList_Sort(revs) == -1)
2366 2367 goto bail;
2367 2368
2368 2369 for (i = 0; i < revcount; i++) {
2369 2370 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2370 2371 long b = 1l << i;
2371 2372 depth[n] = 1;
2372 2373 seen[n] = b;
2373 2374 interesting[b] = 1;
2374 2375 }
2375 2376
2376 2377 /* invariant: ninteresting is the number of non-zero entries in
2377 2378 * interesting. */
2378 2379 ninteresting = (int)revcount;
2379 2380
2380 2381 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2381 2382 int dv = depth[v];
2382 2383 int parents[2];
2383 2384 long sv;
2384 2385
2385 2386 if (dv == 0)
2386 2387 continue;
2387 2388
2388 2389 sv = seen[v];
2389 2390 if (index_get_parents(self, v, parents, maxrev) < 0)
2390 2391 goto bail;
2391 2392
2392 2393 for (i = 0; i < 2; i++) {
2393 2394 int p = parents[i];
2394 2395 long sp;
2395 2396 int dp;
2396 2397
2397 2398 if (p == -1)
2398 2399 continue;
2399 2400
2400 2401 dp = depth[p];
2401 2402 sp = seen[p];
2402 2403 if (dp <= dv) {
2403 2404 depth[p] = dv + 1;
2404 2405 if (sp != sv) {
2405 2406 interesting[sv] += 1;
2406 2407 seen[p] = sv;
2407 2408 if (sp) {
2408 2409 interesting[sp] -= 1;
2409 2410 if (interesting[sp] == 0)
2410 2411 ninteresting -= 1;
2411 2412 }
2412 2413 }
2413 2414 } else if (dv == dp - 1) {
2414 2415 long nsp = sp | sv;
2415 2416 if (nsp == sp)
2416 2417 continue;
2417 2418 seen[p] = nsp;
2418 2419 interesting[sp] -= 1;
2419 2420 if (interesting[sp] == 0)
2420 2421 ninteresting -= 1;
2421 2422 if (interesting[nsp] == 0)
2422 2423 ninteresting += 1;
2423 2424 interesting[nsp] += 1;
2424 2425 }
2425 2426 }
2426 2427 interesting[sv] -= 1;
2427 2428 if (interesting[sv] == 0)
2428 2429 ninteresting -= 1;
2429 2430 }
2430 2431
2431 2432 final = 0;
2432 2433 j = ninteresting;
2433 2434 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2434 2435 if (interesting[i] == 0)
2435 2436 continue;
2436 2437 final |= i;
2437 2438 j -= 1;
2438 2439 }
2439 2440 if (final == 0) {
2440 2441 keys = PyList_New(0);
2441 2442 goto bail;
2442 2443 }
2443 2444
2444 2445 dict = PyDict_New();
2445 2446 if (dict == NULL)
2446 2447 goto bail;
2447 2448
2448 2449 for (i = 0; i < revcount; i++) {
2449 2450 PyObject *key;
2450 2451
2451 2452 if ((final & (1 << i)) == 0)
2452 2453 continue;
2453 2454
2454 2455 key = PyList_GET_ITEM(revs, i);
2455 2456 Py_INCREF(key);
2456 2457 Py_INCREF(Py_None);
2457 2458 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2458 2459 Py_DECREF(key);
2459 2460 Py_DECREF(Py_None);
2460 2461 goto bail;
2461 2462 }
2462 2463 }
2463 2464
2464 2465 keys = PyDict_Keys(dict);
2465 2466
2466 2467 bail:
2467 2468 free(depth);
2468 2469 free(seen);
2469 2470 free(interesting);
2470 2471 Py_XDECREF(dict);
2471 2472
2472 2473 return keys;
2473 2474 }
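/*
 * Bookkeeping in find_deepest: depth[v] is the length of the longest
 * path found so far from one of the input revs down to v, seen[v]
 * records which inputs reach v along those deepest paths, and
 * interesting[mask] counts how many revs currently carry each mask.
 * The sweep stops once at most one mask remains interesting; the bits
 * OR-ed into `final` then select which of the (at most 24) inputs
 * survive as the deepest ones.
 */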
2474 2475
2475 2476 /*
2476 2477 * Given a (possibly overlapping) set of revs, return all the
2477 2478  * common ancestor heads: heads(::args[0] and ::args[1] and ...)
2478 2479 */
2479 2480 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2480 2481 {
2481 2482 PyObject *ret = NULL;
2482 2483 Py_ssize_t argcount, i, len;
2483 2484 bitmask repeat = 0;
2484 2485 int revcount = 0;
2485 2486 int *revs;
2486 2487
2487 2488 argcount = PySequence_Length(args);
2488 2489 revs = PyMem_Malloc(argcount * sizeof(*revs));
2489 2490 if (argcount > 0 && revs == NULL)
2490 2491 return PyErr_NoMemory();
2491 2492 len = index_length(self);
2492 2493
2493 2494 for (i = 0; i < argcount; i++) {
2494 2495 static const int capacity = 24;
2495 2496 PyObject *obj = PySequence_GetItem(args, i);
2496 2497 bitmask x;
2497 2498 long val;
2498 2499
2499 2500 if (!PyInt_Check(obj)) {
2500 2501 PyErr_SetString(PyExc_TypeError,
2501 2502 "arguments must all be ints");
2502 2503 Py_DECREF(obj);
2503 2504 goto bail;
2504 2505 }
2505 2506 val = PyInt_AsLong(obj);
2506 2507 Py_DECREF(obj);
2507 2508 if (val == -1) {
2508 2509 ret = PyList_New(0);
2509 2510 goto done;
2510 2511 }
2511 2512 if (val < 0 || val >= len) {
2512 2513 PyErr_SetString(PyExc_IndexError, "index out of range");
2513 2514 goto bail;
2514 2515 }
2515 2516 /* this cheesy bloom filter lets us avoid some more
2516 2517 * expensive duplicate checks in the common set-is-disjoint
2517 2518 * case */
2518 2519 x = 1ull << (val & 0x3f);
2519 2520 if (repeat & x) {
2520 2521 int k;
2521 2522 for (k = 0; k < revcount; k++) {
2522 2523 if (val == revs[k])
2523 2524 goto duplicate;
2524 2525 }
2525 2526 } else
2526 2527 repeat |= x;
2527 2528 if (revcount >= capacity) {
2528 2529 PyErr_Format(PyExc_OverflowError,
2529 2530 "bitset size (%d) > capacity (%d)",
2530 2531 revcount, capacity);
2531 2532 goto bail;
2532 2533 }
2533 2534 revs[revcount++] = (int)val;
2534 2535 duplicate:;
2535 2536 }
2536 2537
2537 2538 if (revcount == 0) {
2538 2539 ret = PyList_New(0);
2539 2540 goto done;
2540 2541 }
2541 2542 if (revcount == 1) {
2542 2543 PyObject *obj;
2543 2544 ret = PyList_New(1);
2544 2545 if (ret == NULL)
2545 2546 goto bail;
2546 2547 obj = PyInt_FromLong(revs[0]);
2547 2548 if (obj == NULL)
2548 2549 goto bail;
2549 2550 PyList_SET_ITEM(ret, 0, obj);
2550 2551 goto done;
2551 2552 }
2552 2553
2553 2554 ret = find_gca_candidates(self, revs, revcount);
2554 2555 if (ret == NULL)
2555 2556 goto bail;
2556 2557
2557 2558 done:
2558 2559 PyMem_Free(revs);
2559 2560 return ret;
2560 2561
2561 2562 bail:
2562 2563 PyMem_Free(revs);
2563 2564 Py_XDECREF(ret);
2564 2565 return NULL;
2565 2566 }
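/*
 * The "cheesy bloom filter" above hashes each rev to a single bit of a
 * 64-bit word via 1ull << (val & 0x3f).  For example, revs 3 and 67
 * both map to bit 3; such a collision merely triggers the exact
 * duplicate scan over revs[], it can never cause a valid rev to be
 * dropped.
 */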
2566 2567
2567 2568 /*
2568 2569 * Given a (possibly overlapping) set of revs, return the greatest
2569 2570 * common ancestors: those with the longest path to the root.
2570 2571 */
2571 2572 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2572 2573 {
2573 2574 PyObject *ret;
2574 2575 PyObject *gca = index_commonancestorsheads(self, args);
2575 2576 if (gca == NULL)
2576 2577 return NULL;
2577 2578
2578 2579 if (PyList_GET_SIZE(gca) <= 1) {
2579 2580 return gca;
2580 2581 }
2581 2582
2582 2583 ret = find_deepest(self, gca);
2583 2584 Py_DECREF(gca);
2584 2585 return ret;
2585 2586 }
2586 2587
2587 2588 /*
2588 2589 * Invalidate any trie entries introduced by added revs.
2589 2590 */
2590 2591 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2591 2592 {
2592 2593 Py_ssize_t i, len;
2593 2594
2594 2595 len = self->length + self->new_length;
2595 2596 i = start - self->length;
2596 2597 if (i < 0)
2597 2598 return;
2598 2599
2599 2600 for (i = start; i < len; i++)
2600 2601 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2601 2602
2602 2603 self->new_length = start - self->length;
2603 2604 }
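/*
 * The `+ 32` above skips to the node hash inside an index entry: the
 * first 32 bytes hold the offset/flags word, the compressed and
 * uncompressed lengths, the base, link and two parent revisions, and
 * the hash follows immediately after in the entry layouts handled here.
 */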
2604 2605
2605 2606 /*
2606 2607  * Delete a numeric range of revs, which must be at the end of the
2607 2608  * index.
2608 2609 */
2609 2610 static int index_slice_del(indexObject *self, PyObject *item)
2610 2611 {
2611 2612 Py_ssize_t start, stop, step, slicelength;
2612 2613 Py_ssize_t length = index_length(self) + 1;
2613 2614 int ret = 0;
2614 2615
2615 2616 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2616 2617 #ifdef IS_PY3K
2617 2618 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2618 2619 &slicelength) < 0)
2619 2620 #else
2620 2621 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2621 2622 &step, &slicelength) < 0)
2622 2623 #endif
2623 2624 return -1;
2624 2625
2625 2626 if (slicelength <= 0)
2626 2627 return 0;
2627 2628
2628 2629 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2629 2630 stop = start;
2630 2631
2631 2632 if (step < 0) {
2632 2633 stop = start + 1;
2633 2634 start = stop + step * (slicelength - 1) - 1;
2634 2635 step = -step;
2635 2636 }
2636 2637
2637 2638 if (step != 1) {
2638 2639 PyErr_SetString(PyExc_ValueError,
2639 2640 "revlog index delete requires step size of 1");
2640 2641 return -1;
2641 2642 }
2642 2643
2643 2644 if (stop != length - 1) {
2644 2645 PyErr_SetString(PyExc_IndexError,
2645 2646 "revlog index deletion indices are invalid");
2646 2647 return -1;
2647 2648 }
2648 2649
2649 2650 if (start < self->length) {
2650 2651 if (self->ntinitialized) {
2651 2652 Py_ssize_t i;
2652 2653
2653 2654 for (i = start; i < self->length; i++) {
2654 2655 const char *node = index_node_existing(self, i);
2655 2656 if (node == NULL)
2656 2657 return -1;
2657 2658
2658 2659 nt_delete_node(&self->nt, node);
2659 2660 }
2660 2661 if (self->new_length)
2661 2662 index_invalidate_added(self, self->length);
2662 2663 if (self->ntrev > start)
2663 2664 self->ntrev = (int)start;
2664 2665 } else if (self->new_length) {
2665 2666 self->new_length = 0;
2666 2667 }
2667 2668
2668 2669 self->length = start;
2669 2670 goto done;
2670 2671 }
2671 2672
2672 2673 if (self->ntinitialized) {
2673 2674 index_invalidate_added(self, start);
2674 2675 if (self->ntrev > start)
2675 2676 self->ntrev = (int)start;
2676 2677 } else {
2677 2678 self->new_length = start - self->length;
2678 2679 }
2679 2680 done:
2680 2681 Py_CLEAR(self->headrevs);
2681 2682 return ret;
2682 2683 }
2683 2684
2684 2685 /*
2685 2686 * Supported ops:
2686 2687 *
2687 2688 * slice deletion
2688 2689 * string assignment (extend node->rev mapping)
2689 2690 * string deletion (shrink node->rev mapping)
2690 2691 */
2691 2692 static int index_assign_subscript(indexObject *self, PyObject *item,
2692 2693 PyObject *value)
2693 2694 {
2694 2695 char *node;
2695 2696 long rev;
2696 2697
2697 2698 if (PySlice_Check(item) && value == NULL)
2698 2699 return index_slice_del(self, item);
2699 2700
2700 2701 if (node_check(self->nodelen, item, &node) == -1)
2701 2702 return -1;
2702 2703
2703 2704 if (value == NULL)
2704 2705 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2705 2706 : 0;
2706 2707 rev = PyInt_AsLong(value);
2707 2708 if (rev > INT_MAX || rev < 0) {
2708 2709 if (!PyErr_Occurred())
2709 2710 PyErr_SetString(PyExc_ValueError, "rev out of range");
2710 2711 return -1;
2711 2712 }
2712 2713
2713 2714 if (index_init_nt(self) == -1)
2714 2715 return -1;
2715 2716 return nt_insert(&self->nt, node, (int)rev);
2716 2717 }
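/*
 * Python-level view of index_assign_subscript, with hypothetical
 * values: `del index[5:]` truncates the index via index_slice_del,
 * `index[node] = 5` records an extra node->rev mapping in the trie, and
 * `del index[node]` drops that mapping again (a no-op if the trie was
 * never built).
 */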
2717 2718
2718 2719 /*
2719 2720 * Find all RevlogNG entries in an index that has inline data. Update
2720 2721 * the optional "offsets" table with those entries.
2721 2722 */
2722 2723 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2723 2724 {
2724 2725 const char *data = (const char *)self->buf.buf;
2725 2726 Py_ssize_t pos = 0;
2726 2727 Py_ssize_t end = self->buf.len;
2727 2728 long incr = self->entry_size;
2728 2729 Py_ssize_t len = 0;
2729 2730
2730 2731 while (pos + self->entry_size <= end && pos >= 0) {
2731 2732 uint32_t comp_len, sidedata_comp_len = 0;
2732 2733 /* 3rd element of header is length of compressed inline data */
2733 2734 comp_len = getbe32(data + pos + 8);
2734 2735 if (self->entry_size == v2_entry_size) {
2735 2736 sidedata_comp_len = getbe32(data + pos + 72);
2736 2737 }
2737 2738 incr = self->entry_size + comp_len + sidedata_comp_len;
2738 2739 if (offsets)
2739 2740 offsets[len] = data + pos;
2740 2741 len++;
2741 2742 pos += incr;
2742 2743 }
2743 2744
2744 2745 if (pos != end) {
2745 2746 if (!PyErr_Occurred())
2746 2747 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2747 2748 return -1;
2748 2749 }
2749 2750
2750 2751 return len;
2751 2752 }
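/*
 * Inline layout assumed by inline_scan: each index entry is followed
 * directly by its compressed revision data (plus compressed sidedata
 * for the v2 format), so the scan advances by entry_size + comp_len
 * (+ sidedata_comp_len), optionally recording where each entry starts
 * in the offsets table.  A final position that does not land exactly on
 * the end of the buffer is reported as a corrupt index.
 */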
2752 2753
2753 2754 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2754 2755 {
2755 2756 PyObject *data_obj, *inlined_obj, *revlogv2;
2756 2757 Py_ssize_t size;
2757 2758
2758 2759 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2759 2760
2760 2761 /* Initialize before argument-checking to avoid index_dealloc() crash.
2761 2762 */
2762 2763 self->added = NULL;
2763 2764 self->new_length = 0;
2764 2765 self->added_length = 0;
2765 2766 self->data = NULL;
2766 2767 memset(&self->buf, 0, sizeof(self->buf));
2767 2768 self->headrevs = NULL;
2768 2769 self->filteredrevs = Py_None;
2769 2770 Py_INCREF(Py_None);
2770 2771 self->ntinitialized = 0;
2771 2772 self->offsets = NULL;
2772 2773 self->nodelen = 20;
2773 2774 self->nullentry = NULL;
2774 2775 self->rust_ext_compat = 1;
2775 2776
2776 2777 revlogv2 = NULL;
2777 2778 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2778 2779 &data_obj, &inlined_obj, &revlogv2))
2779 2780 return -1;
2780 2781 if (!PyObject_CheckBuffer(data_obj)) {
2781 2782 PyErr_SetString(PyExc_TypeError,
2782 2783 "data does not support buffer interface");
2783 2784 return -1;
2784 2785 }
2785 2786 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2786 2787 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2787 2788 return -1;
2788 2789 }
2789 2790
2790 2791 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2791 2792 self->format_version = format_v2;
2792 2793 self->entry_size = v2_entry_size;
2793 2794 } else {
2794 2795 self->format_version = format_v1;
2795 2796 self->entry_size = v1_entry_size;
2796 2797 }
2797 2798
2798 2799 self->nullentry = Py_BuildValue(
2799 2800 PY23("iiiiiiis#iiBB", "iiiiiiiy#iiBB"), 0, 0, 0, -1, -1, -1, -1,
2800 2801 nullid, self->nodelen, 0, 0, comp_mode_inline, comp_mode_inline);
2801 2802
2802 2803 if (!self->nullentry)
2803 2804 return -1;
2804 2805 PyObject_GC_UnTrack(self->nullentry);
2805 2806
2806 2807 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2807 2808 return -1;
2808 2809 size = self->buf.len;
2809 2810
2810 2811 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2811 2812 self->data = data_obj;
2812 2813
2813 2814 self->ntlookups = self->ntmisses = 0;
2814 2815 self->ntrev = -1;
2815 2816 Py_INCREF(self->data);
2816 2817
2817 2818 if (self->inlined) {
2818 2819 Py_ssize_t len = inline_scan(self, NULL);
2819 2820 if (len == -1)
2820 2821 goto bail;
2821 2822 self->length = len;
2822 2823 } else {
2823 2824 if (size % self->entry_size) {
2824 2825 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2825 2826 goto bail;
2826 2827 }
2827 2828 self->length = size / self->entry_size;
2828 2829 }
2829 2830
2830 2831 return 0;
2831 2832 bail:
2832 2833 return -1;
2833 2834 }
2834 2835
2835 2836 static PyObject *index_nodemap(indexObject *self)
2836 2837 {
2837 2838 Py_INCREF(self);
2838 2839 return (PyObject *)self;
2839 2840 }
2840 2841
2841 2842 static void _index_clearcaches(indexObject *self)
2842 2843 {
2843 2844 if (self->offsets) {
2844 2845 PyMem_Free((void *)self->offsets);
2845 2846 self->offsets = NULL;
2846 2847 }
2847 2848 if (self->ntinitialized) {
2848 2849 nt_dealloc(&self->nt);
2849 2850 }
2850 2851 self->ntinitialized = 0;
2851 2852 Py_CLEAR(self->headrevs);
2852 2853 }
2853 2854
2854 2855 static PyObject *index_clearcaches(indexObject *self)
2855 2856 {
2856 2857 _index_clearcaches(self);
2857 2858 self->ntrev = -1;
2858 2859 self->ntlookups = self->ntmisses = 0;
2859 2860 Py_RETURN_NONE;
2860 2861 }
2861 2862
2862 2863 static void index_dealloc(indexObject *self)
2863 2864 {
2864 2865 _index_clearcaches(self);
2865 2866 Py_XDECREF(self->filteredrevs);
2866 2867 if (self->buf.buf) {
2867 2868 PyBuffer_Release(&self->buf);
2868 2869 memset(&self->buf, 0, sizeof(self->buf));
2869 2870 }
2870 2871 Py_XDECREF(self->data);
2871 2872 PyMem_Free(self->added);
2872 2873 Py_XDECREF(self->nullentry);
2873 2874 PyObject_Del(self);
2874 2875 }
2875 2876
2876 2877 static PySequenceMethods index_sequence_methods = {
2877 2878 (lenfunc)index_length, /* sq_length */
2878 2879 0, /* sq_concat */
2879 2880 0, /* sq_repeat */
2880 2881 (ssizeargfunc)index_get, /* sq_item */
2881 2882 0, /* sq_slice */
2882 2883 0, /* sq_ass_item */
2883 2884 0, /* sq_ass_slice */
2884 2885 (objobjproc)index_contains, /* sq_contains */
2885 2886 };
2886 2887
2887 2888 static PyMappingMethods index_mapping_methods = {
2888 2889 (lenfunc)index_length, /* mp_length */
2889 2890 (binaryfunc)index_getitem, /* mp_subscript */
2890 2891 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2891 2892 };
2892 2893
2893 2894 static PyMethodDef index_methods[] = {
2894 2895 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2895 2896 "return the gca set of the given revs"},
2896 2897 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2897 2898 METH_VARARGS,
2898 2899 "return the heads of the common ancestors of the given revs"},
2899 2900 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2900 2901 "clear the index caches"},
2901 2902 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2902 2903 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2903 2904 "return `rev` associated with a node or None"},
2904 2905 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2905 2906      "return True if the node exists in the index"},
2906 2907 {"rev", (PyCFunction)index_m_rev, METH_O,
2907 2908 "return `rev` associated with a node or raise RevlogError"},
2908 2909 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2909 2910 "compute phases"},
2910 2911 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2911 2912 "reachableroots"},
2912 2913 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2913 2914 METH_VARARGS, "replace an existing index entry with a new value"},
2914 2915 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2915 2916 "get head revisions"}, /* Can do filtering since 3.2 */
2916 2917 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2917 2918 "get filtered head revisions"}, /* Can always do filtering */
2918 2919 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2919 2920 "True if the object is a snapshot"},
2920 2921 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2921 2922 "Gather snapshot data in a cache dict"},
2922 2923 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2923 2924 "determine revisions with deltas to reconstruct fulltext"},
2924 2925 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2925 2926      METH_VARARGS, "slice revs to reduce the amount of unrelated data to be read from disk"},
2926 2927 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2927 2928 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2928 2929 "match a potentially ambiguous node ID"},
2929 2930 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2930 2931 "find length of shortest hex nodeid of a binary ID"},
2931 2932 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2932 2933 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2933 2934 "return an entry in binary form"},
2934 2935 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2935 2936 "pack the revlog header information into binary"},
2936 2937 {NULL} /* Sentinel */
2937 2938 };
2938 2939
2939 2940 static PyGetSetDef index_getset[] = {
2940 2941 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2941 2942 {NULL} /* Sentinel */
2942 2943 };
2943 2944
2944 2945 static PyMemberDef index_members[] = {
2945 2946 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
2946 2947 "size of an index entry"},
2947 2948 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
2948 2949      "compatibility with the rust extensions"},
2949 2950 {NULL} /* Sentinel */
2950 2951 };
2951 2952
2952 2953 PyTypeObject HgRevlogIndex_Type = {
2953 2954 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2954 2955 "parsers.index", /* tp_name */
2955 2956 sizeof(indexObject), /* tp_basicsize */
2956 2957 0, /* tp_itemsize */
2957 2958 (destructor)index_dealloc, /* tp_dealloc */
2958 2959 0, /* tp_print */
2959 2960 0, /* tp_getattr */
2960 2961 0, /* tp_setattr */
2961 2962 0, /* tp_compare */
2962 2963 0, /* tp_repr */
2963 2964 0, /* tp_as_number */
2964 2965 &index_sequence_methods, /* tp_as_sequence */
2965 2966 &index_mapping_methods, /* tp_as_mapping */
2966 2967 0, /* tp_hash */
2967 2968 0, /* tp_call */
2968 2969 0, /* tp_str */
2969 2970 0, /* tp_getattro */
2970 2971 0, /* tp_setattro */
2971 2972 0, /* tp_as_buffer */
2972 2973 Py_TPFLAGS_DEFAULT, /* tp_flags */
2973 2974 "revlog index", /* tp_doc */
2974 2975 0, /* tp_traverse */
2975 2976 0, /* tp_clear */
2976 2977 0, /* tp_richcompare */
2977 2978 0, /* tp_weaklistoffset */
2978 2979 0, /* tp_iter */
2979 2980 0, /* tp_iternext */
2980 2981 index_methods, /* tp_methods */
2981 2982 index_members, /* tp_members */
2982 2983 index_getset, /* tp_getset */
2983 2984 0, /* tp_base */
2984 2985 0, /* tp_dict */
2985 2986 0, /* tp_descr_get */
2986 2987 0, /* tp_descr_set */
2987 2988 0, /* tp_dictoffset */
2988 2989 (initproc)index_init, /* tp_init */
2989 2990 0, /* tp_alloc */
2990 2991 };
2991 2992
2992 2993 /*
2993 2994 * returns a tuple of the form (index, cache) with elements as
2994 2995 * follows:
2995 2996 *
2996 2997 * index: an index object that lazily parses Revlog (v1 or v2) records
2997 2998 * cache: if data is inlined, a tuple (0, index_file_content), else None
2998 2999 * index_file_content could be a string, or a buffer
2999 3000 *
3000 3001  * the added complications are for backwards compatibility
3001 3002 */
3002 3003 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3003 3004 {
3004 3005 PyObject *cache = NULL;
3005 3006 indexObject *idx;
3006 3007 int ret;
3007 3008
3008 3009 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3009 3010 if (idx == NULL)
3010 3011 goto bail;
3011 3012
3012 3013 ret = index_init(idx, args, kwargs);
3013 3014 if (ret == -1)
3014 3015 goto bail;
3015 3016
3016 3017 if (idx->inlined) {
3017 3018 cache = Py_BuildValue("iO", 0, idx->data);
3018 3019 if (cache == NULL)
3019 3020 goto bail;
3020 3021 } else {
3021 3022 cache = Py_None;
3022 3023 Py_INCREF(cache);
3023 3024 }
3024 3025
3025 3026 return Py_BuildValue("NN", idx, cache);
3026 3027
3027 3028 bail:
3028 3029 Py_XDECREF(idx);
3029 3030 Py_XDECREF(cache);
3030 3031 return NULL;
3031 3032 }
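/*
 * The "NN" format in the final Py_BuildValue call steals the references
 * to idx and cache, which is why no Py_DECREF follows on the success
 * path; the earlier failure cases release both through the bail label
 * instead.
 */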
3032 3033
3033 3034 static Revlog_CAPI CAPI = {
3034 3035 /* increment the abi_version field upon each change in the Revlog_CAPI
3035 3036 struct or in the ABI of the listed functions */
3036 3037 2,
3037 3038 index_length,
3038 3039 index_node,
3039 3040 HgRevlogIndex_GetParents,
3040 3041 };
3041 3042
3042 3043 void revlog_module_init(PyObject *mod)
3043 3044 {
3044 3045 PyObject *caps = NULL;
3045 3046 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3046 3047 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3047 3048 return;
3048 3049 Py_INCREF(&HgRevlogIndex_Type);
3049 3050 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3050 3051
3051 3052 nodetreeType.tp_new = PyType_GenericNew;
3052 3053 if (PyType_Ready(&nodetreeType) < 0)
3053 3054 return;
3054 3055 Py_INCREF(&nodetreeType);
3055 3056 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3056 3057
3057 3058 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3058 3059 if (caps != NULL)
3059 3060 PyModule_AddObject(mod, "revlog_CAPI", caps);
3060 3061 }