rust: stop claiming the C index is compatible with the rust code...
marmoute
r52320:d361d7bf default
@@ -1,3498 +1,3497 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 typedef struct indexObjectStruct indexObject;
27 27
28 28 typedef struct {
29 29 int children[16];
30 30 } nodetreenode;
31 31
32 32 typedef struct {
33 33 int abi_version;
34 34 Py_ssize_t (*index_length)(const indexObject *);
35 35 const char *(*index_node)(indexObject *, Py_ssize_t);
36 36 int (*fast_rank)(indexObject *, Py_ssize_t);
37 37 int (*index_parents)(PyObject *, int, int *);
38 38 } Revlog_CAPI;
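
/*
 * Illustrative sketch, not part of the original file: how another C
 * extension could reach this table through a Python capsule and call the
 * index through its function pointers. The capsule name and the helper
 * name below are assumptions made for the example only.
 */
static inline int example_capi_tip_lookup(PyObject *index_obj)
{
	Revlog_CAPI *api;
	Py_ssize_t len;
	const char *node;

	/* hypothetical capsule name, for illustration only */
	api = (Revlog_CAPI *)PyCapsule_Import(
	    "mercurial.cext.parsers.example_CAPI", 0);
	if (api == NULL)
		return -1; /* exception already set by PyCapsule_Import */
	len = api->index_length((indexObject *)index_obj);
	node = api->index_node((indexObject *)index_obj, len - 1);
	return node != NULL ? 0 : -1;
}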
39 39
40 40 /*
41 41 * A base-16 trie for fast node->rev mapping.
42 42 *
43 43 * Positive value is index of the next node in the trie
44 44 * Negative value is a leaf: -(rev + 2)
45 45 * Zero is empty
46 46 */
47 47 typedef struct {
48 48 indexObject *index;
49 49 nodetreenode *nodes;
50 50 Py_ssize_t nodelen;
51 51 size_t length; /* # nodes in use */
52 52 size_t capacity; /* # nodes allocated */
53 53 int depth; /* maximum depth of tree */
54 54 int splits; /* # splits performed */
55 55 } nodetree;
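
/*
 * Illustrative sketch, not part of the original file: the encoding used in
 * nodetreenode.children[], as described in the trie comment above. The
 * helper names are invented for the example.
 */
static inline int example_encode_leaf(int rev)
{
	return -(rev + 2); /* rev 0 -> -2, rev 1 -> -3, ... */
}

static inline int example_decode_leaf(int value)
{
	/* only meaningful when value < 0; 0 is empty, > 0 is a node index */
	return -(value + 2);
}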
56 56
57 57 typedef struct {
58 58 PyObject_HEAD /* ; */
59 59 nodetree nt;
60 60 } nodetreeObject;
61 61
62 62 /*
63 63 * This class has two behaviors.
64 64 *
65 65 * When used in a list-like way (with integer keys), we decode an
66 66 * entry in a RevlogNG index file on demand. We have limited support for
67 67 * integer-keyed insert and delete, only at elements right before the
68 68 * end.
69 69 *
70 70 * With string keys, we lazily perform a reverse mapping from node to
71 71 * rev, using a base-16 trie.
72 72 */
73 73 struct indexObjectStruct {
74 74 PyObject_HEAD
75 75 /* Type-specific fields go here. */
76 76 PyObject *data; /* raw bytes of index */
77 77 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
78 78 PyObject *nullentry; /* fast path for references to null */
79 79 Py_buffer buf; /* buffer of data */
80 80 const char **offsets; /* populated on demand */
81 81 Py_ssize_t length; /* current on-disk number of elements */
82 82 unsigned new_length; /* number of added elements */
83 83 unsigned added_length; /* space reserved for added elements */
84 84 char *added; /* populated on demand */
85 85 PyObject *headrevs; /* cache, invalidated on changes */
86 86 PyObject *filteredrevs; /* filtered revs set */
87 87 nodetree nt; /* base-16 trie */
88 88 int ntinitialized; /* 0 or 1 */
89 89 int ntrev; /* last rev scanned */
90 90 int ntlookups; /* # lookups */
91 91 int ntmisses; /* # lookups that miss the cache */
92 92 int inlined;
93 93 long entry_size; /* size of an index entry; differs between the v1
94 94 and v2 formats */
95 95 long rust_ext_compat; /* compatibility with being used in rust
96 96 extensions */
97 97 long format_version; /* format version selector (format_*) */
98 98 };
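
/*
 * Illustrative usage, not part of the original file, showing the two
 * behaviors described above as seen from Python. The exact method names
 * are assumptions for the example:
 *
 *     index[5]          # decode the entry for rev 5 on demand
 *     index.rev(node)   # node -> rev, served by the lazily built trie
 */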
99 99
100 100 static Py_ssize_t index_length(const indexObject *self)
101 101 {
102 102 return self->length + self->new_length;
103 103 }
104 104
105 105 static const char nullid[32] = {0};
106 106 static const Py_ssize_t nullrev = -1;
107 107
108 108 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
109 109
110 110 static int index_find_node(indexObject *self, const char *node);
111 111
112 112 #if LONG_MAX == 0x7fffffffL
113 113 static const char *const tuple_format = "Kiiiiiiy#KiBBi";
114 114 #else
115 115 static const char *const tuple_format = "kiiiiiiy#kiBBi";
116 116 #endif
117 117
118 118 /* A RevlogNG v1 index entry is 64 bytes long. */
119 119 static const long v1_entry_size = 64;
120 120
121 121 /* A Revlogv2 index entry is 96 bytes long. */
122 122 static const long v2_entry_size = 96;
123 123
124 124 /* A Changelogv2 index entry is 96 bytes long. */
125 125 static const long cl2_entry_size = 96;
126 126
127 127 /* Internal format version.
128 128 * Must match their counterparts in revlogutils/constants.py */
129 129 static const long format_v1 = 1; /* constants.py: REVLOGV1 */
130 130 static const long format_v2 = 0xDEAD; /* constants.py: REVLOGV2 */
131 131 static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */
132 132
133 133 static const long entry_v1_offset_high = 0;
134 134 static const long entry_v1_offset_offset_flags = 4;
135 135 static const long entry_v1_offset_comp_len = 8;
136 136 static const long entry_v1_offset_uncomp_len = 12;
137 137 static const long entry_v1_offset_base_rev = 16;
138 138 static const long entry_v1_offset_link_rev = 20;
139 139 static const long entry_v1_offset_parent_1 = 24;
140 140 static const long entry_v1_offset_parent_2 = 28;
141 141 static const long entry_v1_offset_node_id = 32;
142 142
143 143 static const long entry_v2_offset_high = 0;
144 144 static const long entry_v2_offset_offset_flags = 4;
145 145 static const long entry_v2_offset_comp_len = 8;
146 146 static const long entry_v2_offset_uncomp_len = 12;
147 147 static const long entry_v2_offset_base_rev = 16;
148 148 static const long entry_v2_offset_link_rev = 20;
149 149 static const long entry_v2_offset_parent_1 = 24;
150 150 static const long entry_v2_offset_parent_2 = 28;
151 151 static const long entry_v2_offset_node_id = 32;
152 152 static const long entry_v2_offset_sidedata_offset = 64;
153 153 static const long entry_v2_offset_sidedata_comp_len = 72;
154 154 static const long entry_v2_offset_all_comp_mode = 76;
155 155 /* next free offset: 77 */
156 156
157 157 static const long entry_cl2_offset_high = 0;
158 158 static const long entry_cl2_offset_offset_flags = 4;
159 159 static const long entry_cl2_offset_comp_len = 8;
160 160 static const long entry_cl2_offset_uncomp_len = 12;
161 161 static const long entry_cl2_offset_parent_1 = 16;
162 162 static const long entry_cl2_offset_parent_2 = 20;
163 163 static const long entry_cl2_offset_node_id = 24;
164 164 static const long entry_cl2_offset_sidedata_offset = 56;
165 165 static const long entry_cl2_offset_sidedata_comp_len = 64;
166 166 static const long entry_cl2_offset_all_comp_mode = 68;
167 167 static const long entry_cl2_offset_rank = 69;
168 168 /* next free offset: 73 */
169 169
170 170 static const char comp_mode_inline = 2;
171 171 static const int rank_unknown = -1;
172 172
173 173 static void raise_revlog_error(void)
174 174 {
175 175 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
176 176
177 177 mod = PyImport_ImportModule("mercurial.error");
178 178 if (mod == NULL) {
179 179 goto cleanup;
180 180 }
181 181
182 182 dict = PyModule_GetDict(mod);
183 183 if (dict == NULL) {
184 184 goto cleanup;
185 185 }
186 186 Py_INCREF(dict);
187 187
188 188 errclass = PyDict_GetItemString(dict, "RevlogError");
189 189 if (errclass == NULL) {
190 190 PyErr_SetString(PyExc_SystemError,
191 191 "could not find RevlogError");
192 192 goto cleanup;
193 193 }
194 194
195 195 /* value of exception is ignored by callers */
196 196 PyErr_SetString(errclass, "RevlogError");
197 197
198 198 cleanup:
199 199 Py_XDECREF(dict);
200 200 Py_XDECREF(mod);
201 201 }
202 202
203 203 /*
204 204 * Return a pointer to the beginning of a RevlogNG record.
205 205 */
206 206 static const char *index_deref(indexObject *self, Py_ssize_t pos)
207 207 {
208 208 if (pos >= self->length)
209 209 return self->added + (pos - self->length) * self->entry_size;
210 210
211 211 if (self->inlined && pos > 0) {
212 212 if (self->offsets == NULL) {
213 213 Py_ssize_t ret;
214 214 self->offsets =
215 215 PyMem_Malloc(self->length * sizeof(*self->offsets));
216 216 if (self->offsets == NULL)
217 217 return (const char *)PyErr_NoMemory();
218 218 ret = inline_scan(self, self->offsets);
219 219 if (ret == -1) {
220 220 return NULL;
221 221 };
222 222 }
223 223 return self->offsets[pos];
224 224 }
225 225
226 226 return (const char *)(self->buf.buf) + pos * self->entry_size;
227 227 }
228 228
229 229 /*
230 230 * Get parents of the given rev.
231 231 *
232 232 * The specified rev must be valid and must not be nullrev. A returned
233 233 * parent revision may be nullrev, but is guaranteed to be in valid range.
234 234 */
235 235 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
236 236 int maxrev)
237 237 {
238 238 const char *data = index_deref(self, rev);
239 239
240 240 if (self->format_version == format_v1) {
241 241 ps[0] = getbe32(data + entry_v1_offset_parent_1);
242 242 ps[1] = getbe32(data + entry_v1_offset_parent_2);
243 243 } else if (self->format_version == format_v2) {
244 244 ps[0] = getbe32(data + entry_v2_offset_parent_1);
245 245 ps[1] = getbe32(data + entry_v2_offset_parent_2);
246 246 } else if (self->format_version == format_cl2) {
247 247 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
248 248 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
249 249 } else {
250 250 raise_revlog_error();
251 251 return -1;
252 252 }
253 253
254 254 /* If the index file is corrupted, ps[] may point to invalid revisions,
255 255 * so trusting them unconditionally risks a buffer overflow. */
256 256 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
257 257 PyErr_SetString(PyExc_ValueError, "parent out of range");
258 258 return -1;
259 259 }
260 260 return 0;
261 261 }
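
/*
 * Illustrative sketch, not part of the original file: the calling pattern
 * used by the helpers below, with maxrev bounding the validity check.
 * Per the contract above, rev must be a valid revision and not nullrev.
 */
static inline int example_first_parent(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	int maxrev = (int)index_length(self) - 1;

	if (index_get_parents(self, rev, ps, maxrev) < 0)
		return -2; /* error already set */
	return ps[0];      /* may be nullrev (-1) */
}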
262 262
263 263 /*
264 264 * Get parents of the given rev.
265 265 *
266 266 * If the specified rev is out of range, IndexError will be raised. If the
267 267 * revlog entry is corrupted, ValueError may be raised.
268 268 *
269 269 * Returns 0 on success or -1 on failure.
270 270 */
271 271 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
272 272 {
273 273 int tiprev;
274 274 if (!op || !HgRevlogIndex_Check(op) || !ps) {
275 275 PyErr_BadInternalCall();
276 276 return -1;
277 277 }
278 278 tiprev = (int)index_length((indexObject *)op) - 1;
279 279 if (rev < -1 || rev > tiprev) {
280 280 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
281 281 return -1;
282 282 } else if (rev == -1) {
283 283 ps[0] = ps[1] = -1;
284 284 return 0;
285 285 } else {
286 286 return index_get_parents((indexObject *)op, rev, ps, tiprev);
287 287 }
288 288 }
289 289
290 290 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
291 291 {
292 292 const char *data;
293 293 uint64_t offset;
294 294
295 295 if (rev == nullrev)
296 296 return 0;
297 297
298 298 data = index_deref(self, rev);
299 299
300 300 if (self->format_version == format_v1) {
301 301 offset = getbe32(data + entry_v1_offset_offset_flags);
302 302 if (rev == 0) {
303 303 /* mask out version number for the first entry */
304 304 offset &= 0xFFFF;
305 305 } else {
306 306 uint32_t offset_high =
307 307 getbe32(data + entry_v1_offset_high);
308 308 offset |= ((uint64_t)offset_high) << 32;
309 309 }
310 310 } else if (self->format_version == format_v2) {
311 311 offset = getbe32(data + entry_v2_offset_offset_flags);
312 312 if (rev == 0) {
313 313 /* mask out version number for the first entry */
314 314 offset &= 0xFFFF;
315 315 } else {
316 316 uint32_t offset_high =
317 317 getbe32(data + entry_v2_offset_high);
318 318 offset |= ((uint64_t)offset_high) << 32;
319 319 }
320 320 } else if (self->format_version == format_cl2) {
321 321 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
322 322 offset = getbe32(data + entry_cl2_offset_offset_flags);
323 323 offset |= ((uint64_t)offset_high) << 32;
324 324 } else {
325 325 raise_revlog_error();
326 326 return -1;
327 327 }
328 328
329 329 return (int64_t)(offset >> 16);
330 330 }
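
/*
 * Illustrative sketch, not part of the original file: offset_flags keeps
 * the 16 flag bits in the low half and the data-file offset in the upper
 * 48 bits, hence the ">> 16" above. For example, offset 0x1234 with flags
 * 0x0001 is stored as 0x12340001.
 */
static inline uint64_t example_pack_offset_flags(uint64_t offset, int flags)
{
	return (offset << 16) | (flags & 0xFFFF);
}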
331 331
332 332 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
333 333 {
334 334 const char *data;
335 335 int tmp;
336 336
337 337 if (rev == nullrev)
338 338 return 0;
339 339
340 340 data = index_deref(self, rev);
341 341
342 342 if (self->format_version == format_v1) {
343 343 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
344 344 } else if (self->format_version == format_v2) {
345 345 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
346 346 } else if (self->format_version == format_cl2) {
347 347 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
348 348 } else {
349 349 raise_revlog_error();
350 350 return -1;
351 351 }
352 352 if (tmp < 0) {
353 353 PyErr_Format(PyExc_OverflowError,
354 354 "revlog entry size out of bound (%d)", tmp);
355 355 return -1;
356 356 }
357 357 return tmp;
358 358 }
359 359
360 360 /*
361 361 * RevlogNG format (all in big endian, data may be inlined):
362 362 * 6 bytes: offset
363 363 * 2 bytes: flags
364 364 * 4 bytes: compressed length
365 365 * 4 bytes: uncompressed length
366 366 * 4 bytes: base revision
367 367 * 4 bytes: link revision
368 368 * 4 bytes: parent 1 revision
369 369 * 4 bytes: parent 2 revision
370 370 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
371 371 */
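/*
 * Worked size check for the layout above (illustrative, not part of the
 * original file): 6 + 2 + 6 * 4 + 32 = 64 bytes, matching v1_entry_size.
 */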
372 372 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
373 373 {
374 374 uint64_t offset_flags, sidedata_offset;
375 375 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
376 376 sidedata_comp_len, rank = rank_unknown;
377 377 char data_comp_mode, sidedata_comp_mode;
378 378 const char *c_node_id;
379 379 const char *data;
380 380 Py_ssize_t length = index_length(self);
381 381
382 382 if (pos == nullrev) {
383 383 Py_INCREF(self->nullentry);
384 384 return self->nullentry;
385 385 }
386 386
387 387 if (pos < 0 || pos >= length) {
388 388 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
389 389 return NULL;
390 390 }
391 391
392 392 data = index_deref(self, pos);
393 393 if (data == NULL)
394 394 return NULL;
395 395
396 396 if (self->format_version == format_v1) {
397 397 offset_flags = getbe32(data + entry_v1_offset_offset_flags);
398 398 /*
399 399 * The first entry on-disk needs the version number masked out,
400 400 * but this doesn't apply if entries are added to an empty
401 401 * index.
402 402 */
403 403 if (self->length && pos == 0)
404 404 offset_flags &= 0xFFFF;
405 405 else {
406 406 uint32_t offset_high =
407 407 getbe32(data + entry_v1_offset_high);
408 408 offset_flags |= ((uint64_t)offset_high) << 32;
409 409 }
410 410
411 411 comp_len = getbe32(data + entry_v1_offset_comp_len);
412 412 uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
413 413 base_rev = getbe32(data + entry_v1_offset_base_rev);
414 414 link_rev = getbe32(data + entry_v1_offset_link_rev);
415 415 parent_1 = getbe32(data + entry_v1_offset_parent_1);
416 416 parent_2 = getbe32(data + entry_v1_offset_parent_2);
417 417 c_node_id = data + entry_v1_offset_node_id;
418 418
419 419 sidedata_offset = 0;
420 420 sidedata_comp_len = 0;
421 421 data_comp_mode = comp_mode_inline;
422 422 sidedata_comp_mode = comp_mode_inline;
423 423 } else if (self->format_version == format_v2) {
424 424 offset_flags = getbe32(data + entry_v2_offset_offset_flags);
425 425 /*
426 426 * The first entry on-disk needs the version number masked out,
427 427 * but this doesn't apply if entries are added to an empty
428 428 * index.
429 429 */
430 430 if (self->length && pos == 0)
431 431 offset_flags &= 0xFFFF;
432 432 else {
433 433 uint32_t offset_high =
434 434 getbe32(data + entry_v2_offset_high);
435 435 offset_flags |= ((uint64_t)offset_high) << 32;
436 436 }
437 437
438 438 comp_len = getbe32(data + entry_v2_offset_comp_len);
439 439 uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
440 440 base_rev = getbe32(data + entry_v2_offset_base_rev);
441 441 link_rev = getbe32(data + entry_v2_offset_link_rev);
442 442 parent_1 = getbe32(data + entry_v2_offset_parent_1);
443 443 parent_2 = getbe32(data + entry_v2_offset_parent_2);
444 444 c_node_id = data + entry_v2_offset_node_id;
445 445
446 446 sidedata_offset =
447 447 getbe64(data + entry_v2_offset_sidedata_offset);
448 448 sidedata_comp_len =
449 449 getbe32(data + entry_v2_offset_sidedata_comp_len);
450 450 data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
451 451 sidedata_comp_mode =
452 452 ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
453 453 } else if (self->format_version == format_cl2) {
454 454 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
455 455 offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
456 456 offset_flags |= ((uint64_t)offset_high) << 32;
457 457 comp_len = getbe32(data + entry_cl2_offset_comp_len);
458 458 uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
459 459 /* base_rev and link_rev are not stored in changelogv2, but are
460 460 still used by some functions shared with the other revlogs.
461 461 They are supposed to contain links to other revisions,
462 462 but they always point to themselves in the case of a changelog.
463 463 */
464 464 base_rev = pos;
465 465 link_rev = pos;
466 466 parent_1 = getbe32(data + entry_cl2_offset_parent_1);
467 467 parent_2 = getbe32(data + entry_cl2_offset_parent_2);
468 468 c_node_id = data + entry_cl2_offset_node_id;
469 469 sidedata_offset =
470 470 getbe64(data + entry_cl2_offset_sidedata_offset);
471 471 sidedata_comp_len =
472 472 getbe32(data + entry_cl2_offset_sidedata_comp_len);
473 473 data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
474 474 sidedata_comp_mode =
475 475 ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
476 476 rank = getbe32(data + entry_cl2_offset_rank);
477 477 } else {
478 478 raise_revlog_error();
479 479 return NULL;
480 480 }
481 481
482 482 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
483 483 base_rev, link_rev, parent_1, parent_2, c_node_id,
484 484 self->nodelen, sidedata_offset, sidedata_comp_len,
485 485 data_comp_mode, sidedata_comp_mode, rank);
486 486 }
487 487 /*
488 488 * Pack header information in binary
489 489 */
490 490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
491 491 {
492 492 int header;
493 493 char out[4];
494 494 if (!PyArg_ParseTuple(args, "i", &header)) {
495 495 return NULL;
496 496 }
497 497 if (self->format_version != format_v1) {
498 498 PyErr_Format(PyExc_RuntimeError,
499 499 "version header should go in the docket, not the "
500 500 "index: %d",
501 501 header);
502 502 return NULL;
503 503 }
504 504 putbe32(header, out);
505 505 return PyBytes_FromStringAndSize(out, 4);
506 506 }
507 507 /*
508 508 * Return the raw binary string representing a revision
509 509 */
510 510 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
511 511 {
512 512 long rev;
513 513 const char *data;
514 514 Py_ssize_t length = index_length(self);
515 515
516 516 if (!pylong_to_long(value, &rev)) {
517 517 return NULL;
518 518 }
519 519 if (rev < 0 || rev >= length) {
520 520 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
521 521 rev);
522 522 return NULL;
523 523 };
524 524
525 525 data = index_deref(self, rev);
526 526 if (data == NULL)
527 527 return NULL;
528 528 if (rev == 0 && self->format_version == format_v1) {
529 529 /* the header is eating the start of the first entry */
530 530 return PyBytes_FromStringAndSize(data + 4,
531 531 self->entry_size - 4);
532 532 }
533 533 return PyBytes_FromStringAndSize(data, self->entry_size);
534 534 }
535 535
536 536 /*
537 537 * Return the hash of node corresponding to the given rev.
538 538 */
539 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 540 {
541 541 Py_ssize_t length = index_length(self);
542 542 const char *data;
543 543 const char *node_id;
544 544
545 545 if (pos == nullrev)
546 546 return nullid;
547 547
548 548 if (pos >= length)
549 549 return NULL;
550 550
551 551 data = index_deref(self, pos);
552 552
553 553 if (self->format_version == format_v1) {
554 554 node_id = data + entry_v1_offset_node_id;
555 555 } else if (self->format_version == format_v2) {
556 556 node_id = data + entry_v2_offset_node_id;
557 557 } else if (self->format_version == format_cl2) {
558 558 node_id = data + entry_cl2_offset_node_id;
559 559 } else {
560 560 raise_revlog_error();
561 561 return NULL;
562 562 }
563 563
564 564 return data ? node_id : NULL;
565 565 }
566 566
567 567 /*
568 568 * Return the stored rank of a given revision if known, or rank_unknown
569 569 * otherwise.
570 570 *
571 571 * The rank of a revision is the size of the sub-graph it defines as a head.
572 572 * Equivalently, the rank of a revision `r` is the size of the set
573 573 * `ancestors(r)`, `r` included.
574 574 *
575 575 * This method returns the rank retrieved from the revlog in constant time. It
576 576 * makes no attempt at computing unknown values for versions of the revlog
577 577 * which do not persist the rank.
578 578 */
579 579 static int index_fast_rank(indexObject *self, Py_ssize_t pos)
580 580 {
581 581 Py_ssize_t length = index_length(self);
582 582
583 583 if (self->format_version != format_cl2 || pos >= length) {
584 584 return rank_unknown;
585 585 }
586 586
587 587 if (pos == nullrev) {
588 588 return 0; /* convention */
589 589 }
590 590
591 591 return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
592 592 }
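
/*
 * Illustrative example, not part of the original file:
 *
 *     0 --- 1 --- 2        rank(2) = 3   ancestors are {0, 1, 2}
 *            \
 *             3 --- 4      rank(4) = 4   ancestors are {0, 1, 3, 4}
 *
 * For changelogv2 these values are read directly from the entry; the other
 * formats report rank_unknown.
 */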
593 593
594 594 /*
595 595 * Return the hash of the node corresponding to the given rev. The
596 596 * rev is assumed to exist. If not, an exception is set.
597 597 */
598 598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
599 599 {
600 600 const char *node = index_node(self, pos);
601 601 if (node == NULL) {
602 602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
603 603 (int)pos);
604 604 }
605 605 return node;
606 606 }
607 607
608 608 static int nt_insert(nodetree *self, const char *node, int rev);
609 609
610 610 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
611 611 {
612 612 Py_ssize_t thisnodelen;
613 613 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
614 614 return -1;
615 615 if (nodelen == thisnodelen)
616 616 return 0;
617 617 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
618 618 thisnodelen, nodelen);
619 619 return -1;
620 620 }
621 621
622 622 static PyObject *index_append(indexObject *self, PyObject *obj)
623 623 {
624 624 uint64_t offset_flags, sidedata_offset;
625 625 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
626 626 sidedata_comp_len, rank;
627 627 char data_comp_mode, sidedata_comp_mode;
628 628 Py_ssize_t c_node_id_len;
629 629 const char *c_node_id;
630 630 char comp_field;
631 631 char *data;
632 632
633 633 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
634 634 &uncomp_len, &base_rev, &link_rev, &parent_1,
635 635 &parent_2, &c_node_id, &c_node_id_len,
636 636 &sidedata_offset, &sidedata_comp_len,
637 637 &data_comp_mode, &sidedata_comp_mode, &rank)) {
638 638 PyErr_SetString(PyExc_TypeError, "13-tuple required");
639 639 return NULL;
640 640 }
641 641
642 642 if (c_node_id_len != self->nodelen) {
643 643 PyErr_SetString(PyExc_TypeError, "invalid node");
644 644 return NULL;
645 645 }
646 646 if (self->format_version == format_v1) {
647 647
648 648 if (data_comp_mode != comp_mode_inline) {
649 649 PyErr_Format(PyExc_ValueError,
650 650 "invalid data compression mode: %i",
651 651 data_comp_mode);
652 652 return NULL;
653 653 }
654 654 if (sidedata_comp_mode != comp_mode_inline) {
655 655 PyErr_Format(PyExc_ValueError,
656 656 "invalid sidedata compression mode: %i",
657 657 sidedata_comp_mode);
658 658 return NULL;
659 659 }
660 660 }
661 661
662 662 if (self->new_length == self->added_length) {
663 663 size_t new_added_length =
664 664 self->added_length ? self->added_length * 2 : 4096;
665 665 void *new_added = PyMem_Realloc(
666 666 self->added, new_added_length * self->entry_size);
667 667 if (!new_added)
668 668 return PyErr_NoMemory();
669 669 self->added = new_added;
670 670 self->added_length = new_added_length;
671 671 }
672 672 rev = self->length + self->new_length;
673 673 data = self->added + self->entry_size * self->new_length++;
674 674
675 675 memset(data, 0, self->entry_size);
676 676
677 677 if (self->format_version == format_v1) {
678 678 putbe32(offset_flags >> 32, data + entry_v1_offset_high);
679 679 putbe32(offset_flags & 0xffffffffU,
680 680 data + entry_v1_offset_offset_flags);
681 681 putbe32(comp_len, data + entry_v1_offset_comp_len);
682 682 putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
683 683 putbe32(base_rev, data + entry_v1_offset_base_rev);
684 684 putbe32(link_rev, data + entry_v1_offset_link_rev);
685 685 putbe32(parent_1, data + entry_v1_offset_parent_1);
686 686 putbe32(parent_2, data + entry_v1_offset_parent_2);
687 687 memcpy(data + entry_v1_offset_node_id, c_node_id,
688 688 c_node_id_len);
689 689 } else if (self->format_version == format_v2) {
690 690 putbe32(offset_flags >> 32, data + entry_v2_offset_high);
691 691 putbe32(offset_flags & 0xffffffffU,
692 692 data + entry_v2_offset_offset_flags);
693 693 putbe32(comp_len, data + entry_v2_offset_comp_len);
694 694 putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
695 695 putbe32(base_rev, data + entry_v2_offset_base_rev);
696 696 putbe32(link_rev, data + entry_v2_offset_link_rev);
697 697 putbe32(parent_1, data + entry_v2_offset_parent_1);
698 698 putbe32(parent_2, data + entry_v2_offset_parent_2);
699 699 memcpy(data + entry_v2_offset_node_id, c_node_id,
700 700 c_node_id_len);
701 701 putbe64(sidedata_offset,
702 702 data + entry_v2_offset_sidedata_offset);
703 703 putbe32(sidedata_comp_len,
704 704 data + entry_v2_offset_sidedata_comp_len);
705 705 comp_field = data_comp_mode & 3;
706 706 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
707 707 data[entry_v2_offset_all_comp_mode] = comp_field;
708 708 } else if (self->format_version == format_cl2) {
709 709 putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
710 710 putbe32(offset_flags & 0xffffffffU,
711 711 data + entry_cl2_offset_offset_flags);
712 712 putbe32(comp_len, data + entry_cl2_offset_comp_len);
713 713 putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
714 714 putbe32(parent_1, data + entry_cl2_offset_parent_1);
715 715 putbe32(parent_2, data + entry_cl2_offset_parent_2);
716 716 memcpy(data + entry_cl2_offset_node_id, c_node_id,
717 717 c_node_id_len);
718 718 putbe64(sidedata_offset,
719 719 data + entry_cl2_offset_sidedata_offset);
720 720 putbe32(sidedata_comp_len,
721 721 data + entry_cl2_offset_sidedata_comp_len);
722 722 comp_field = data_comp_mode & 3;
723 723 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
724 724 data[entry_cl2_offset_all_comp_mode] = comp_field;
725 725 putbe32(rank, data + entry_cl2_offset_rank);
726 726 } else {
727 727 raise_revlog_error();
728 728 return NULL;
729 729 }
730 730
731 731 if (self->ntinitialized)
732 732 nt_insert(&self->nt, c_node_id, rev);
733 733
734 734 Py_CLEAR(self->headrevs);
735 735 Py_RETURN_NONE;
736 736 }
737 737
738 738 /* Replace an existing index entry's sidedata offset and length with new ones.
739 739 This cannot be used outside of the context of sidedata rewriting,
740 740 inside the transaction that creates the given revision. */
741 741 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
742 742 {
743 743 uint64_t offset_flags, sidedata_offset;
744 744 Py_ssize_t rev;
745 745 int sidedata_comp_len;
746 746 char comp_mode;
747 747 char *data;
748 748 #if LONG_MAX == 0x7fffffffL
749 749 const char *const sidedata_format = "nKiKB";
750 750 #else
751 751 const char *const sidedata_format = "nkikB";
752 752 #endif
753 753
754 754 if (self->entry_size == v1_entry_size || self->inlined) {
755 755 /*
756 756 There is a bug in the transaction handling when going from an
757 757 inline revlog to a separate index and data file. Turn it off until
758 758 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
759 759 See issue6485.
760 760 */
761 761 raise_revlog_error();
762 762 return NULL;
763 763 }
764 764
765 765 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
766 766 &sidedata_comp_len, &offset_flags, &comp_mode))
767 767 return NULL;
768 768
769 769 if (rev < 0 || rev >= index_length(self)) {
770 770 PyErr_SetString(PyExc_IndexError, "revision outside index");
771 771 return NULL;
772 772 }
773 773 if (rev < self->length) {
774 774 PyErr_SetString(
775 775 PyExc_IndexError,
776 776 "cannot rewrite entries outside of this transaction");
777 777 return NULL;
778 778 }
779 779
780 780 /* Find the newly added node, offset from the "already on-disk" length
781 781 */
782 782 data = self->added + self->entry_size * (rev - self->length);
783 783 if (self->format_version == format_v2) {
784 784 putbe64(offset_flags, data + entry_v2_offset_high);
785 785 putbe64(sidedata_offset,
786 786 data + entry_v2_offset_sidedata_offset);
787 787 putbe32(sidedata_comp_len,
788 788 data + entry_v2_offset_sidedata_comp_len);
789 789 data[entry_v2_offset_all_comp_mode] =
790 790 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
791 791 ((comp_mode & 3) << 2);
792 792 } else if (self->format_version == format_cl2) {
793 793 putbe64(offset_flags, data + entry_cl2_offset_high);
794 794 putbe64(sidedata_offset,
795 795 data + entry_cl2_offset_sidedata_offset);
796 796 putbe32(sidedata_comp_len,
797 797 data + entry_cl2_offset_sidedata_comp_len);
798 798 data[entry_cl2_offset_all_comp_mode] =
799 799 (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
800 800 ((comp_mode & 3) << 2);
801 801 } else {
802 802 raise_revlog_error();
803 803 return NULL;
804 804 }
805 805
806 806 Py_RETURN_NONE;
807 807 }
808 808
809 809 static PyObject *index_stats(indexObject *self)
810 810 {
811 811 PyObject *obj = PyDict_New();
812 812 PyObject *s = NULL;
813 813 PyObject *t = NULL;
814 814
815 815 if (obj == NULL)
816 816 return NULL;
817 817
818 818 #define istat(__n, __d) \
819 819 do { \
820 820 s = PyBytes_FromString(__d); \
821 821 t = PyLong_FromSsize_t(self->__n); \
822 822 if (!s || !t) \
823 823 goto bail; \
824 824 if (PyDict_SetItem(obj, s, t) == -1) \
825 825 goto bail; \
826 826 Py_CLEAR(s); \
827 827 Py_CLEAR(t); \
828 828 } while (0)
829 829
830 830 if (self->added_length)
831 831 istat(new_length, "index entries added");
832 832 istat(length, "revs in memory");
833 833 istat(ntlookups, "node trie lookups");
834 834 istat(ntmisses, "node trie misses");
835 835 istat(ntrev, "node trie last rev scanned");
836 836 if (self->ntinitialized) {
837 837 istat(nt.capacity, "node trie capacity");
838 838 istat(nt.depth, "node trie depth");
839 839 istat(nt.length, "node trie count");
840 840 istat(nt.splits, "node trie splits");
841 841 }
842 842
843 843 #undef istat
844 844
845 845 return obj;
846 846
847 847 bail:
848 848 Py_XDECREF(obj);
849 849 Py_XDECREF(s);
850 850 Py_XDECREF(t);
851 851 return NULL;
852 852 }
853 853
854 854 /*
855 855 * When we cache a list, we want to be sure the caller can't mutate
856 856 * the cached copy.
857 857 */
858 858 static PyObject *list_copy(PyObject *list)
859 859 {
860 860 Py_ssize_t len = PyList_GET_SIZE(list);
861 861 PyObject *newlist = PyList_New(len);
862 862 Py_ssize_t i;
863 863
864 864 if (newlist == NULL)
865 865 return NULL;
866 866
867 867 for (i = 0; i < len; i++) {
868 868 PyObject *obj = PyList_GET_ITEM(list, i);
869 869 Py_INCREF(obj);
870 870 PyList_SET_ITEM(newlist, i, obj);
871 871 }
872 872
873 873 return newlist;
874 874 }
875 875
876 876 static int check_filter(PyObject *filter, Py_ssize_t arg)
877 877 {
878 878 if (filter) {
879 879 PyObject *arglist, *result;
880 880 int isfiltered;
881 881
882 882 arglist = Py_BuildValue("(n)", arg);
883 883 if (!arglist) {
884 884 return -1;
885 885 }
886 886
887 887 result = PyObject_Call(filter, arglist, NULL);
888 888 Py_DECREF(arglist);
889 889 if (!result) {
890 890 return -1;
891 891 }
892 892
893 893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
894 894 * same as this function, so we can just return it directly.*/
895 895 isfiltered = PyObject_IsTrue(result);
896 896 Py_DECREF(result);
897 897 return isfiltered;
898 898 } else {
899 899 return 0;
900 900 }
901 901 }
902 902
903 903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 904 int parent_2, Py_ssize_t i)
905 905 {
906 906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 907 phases[i] = phases[parent_1];
908 908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 909 phases[i] = phases[parent_2];
910 910 }
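
/*
 * Illustrative sketch, not part of the original file: a parent can only
 * raise a child's phase, so a child of a draft (1) and a secret (2) parent
 * that starts out public (0) ends up secret (2).
 */
static inline char example_child_phase(char p1_phase, char p2_phase)
{
	char phases[3] = {p1_phase, p2_phase, 0 /* child starts public */};

	set_phase_from_parents(phases, 0, 1, 2);
	return phases[2];
}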
911 911
912 912 /* Take ownership of a given Python value and add it to a Python list.
913 913 Return -1 on failure (including if [elem] is NULL). */
914 914 static int pylist_append_owned(PyObject *list, PyObject *elem)
915 915 {
916 916 int res;
917 917
918 918 if (elem == NULL)
919 919 return -1;
920 920 res = PyList_Append(list, elem);
921 921 Py_DECREF(elem);
922 922 return res;
923 923 }
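
/*
 * Illustrative usage, not part of the original file: because the helper
 * steals the reference (and accepts NULL), the PyLong below needs no
 * separate error check or Py_DECREF at the call site.
 */
static inline int example_append_rev(PyObject *list, long rev)
{
	return pylist_append_owned(list, PyLong_FromLong(rev));
}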
924 924
925 925 static PyObject *reachableroots2(indexObject *self, PyObject *args)
926 926 {
927 927
928 928 /* Input */
929 929 long minroot;
930 930 PyObject *includepatharg = NULL;
931 931 int includepath = 0;
932 932 /* heads and roots are lists */
933 933 PyObject *heads = NULL;
934 934 PyObject *roots = NULL;
935 935 PyObject *reachable = NULL;
936 936
937 937 Py_ssize_t len = index_length(self);
938 938 long revnum;
939 939 Py_ssize_t k;
940 940 Py_ssize_t i;
941 941 Py_ssize_t l;
942 942 int r;
943 943 int parents[2];
944 944
945 945 /* Internal data structure:
946 946 * tovisit: array of length len+1 (all revs + nullrev), filled up to
947 947 * lentovisit
948 948 *
949 949 * revstates: array of length len+1 (all revs + nullrev) */
950 950 int *tovisit = NULL;
951 951 long lentovisit = 0;
952 952 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
953 953 char *revstates = NULL;
954 954
955 955 /* Get arguments */
956 956 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
957 957 &PyList_Type, &roots, &PyBool_Type,
958 958 &includepatharg))
959 959 goto bail;
960 960
961 961 if (includepatharg == Py_True)
962 962 includepath = 1;
963 963
964 964 /* Initialize return set */
965 965 reachable = PyList_New(0);
966 966 if (reachable == NULL)
967 967 goto bail;
968 968
969 969 /* Initialize internal datastructures */
970 970 tovisit = (int *)malloc((len + 1) * sizeof(int));
971 971 if (tovisit == NULL) {
972 972 PyErr_NoMemory();
973 973 goto bail;
974 974 }
975 975
976 976 revstates = (char *)calloc(len + 1, 1);
977 977 if (revstates == NULL) {
978 978 PyErr_NoMemory();
979 979 goto bail;
980 980 }
981 981
982 982 l = PyList_GET_SIZE(roots);
983 983 for (i = 0; i < l; i++) {
984 984 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
985 985 if (revnum == -1 && PyErr_Occurred())
986 986 goto bail;
987 987 /* If root is out of range, e.g. wdir(), it must be unreachable
988 988 * from heads. So we can just ignore it. */
989 989 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
990 990 continue;
991 991 revstates[revnum + 1] |= RS_ROOT;
992 992 }
993 993
994 994 /* Populate tovisit with all the heads */
995 995 l = PyList_GET_SIZE(heads);
996 996 for (i = 0; i < l; i++) {
997 997 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
998 998 if (revnum == -1 && PyErr_Occurred())
999 999 goto bail;
1000 1000 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
1001 1001 PyErr_SetString(PyExc_IndexError, "head out of range");
1002 1002 goto bail;
1003 1003 }
1004 1004 if (!(revstates[revnum + 1] & RS_SEEN)) {
1005 1005 tovisit[lentovisit++] = (int)revnum;
1006 1006 revstates[revnum + 1] |= RS_SEEN;
1007 1007 }
1008 1008 }
1009 1009
1010 1010 /* Visit the tovisit list and find the reachable roots */
1011 1011 k = 0;
1012 1012 while (k < lentovisit) {
1013 1013 /* Add the node to reachable if it is a root*/
1014 1014 revnum = tovisit[k++];
1015 1015 if (revstates[revnum + 1] & RS_ROOT) {
1016 1016 revstates[revnum + 1] |= RS_REACHABLE;
1017 1017 r = pylist_append_owned(reachable,
1018 1018 PyLong_FromLong(revnum));
1019 1019 if (r < 0)
1020 1020 goto bail;
1021 1021 if (includepath == 0)
1022 1022 continue;
1023 1023 }
1024 1024
1025 1025 /* Add its parents to the list of nodes to visit */
1026 1026 if (revnum == nullrev)
1027 1027 continue;
1028 1028 r = index_get_parents(self, revnum, parents, (int)len - 1);
1029 1029 if (r < 0)
1030 1030 goto bail;
1031 1031 for (i = 0; i < 2; i++) {
1032 1032 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
1033 1033 parents[i] >= minroot) {
1034 1034 tovisit[lentovisit++] = parents[i];
1035 1035 revstates[parents[i] + 1] |= RS_SEEN;
1036 1036 }
1037 1037 }
1038 1038 }
1039 1039
1040 1040 /* Find all the nodes in between the roots we found and the heads
1041 1041 * and add them to the reachable set */
1042 1042 if (includepath == 1) {
1043 1043 long minidx = minroot;
1044 1044 if (minidx < 0)
1045 1045 minidx = 0;
1046 1046 for (i = minidx; i < len; i++) {
1047 1047 if (!(revstates[i + 1] & RS_SEEN))
1048 1048 continue;
1049 1049 r = index_get_parents(self, i, parents, (int)len - 1);
1050 1050 /* Corrupted index file, error is set from
1051 1051 * index_get_parents */
1052 1052 if (r < 0)
1053 1053 goto bail;
1054 1054 if (((revstates[parents[0] + 1] |
1055 1055 revstates[parents[1] + 1]) &
1056 1056 RS_REACHABLE) &&
1057 1057 !(revstates[i + 1] & RS_REACHABLE)) {
1058 1058 revstates[i + 1] |= RS_REACHABLE;
1059 1059 r = pylist_append_owned(reachable,
1060 1060 PyLong_FromSsize_t(i));
1061 1061 if (r < 0)
1062 1062 goto bail;
1063 1063 }
1064 1064 }
1065 1065 }
1066 1066
1067 1067 free(revstates);
1068 1068 free(tovisit);
1069 1069 return reachable;
1070 1070 bail:
1071 1071 Py_XDECREF(reachable);
1072 1072 free(revstates);
1073 1073 free(tovisit);
1074 1074 return NULL;
1075 1075 }
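
/*
 * Illustrative sketch, not part of the original file: the "rev + 1"
 * indexing used by revstates above, which gives nullrev (-1) the first
 * slot; e.g. rev 3 is tracked in revstates[4].
 */
static inline int example_revstate_slot(long revnum)
{
	return (int)(revnum + 1); /* nullrev -> 0, rev 0 -> 1, ... */
}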
1076 1076
1077 1077 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1078 1078 char phase)
1079 1079 {
1080 1080 Py_ssize_t len = index_length(self);
1081 1081 PyObject *item;
1082 1082 PyObject *iterator;
1083 1083 int rev, minrev = -1;
1084 1084
1085 1085 if (!PySet_Check(roots)) {
1086 1086 PyErr_SetString(PyExc_TypeError,
1087 1087 "roots must be a set of nodes");
1088 1088 return -2;
1089 1089 }
1090 1090 iterator = PyObject_GetIter(roots);
1091 1091 if (iterator == NULL)
1092 1092 return -2;
1093 1093 while ((item = PyIter_Next(iterator))) {
1094 1094 rev = (int)PyLong_AsLong(item);
1095 1095 if (rev == -1 && PyErr_Occurred()) {
1096 1096 goto failed;
1097 1097 }
1098 1098 /* null is implicitly public, so negative is invalid */
1099 1099 if (rev < 0 || rev >= len)
1100 1100 goto failed;
1101 1101 phases[rev] = phase;
1102 1102 if (minrev == -1 || minrev > rev)
1103 1103 minrev = rev;
1104 1104 Py_DECREF(item);
1105 1105 }
1106 1106 Py_DECREF(iterator);
1107 1107 return minrev;
1108 1108 failed:
1109 1109 Py_DECREF(iterator);
1110 1110 Py_DECREF(item);
1111 1111 return -2;
1112 1112 }
1113 1113
1114 1114 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1115 1115 {
1116 1116 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1117 1117 96: internal */
1118 1118 static const char trackedphases[] = {1, 2, 32, 96};
1119 1119 PyObject *roots = Py_None;
1120 1120 PyObject *phasesetsdict = NULL;
1121 1121 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1122 1122 Py_ssize_t len = index_length(self);
1123 1123 char *phases = NULL;
1124 1124 int minphaserev = -1, rev, i;
1125 1125 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1126 1126
1127 1127 if (!PyArg_ParseTuple(args, "O", &roots))
1128 1128 return NULL;
1129 1129 if (roots == NULL || !PyDict_Check(roots)) {
1130 1130 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1131 1131 return NULL;
1132 1132 }
1133 1133
1134 1134 phases = calloc(len, 1);
1135 1135 if (phases == NULL) {
1136 1136 PyErr_NoMemory();
1137 1137 return NULL;
1138 1138 }
1139 1139
1140 1140 for (i = 0; i < numphases; ++i) {
1141 1141 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1142 1142 PyObject *phaseroots = NULL;
1143 1143 if (pyphase == NULL)
1144 1144 goto release;
1145 1145 phaseroots = PyDict_GetItem(roots, pyphase);
1146 1146 Py_DECREF(pyphase);
1147 1147 if (phaseroots == NULL)
1148 1148 continue;
1149 1149 rev = add_roots_get_min(self, phaseroots, phases,
1150 1150 trackedphases[i]);
1151 1151 if (rev == -2)
1152 1152 goto release;
1153 1153 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1154 1154 minphaserev = rev;
1155 1155 }
1156 1156
1157 1157 for (i = 0; i < numphases; ++i) {
1158 1158 phasesets[i] = PySet_New(NULL);
1159 1159 if (phasesets[i] == NULL)
1160 1160 goto release;
1161 1161 }
1162 1162
1163 1163 if (minphaserev == -1)
1164 1164 minphaserev = len;
1165 1165 for (rev = minphaserev; rev < len; ++rev) {
1166 1166 PyObject *pyphase = NULL;
1167 1167 PyObject *pyrev = NULL;
1168 1168 int parents[2];
1169 1169 /*
1170 1170 * The parent lookup could be skipped for phaseroots, but
1171 1171 * phase --force would historically not recompute them
1172 1172 * correctly, leaving descendants with a lower phase around.
1173 1173 * As such, unconditionally recompute the phase.
1174 1174 */
1175 1175 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1176 1176 goto release;
1177 1177 set_phase_from_parents(phases, parents[0], parents[1], rev);
1178 1178 switch (phases[rev]) {
1179 1179 case 0:
1180 1180 continue;
1181 1181 case 1:
1182 1182 pyphase = phasesets[0];
1183 1183 break;
1184 1184 case 2:
1185 1185 pyphase = phasesets[1];
1186 1186 break;
1187 1187 case 32:
1188 1188 pyphase = phasesets[2];
1189 1189 break;
1190 1190 case 96:
1191 1191 pyphase = phasesets[3];
1192 1192 break;
1193 1193 default:
1194 1194 /* this should never happen since the phase number is
1195 1195 * specified by this function. */
1196 1196 PyErr_SetString(PyExc_SystemError,
1197 1197 "bad phase number in internal list");
1198 1198 goto release;
1199 1199 }
1200 1200 pyrev = PyLong_FromLong(rev);
1201 1201 if (pyrev == NULL)
1202 1202 goto release;
1203 1203 if (PySet_Add(pyphase, pyrev) == -1) {
1204 1204 Py_DECREF(pyrev);
1205 1205 goto release;
1206 1206 }
1207 1207 Py_DECREF(pyrev);
1208 1208 }
1209 1209
1210 1210 phasesetsdict = _dict_new_presized(numphases);
1211 1211 if (phasesetsdict == NULL)
1212 1212 goto release;
1213 1213 for (i = 0; i < numphases; ++i) {
1214 1214 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1215 1215 if (pyphase == NULL)
1216 1216 goto release;
1217 1217 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1218 1218 -1) {
1219 1219 Py_DECREF(pyphase);
1220 1220 goto release;
1221 1221 }
1222 1222 Py_DECREF(phasesets[i]);
1223 1223 phasesets[i] = NULL;
1224 1224 }
1225 1225
1226 1226 free(phases);
1227 1227 return Py_BuildValue("nN", len, phasesetsdict);
1228 1228
1229 1229 release:
1230 1230 for (i = 0; i < numphases; ++i)
1231 1231 Py_XDECREF(phasesets[i]);
1232 1232 Py_XDECREF(phasesetsdict);
1233 1233
1234 1234 free(phases);
1235 1235 return NULL;
1236 1236 }
1237 1237
1238 1238 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1239 1239 {
1240 1240 Py_ssize_t i, j, len;
1241 1241 char *nothead = NULL;
1242 1242 PyObject *heads = NULL;
1243 1243 PyObject *filter = NULL;
1244 1244 PyObject *filteredrevs = Py_None;
1245 1245
1246 1246 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1247 1247 return NULL;
1248 1248 }
1249 1249
1250 1250 if (self->headrevs && filteredrevs == self->filteredrevs)
1251 1251 return list_copy(self->headrevs);
1252 1252
1253 1253 Py_DECREF(self->filteredrevs);
1254 1254 self->filteredrevs = filteredrevs;
1255 1255 Py_INCREF(filteredrevs);
1256 1256
1257 1257 if (filteredrevs != Py_None) {
1258 1258 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1259 1259 if (!filter) {
1260 1260 PyErr_SetString(
1261 1261 PyExc_TypeError,
1262 1262 "filteredrevs has no attribute __contains__");
1263 1263 goto bail;
1264 1264 }
1265 1265 }
1266 1266
1267 1267 len = index_length(self);
1268 1268 heads = PyList_New(0);
1269 1269 if (heads == NULL)
1270 1270 goto bail;
1271 1271 if (len == 0) {
1272 1272 if (pylist_append_owned(heads, PyLong_FromLong(-1)) == -1) {
1274 1274 goto bail;
1275 1275 }
1276 1276 goto done;
1277 1277 }
1278 1278
1279 1279 nothead = calloc(len, 1);
1280 1280 if (nothead == NULL) {
1281 1281 PyErr_NoMemory();
1282 1282 goto bail;
1283 1283 }
1284 1284
1285 1285 for (i = len - 1; i >= 0; i--) {
1286 1286 int isfiltered;
1287 1287 int parents[2];
1288 1288
1289 1289 /* If nothead[i] == 1, it means we've seen an unfiltered child
1290 1290 * of this node already, and therefore this node is not
1291 1291 * filtered. So we can skip the expensive check_filter step.
1292 1292 */
1293 1293 if (nothead[i] != 1) {
1294 1294 isfiltered = check_filter(filter, i);
1295 1295 if (isfiltered == -1) {
1296 1296 PyErr_SetString(PyExc_TypeError,
1297 1297 "unable to check filter");
1298 1298 goto bail;
1299 1299 }
1300 1300
1301 1301 if (isfiltered) {
1302 1302 nothead[i] = 1;
1303 1303 continue;
1304 1304 }
1305 1305 }
1306 1306
1307 1307 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1308 1308 goto bail;
1309 1309 for (j = 0; j < 2; j++) {
1310 1310 if (parents[j] >= 0)
1311 1311 nothead[parents[j]] = 1;
1312 1312 }
1313 1313 }
1314 1314
1315 1315 for (i = 0; i < len; i++) {
1316 1316 if (nothead[i])
1317 1317 continue;
1318 1318 if (pylist_append_owned(heads, PyLong_FromSsize_t(i)) == -1) {
1319 1319 goto bail;
1320 1320 }
1321 1321 }
1322 1322
1323 1323 done:
1324 1324 self->headrevs = heads;
1325 1325 Py_XDECREF(filter);
1326 1326 free(nothead);
1327 1327 return list_copy(self->headrevs);
1328 1328 bail:
1329 1329 Py_XDECREF(filter);
1330 1330 Py_XDECREF(heads);
1331 1331 free(nothead);
1332 1332 return NULL;
1333 1333 }
1334 1334
1335 1335 /* "rgs" stands for "reverse growable set".
1336 1336    It represents a set of integers that cannot exceed `max`, but
1337 1337    tend to be close to it.
1338 1338
1339 1339 `body` is a growable bit array covering the range `max-len+1..max`,
1340 1340 in reverse order.
1341 1341 `sum` keeps track of the cardinality of the set.
1342 1342 */
1343 1343 typedef struct rgs {
1344 1344 int max;
1345 1345 int len;
1346 1346 char *body;
1347 1347 int sum;
1348 1348 } rgs;
1349 1349
1350 1350 static int rgs_offset(rgs *rgs, int i)
1351 1351 {
1352 1352 return rgs->max - i;
1353 1353 }
1354 1354
1355 1355 /* returns 1 on success, 0 on failure */
1356 1356 static int rgs_alloc(rgs *rgs, int max)
1357 1357 {
1358 1358 int new_len = 64;
1359 1359 char *new_body = calloc(new_len, 1);
1360 1360 if (new_body == NULL)
1361 1361 return 0;
1362 1362 rgs->len = new_len;
1363 1363 rgs->body = new_body;
1364 1364 rgs->max = max;
1365 1365 rgs->sum = 0;
1366 1366 return 1;
1367 1367 }
1368 1368
1369 1369 static bool rgs_get(rgs *rgs, int i)
1370 1370 {
1371 1371 int offset = rgs_offset(rgs, i);
1372 1372 if (offset >= rgs->len) {
1373 1373 return 0;
1374 1374 }
1375 1375 if (offset < 0) {
1376 1376 abort();
1377 1377 }
1378 1378 return rgs->body[offset];
1379 1379 }
1380 1380
1381 1381 /* Realloc `body` to length `new_len`.
1382 1382 Returns -1 when out of memory. */
1383 1383 static int rgs_realloc(rgs *rgs, int new_len)
1384 1384 {
1385 1385 int old_len = rgs->len;
1386 1386 char *old_body = rgs->body;
1387 1387 char *new_body = calloc(new_len, 1);
1388 1388 assert(new_len >= old_len);
1389 1389 if (new_body == NULL)
1390 1390 return -1;
1391 1391 memcpy(new_body, old_body, old_len);
1392 1392 free(old_body);
1393 1393 rgs->body = new_body;
1394 1394 rgs->len = new_len;
1395 1395 return 0;
1396 1396 }
1397 1397
1398 1398 /* Realloc the rgs `body` to include the `offset` */
1399 1399 static int rgs_realloc_amortized(rgs *rgs, int offset)
1400 1400 {
1401 1401 int old_len = rgs->len;
1402 1402 int new_len = old_len * 4;
1403 1403 if (offset >= new_len)
1404 1404 new_len = offset + 1;
1405 1405 return rgs_realloc(rgs, new_len);
1406 1406 }
1407 1407
1408 1408 /* returns 0 on success, -1 on error */
1409 1409 static int rgs_set(rgs *rgs, int i, bool v)
1410 1410 {
1411 1411 int offset = rgs_offset(rgs, i);
1412 1412 if (offset >= rgs->len) {
1413 1413 if (v == 0) {
1414 1414 /* no-op change: no need to resize */
1415 1415 return 0;
1416 1416 }
1417 1417 if (rgs_realloc_amortized(rgs, offset) == -1)
1418 1418 return -1;
1419 1419 }
1420 1420 if (offset < 0) {
1421 1421 abort();
1422 1422 }
1423 1423 rgs->sum -= rgs->body[offset];
1424 1424 rgs->sum += v;
1425 1425 rgs->body[offset] = v;
1426 1426 return 0;
1427 1427 }
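
/*
 * Illustrative usage, not part of the original file: allocating a reverse
 * growable set for revisions 0..tip, marking one member and reading it
 * back, as index_headrevsdiff below does while walking parents. Assumes
 * 0 <= rev <= tip.
 */
static inline int example_rgs_roundtrip(int tip, int rev)
{
	rgs s;
	int present;

	if (!rgs_alloc(&s, tip))
		return -1;
	if (rgs_set(&s, rev, true) == -1) {
		free(s.body);
		return -1;
	}
	present = rgs_get(&s, rev); /* 1, and s.sum is now 1 as well */
	free(s.body);
	return present;
}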
1428 1428
1429 1429 /* Add a size_t value to a Python list. Return -1 on failure. */
1430 1430 static inline int pylist_append_size_t(PyObject *list, size_t v)
1431 1431 {
1432 1432 return pylist_append_owned(list, PyLong_FromSsize_t(v));
1433 1433 }
1434 1434
1435 1435 static PyObject *index_headrevsdiff(indexObject *self, PyObject *args)
1436 1436 {
1437 1437 int begin, end;
1438 1438 Py_ssize_t i, j;
1439 1439 PyObject *heads_added = NULL;
1440 1440 PyObject *heads_removed = NULL;
1441 1441 PyObject *res = NULL;
1442 1442 rgs rgs;
1443 1443 rgs.body = NULL;
1444 1444
1445 1445 if (!PyArg_ParseTuple(args, "ii", &begin, &end))
1446 1446 goto bail;
1447 1447
1448 1448 if (!rgs_alloc(&rgs, end))
1449 1449 goto bail;
1450 1450
1451 1451 heads_added = PyList_New(0);
1452 1452 if (heads_added == NULL)
1453 1453 goto bail;
1454 1454 heads_removed = PyList_New(0);
1455 1455 if (heads_removed == NULL)
1456 1456 goto bail;
1457 1457
1458 1458 for (i = end - 1; i >= begin; i--) {
1459 1459 int parents[2];
1460 1460
1461 1461 if (rgs_get(&rgs, i)) {
1462 1462 if (rgs_set(&rgs, i, false) == -1) {
1463 1463 goto bail;
1464 1464 };
1465 1465 } else {
1466 1466 if (pylist_append_size_t(heads_added, i) == -1) {
1467 1467 goto bail;
1468 1468 }
1469 1469 }
1470 1470
1471 1471 if (index_get_parents(self, i, parents, i) < 0)
1472 1472 goto bail;
1473 1473 for (j = 0; j < 2; j++) {
1474 1474 if (parents[j] >= 0)
1475 1475 if (rgs_set(&rgs, parents[j], true) == -1) {
1476 1476 goto bail;
1477 1477 }
1478 1478 }
1479 1479 }
1480 1480
1481 1481 while (rgs.sum) {
1482 1482 int parents[2];
1483 1483
1484 1484 if (rgs_get(&rgs, i)) {
1485 1485 if (rgs_set(&rgs, i, false) == -1) {
1486 1486 goto bail;
1487 1487 }
1488 1488 if (pylist_append_size_t(heads_removed, i) == -1) {
1489 1489 goto bail;
1490 1490 }
1491 1491 }
1492 1492
1493 1493 if (index_get_parents(self, i, parents, i) < 0)
1494 1494 goto bail;
1495 1495 for (j = 0; j < 2; j++) {
1496 1496 if (parents[j] >= 0)
1497 1497 if (rgs_set(&rgs, parents[j], false) == -1) {
1498 1498 /* can't actually fail */
1499 1499 goto bail;
1500 1500 }
1501 1501 }
1502 1502 i--;
1503 1503 }
1504 1504
1505 1505 if (begin == 0 && end > 0) {
1506 1506 if (pylist_append_size_t(heads_removed, -1) == -1) {
1507 1507 goto bail;
1508 1508 }
1509 1509 }
1510 1510
1511 1511 if (!(res = PyTuple_Pack(2, heads_removed, heads_added))) {
1512 1512 goto bail;
1513 1513 }
1514 1514
1515 1515 Py_XDECREF(heads_removed);
1516 1516 Py_XDECREF(heads_added);
1517 1517 free(rgs.body);
1518 1518 return res;
1519 1519 bail:
1520 1520 Py_XDECREF(heads_added);
1521 1521 Py_XDECREF(heads_removed);
1522 1522 free(rgs.body);
1523 1523 return NULL;
1524 1524 }
1525 1525
1526 1526 /**
1527 1527 * Obtain the delta base revision stored in a revision's index entry.
1528 1528 *
1529 1529 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1530 1530 */
1531 1531 static inline int index_baserev(indexObject *self, int rev)
1532 1532 {
1533 1533 const char *data;
1534 1534 int result;
1535 1535
1536 1536 data = index_deref(self, rev);
1537 1537 if (data == NULL)
1538 1538 return -2;
1539 1539
1540 1540 if (self->format_version == format_v1) {
1541 1541 result = getbe32(data + entry_v1_offset_base_rev);
1542 1542 } else if (self->format_version == format_v2) {
1543 1543 result = getbe32(data + entry_v2_offset_base_rev);
1544 1544 } else if (self->format_version == format_cl2) {
1545 1545 return rev;
1546 1546 } else {
1547 1547 raise_revlog_error();
1548 1548 return -1;
1549 1549 }
1550 1550
1551 1551 if (result > rev) {
1552 1552 PyErr_Format(
1553 1553 PyExc_ValueError,
1554 1554 "corrupted revlog, revision base above revision: %d, %d",
1555 1555 rev, result);
1556 1556 return -2;
1557 1557 }
1558 1558 if (result < -1) {
1559 1559 PyErr_Format(
1560 1560 PyExc_ValueError,
1561 1561 "corrupted revlog, revision base out of range: %d, %d", rev,
1562 1562 result);
1563 1563 return -2;
1564 1564 }
1565 1565 return result;
1566 1566 }
1567 1567
1568 1568 /**
1569 1569 * Determine whether a revision is a snapshot.
1570 1570 *
1571 1571 * Only relevant for sparse-revlog case.
1572 1572 * Callers must ensure that rev is in a valid range.
1573 1573 */
1574 1574 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1575 1575 {
1576 1576 int ps[2];
1577 1577 int b;
1578 1578 Py_ssize_t base;
1579 1579 while (rev >= 0) {
1580 1580 base = (Py_ssize_t)index_baserev(self, rev);
1581 1581 if (base == rev) {
1582 1582 base = -1;
1583 1583 }
1584 1584 if (base == -2) {
1585 1585 assert(PyErr_Occurred());
1586 1586 return -1;
1587 1587 }
1588 1588 if (base == -1) {
1589 1589 return 1;
1590 1590 }
1591 1591 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1592 1592 assert(PyErr_Occurred());
1593 1593 return -1;
1594 1594 };
1595 1595 while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
1596 1596 b = index_baserev(self, ps[0]);
1597 1597 if (b == ps[0]) {
1598 1598 break;
1599 1599 }
1600 1600 ps[0] = b;
1601 1601 }
1602 1602 while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
1603 1603 b = index_baserev(self, ps[1]);
1604 1604 if (b == ps[1]) {
1605 1605 break;
1606 1606 }
1607 1607 ps[1] = b;
1608 1608 }
1609 1609 if (base == ps[0] || base == ps[1]) {
1610 1610 return 0;
1611 1611 }
1612 1612 rev = base;
1613 1613 }
1614 1614 return rev == -1;
1615 1615 }
1616 1616
1617 1617 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1618 1618 {
1619 1619 long rev;
1620 1620 int issnap;
1621 1621 Py_ssize_t length = index_length(self);
1622 1622
1623 1623 if (!pylong_to_long(value, &rev)) {
1624 1624 return NULL;
1625 1625 }
1626 1626 if (rev < -1 || rev >= length) {
1627 1627 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1628 1628 rev);
1629 1629 return NULL;
1630 1630 };
1631 1631 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1632 1632 if (issnap < 0) {
1633 1633 return NULL;
1634 1634 };
1635 1635 return PyBool_FromLong((long)issnap);
1636 1636 }
1637 1637
1638 1638 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1639 1639 {
1640 1640 Py_ssize_t start_rev;
1641 1641 Py_ssize_t end_rev;
1642 1642 PyObject *cache;
1643 1643 Py_ssize_t base;
1644 1644 Py_ssize_t rev;
1645 1645 PyObject *key = NULL;
1646 1646 PyObject *value = NULL;
1647 1647 const Py_ssize_t length = index_length(self);
1648 1648 if (!PyArg_ParseTuple(args, "O!nn", &PyDict_Type, &cache, &start_rev,
1649 1649 &end_rev)) {
1650 1650 return NULL;
1651 1651 }
1652 1652 end_rev += 1;
1653 1653 if (end_rev > length) {
1654 1654 end_rev = length;
1655 1655 }
1656 1656 if (start_rev < 0) {
1657 1657 start_rev = 0;
1658 1658 }
1659 1659 for (rev = start_rev; rev < end_rev; rev++) {
1660 1660 int issnap;
1661 1661 PyObject *allvalues = NULL;
1662 1662 issnap = index_issnapshotrev(self, rev);
1663 1663 if (issnap < 0) {
1664 1664 goto bail;
1665 1665 }
1666 1666 if (issnap == 0) {
1667 1667 continue;
1668 1668 }
1669 1669 base = (Py_ssize_t)index_baserev(self, rev);
1670 1670 if (base == rev) {
1671 1671 base = -1;
1672 1672 }
1673 1673 if (base == -2) {
1674 1674 assert(PyErr_Occurred());
1675 1675 goto bail;
1676 1676 }
1677 1677 key = PyLong_FromSsize_t(base);
1678 1678 allvalues = PyDict_GetItem(cache, key);
1679 1679 if (allvalues == NULL && PyErr_Occurred()) {
1680 1680 goto bail;
1681 1681 }
1682 1682 if (allvalues == NULL) {
1683 1683 int r;
1684 1684 allvalues = PySet_New(0);
1685 1685 if (!allvalues) {
1686 1686 goto bail;
1687 1687 }
1688 1688 r = PyDict_SetItem(cache, key, allvalues);
1689 1689 Py_DECREF(allvalues);
1690 1690 if (r < 0) {
1691 1691 goto bail;
1692 1692 }
1693 1693 }
1694 1694 value = PyLong_FromSsize_t(rev);
1695 1695 if (PySet_Add(allvalues, value)) {
1696 1696 goto bail;
1697 1697 }
1698 1698 Py_CLEAR(key);
1699 1699 Py_CLEAR(value);
1700 1700 }
1701 1701 Py_RETURN_NONE;
1702 1702 bail:
1703 1703 Py_XDECREF(key);
1704 1704 Py_XDECREF(value);
1705 1705 return NULL;
1706 1706 }
1707 1707
1708 1708 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1709 1709 {
1710 1710 int rev, generaldelta;
1711 1711 PyObject *stoparg;
1712 1712 int stoprev, iterrev, baserev = -1;
1713 1713 int stopped;
1714 1714 PyObject *chain = NULL, *result = NULL;
1715 1715 const Py_ssize_t length = index_length(self);
1716 1716
1717 1717 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1718 1718 return NULL;
1719 1719 }
1720 1720
1721 1721 if (PyLong_Check(stoparg)) {
1722 1722 stoprev = (int)PyLong_AsLong(stoparg);
1723 1723 if (stoprev == -1 && PyErr_Occurred()) {
1724 1724 return NULL;
1725 1725 }
1726 1726 } else if (stoparg == Py_None) {
1727 1727 stoprev = -2;
1728 1728 } else {
1729 1729 PyErr_SetString(PyExc_ValueError,
1730 1730 "stoprev must be integer or None");
1731 1731 return NULL;
1732 1732 }
1733 1733
1734 1734 if (rev < 0 || rev >= length) {
1735 1735 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1736 1736 return NULL;
1737 1737 }
1738 1738
1739 1739 chain = PyList_New(0);
1740 1740 if (chain == NULL) {
1741 1741 return NULL;
1742 1742 }
1743 1743
1744 1744 baserev = index_baserev(self, rev);
1745 1745
1746 1746 /* This should never happen. */
1747 1747 if (baserev <= -2) {
1748 1748 /* Error should be set by index_deref() */
1749 1749 assert(PyErr_Occurred());
1750 1750 goto bail;
1751 1751 }
1752 1752
1753 1753 iterrev = rev;
1754 1754
1755 1755 while (iterrev != baserev && iterrev != stoprev) {
1756 1756 if (pylist_append_owned(chain, PyLong_FromLong(iterrev))) {
1757 1757 goto bail;
1758 1758 }
1759 1759
1760 1760 if (generaldelta) {
1761 1761 iterrev = baserev;
1762 1762 } else {
1763 1763 iterrev--;
1764 1764 }
1765 1765
1766 1766 if (iterrev < 0) {
1767 1767 break;
1768 1768 }
1769 1769
1770 1770 if (iterrev >= length) {
1771 1771 PyErr_SetString(PyExc_IndexError,
1772 1772 "revision outside index");
1773 1773 			goto bail; /* release chain before returning */
1774 1774 }
1775 1775
1776 1776 baserev = index_baserev(self, iterrev);
1777 1777
1778 1778 /* This should never happen. */
1779 1779 if (baserev <= -2) {
1780 1780 /* Error should be set by index_deref() */
1781 1781 assert(PyErr_Occurred());
1782 1782 goto bail;
1783 1783 }
1784 1784 }
1785 1785
1786 1786 if (iterrev == stoprev) {
1787 1787 stopped = 1;
1788 1788 } else {
1789 1789 if (pylist_append_owned(chain, PyLong_FromLong(iterrev))) {
1790 1790 goto bail;
1791 1791 }
1792 1792
1793 1793 stopped = 0;
1794 1794 }
1795 1795
1796 1796 if (PyList_Reverse(chain)) {
1797 1797 goto bail;
1798 1798 }
1799 1799
1800 1800 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1801 1801 Py_DECREF(chain);
1802 1802 return result;
1803 1803
1804 1804 bail:
1805 1805 Py_DECREF(chain);
1806 1806 return NULL;
1807 1807 }
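/*
 * Editorial walk-through of index_deltachain() above (illustrative, not part
 * of the original source). Assume generaldelta is enabled, stoparg is None
 * (stoprev == -2) and the hypothetical delta bases are base(5) == 3,
 * base(3) == 0 and base(0) == 0:
 *
 *   iterrev = 5 -> append 5, follow the base pointer to 3
 *   iterrev = 3 -> append 3, follow the base pointer to 0
 *   iterrev = 0 == its own base -> loop ends, 0 is appended as the chain base
 *
 * After PyList_Reverse() the call returns ([0, 3, 5], False): the chain in
 * application order, with stopped == False because stoprev was never reached.
 * Without generaldelta the walk would instead step through consecutive
 * revisions (rev, rev - 1, ...) down to the base.
 */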
1808 1808
1809 1809 static inline int64_t
1810 1810 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1811 1811 {
1812 1812 int64_t start_offset;
1813 1813 int64_t end_offset;
1814 1814 int end_size;
1815 1815 start_offset = index_get_start(self, start_rev);
1816 1816 if (start_offset < 0) {
1817 1817 return -1;
1818 1818 }
1819 1819 end_offset = index_get_start(self, end_rev);
1820 1820 if (end_offset < 0) {
1821 1821 return -1;
1822 1822 }
1823 1823 end_size = index_get_length(self, end_rev);
1824 1824 if (end_size < 0) {
1825 1825 return -1;
1826 1826 }
1827 1827 if (end_offset < start_offset) {
1828 1828 PyErr_Format(PyExc_ValueError,
1829 1829 "corrupted revlog index: inconsistent offset "
1830 1830 "between revisions (%zd) and (%zd)",
1831 1831 start_rev, end_rev);
1832 1832 return -1;
1833 1833 }
1834 1834 return (end_offset - start_offset) + (int64_t)end_size;
1835 1835 }
1836 1836
1837 1837 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1838 1838 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1839 1839 Py_ssize_t startidx, Py_ssize_t endidx)
1840 1840 {
1841 1841 int length;
1842 1842 while (endidx > 1 && endidx > startidx) {
1843 1843 length = index_get_length(self, revs[endidx - 1]);
1844 1844 if (length < 0) {
1845 1845 return -1;
1846 1846 }
1847 1847 if (length != 0) {
1848 1848 break;
1849 1849 }
1850 1850 endidx -= 1;
1851 1851 }
1852 1852 return endidx;
1853 1853 }
1854 1854
1855 1855 struct Gap {
1856 1856 int64_t size;
1857 1857 Py_ssize_t idx;
1858 1858 };
1859 1859
1860 1860 static int gap_compare(const void *left, const void *right)
1861 1861 {
1862 1862 const struct Gap *l_left = ((const struct Gap *)left);
1863 1863 const struct Gap *l_right = ((const struct Gap *)right);
1864 1864 if (l_left->size < l_right->size) {
1865 1865 return -1;
1866 1866 } else if (l_left->size > l_right->size) {
1867 1867 return 1;
1868 1868 }
1869 1869 return 0;
1870 1870 }
1871 1871 static int Py_ssize_t_compare(const void *left, const void *right)
1872 1872 {
1873 1873 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1874 1874 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1875 1875 if (l_left < l_right) {
1876 1876 return -1;
1877 1877 } else if (l_left > l_right) {
1878 1878 return 1;
1879 1879 }
1880 1880 return 0;
1881 1881 }
1882 1882
1883 1883 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1884 1884 {
1885 1885 /* method arguments */
1886 1886 PyObject *list_revs = NULL; /* revisions in the chain */
1887 1887 double targetdensity = 0; /* min density to achieve */
1888 1888 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1889 1889
1890 1890 /* other core variables */
1891 1891 Py_ssize_t idxlen = index_length(self);
1892 1892 Py_ssize_t i; /* used for various iteration */
1893 1893 PyObject *result = NULL; /* the final return of the function */
1894 1894
1895 1895 	/* generic information about the delta chain being sliced */
1896 1896 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1897 1897 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1898 1898 int64_t chainpayload = 0; /* sum of all delta in the chain */
1899 1899 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1900 1900
1901 1901 /* variable used for slicing the delta chain */
1902 1902 int64_t readdata = 0; /* amount of data currently planned to be read */
1903 1903 	double density = 0; /* ratio of payload data compared to the data read */
1904 1904 	int64_t previous_end;
1905 1905 	struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1906 1906 	Py_ssize_t num_gaps =
1907 1907 	    0; /* total number of notable gaps recorded so far */
1908 1908 	Py_ssize_t *selected_indices = NULL; /* indices of the gaps skipped over */
1909 1909 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1910 1910 PyObject *allchunks = NULL; /* all slices */
1911 1911 Py_ssize_t previdx;
1912 1912
1913 1913 /* parsing argument */
1914 1914 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1915 1915 &targetdensity, &mingapsize)) {
1916 1916 goto bail;
1917 1917 }
1918 1918
1919 1919 /* If the delta chain contains a single element, we do not need slicing
1920 1920 */
1921 1921 num_revs = PyList_GET_SIZE(list_revs);
1922 1922 if (num_revs <= 1) {
1923 1923 result = PyTuple_Pack(1, list_revs);
1924 1924 goto done;
1925 1925 }
1926 1926
1927 1927 /* Turn the python list into a native integer array (for efficiency) */
1928 1928 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1929 1929 if (revs == NULL) {
1930 1930 PyErr_NoMemory();
1931 1931 goto bail;
1932 1932 }
1933 1933 for (i = 0; i < num_revs; i++) {
1934 1934 Py_ssize_t revnum =
1935 1935 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1936 1936 if (revnum == -1 && PyErr_Occurred()) {
1937 1937 goto bail;
1938 1938 }
1939 1939 if (revnum < nullrev || revnum >= idxlen) {
1940 1940 PyErr_Format(PyExc_IndexError,
1941 1941 "index out of range: %zd", revnum);
1942 1942 goto bail;
1943 1943 }
1944 1944 revs[i] = revnum;
1945 1945 }
1946 1946
1947 1947 	/* Compute and check various properties of the unsliced delta chain */
1948 1948 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1949 1949 if (deltachainspan < 0) {
1950 1950 goto bail;
1951 1951 }
1952 1952
1953 1953 if (deltachainspan <= mingapsize) {
1954 1954 result = PyTuple_Pack(1, list_revs);
1955 1955 goto done;
1956 1956 }
1957 1957 chainpayload = 0;
1958 1958 for (i = 0; i < num_revs; i++) {
1959 1959 int tmp = index_get_length(self, revs[i]);
1960 1960 if (tmp < 0) {
1961 1961 goto bail;
1962 1962 }
1963 1963 chainpayload += tmp;
1964 1964 }
1965 1965
1966 1966 readdata = deltachainspan;
1967 1967 density = 1.0;
1968 1968
1969 1969 if (0 < deltachainspan) {
1970 1970 density = (double)chainpayload / (double)deltachainspan;
1971 1971 }
1972 1972
1973 1973 if (density >= targetdensity) {
1974 1974 result = PyTuple_Pack(1, list_revs);
1975 1975 goto done;
1976 1976 }
1977 1977
1978 1978 /* if chain is too sparse, look for relevant gaps */
1979 1979 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1980 1980 if (gaps == NULL) {
1981 1981 PyErr_NoMemory();
1982 1982 goto bail;
1983 1983 }
1984 1984
1985 1985 previous_end = -1;
1986 1986 for (i = 0; i < num_revs; i++) {
1987 1987 int64_t revstart;
1988 1988 int revsize;
1989 1989 revstart = index_get_start(self, revs[i]);
1990 1990 if (revstart < 0) {
1991 1991 goto bail;
1992 1992 };
1993 1993 revsize = index_get_length(self, revs[i]);
1994 1994 if (revsize < 0) {
1995 1995 goto bail;
1996 1996 };
1997 1997 if (revsize == 0) {
1998 1998 continue;
1999 1999 }
2000 2000 if (previous_end >= 0) {
2001 2001 int64_t gapsize = revstart - previous_end;
2002 2002 if (gapsize > mingapsize) {
2003 2003 gaps[num_gaps].size = gapsize;
2004 2004 gaps[num_gaps].idx = i;
2005 2005 num_gaps += 1;
2006 2006 }
2007 2007 }
2008 2008 previous_end = revstart + revsize;
2009 2009 }
2010 2010 if (num_gaps == 0) {
2011 2011 result = PyTuple_Pack(1, list_revs);
2012 2012 goto done;
2013 2013 }
2014 2014 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
2015 2015
2016 2016 	/* Slice at the largest gaps first, as they improve the density the most */
2017 2017 selected_indices =
2018 2018 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
2019 2019 if (selected_indices == NULL) {
2020 2020 PyErr_NoMemory();
2021 2021 goto bail;
2022 2022 }
2023 2023
2024 2024 for (i = num_gaps - 1; i >= 0; i--) {
2025 2025 selected_indices[num_selected] = gaps[i].idx;
2026 2026 readdata -= gaps[i].size;
2027 2027 num_selected += 1;
2028 2028 if (readdata <= 0) {
2029 2029 density = 1.0;
2030 2030 } else {
2031 2031 density = (double)chainpayload / (double)readdata;
2032 2032 }
2033 2033 if (density >= targetdensity) {
2034 2034 break;
2035 2035 }
2036 2036 }
2037 2037 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
2038 2038 &Py_ssize_t_compare);
2039 2039
2040 2040 	/* create the resulting slices */
2041 2041 allchunks = PyList_New(0);
2042 2042 if (allchunks == NULL) {
2043 2043 goto bail;
2044 2044 }
2045 2045 previdx = 0;
2046 2046 selected_indices[num_selected] = num_revs;
2047 2047 for (i = 0; i <= num_selected; i++) {
2048 2048 Py_ssize_t idx = selected_indices[i];
2049 2049 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
2050 2050 if (endidx < 0) {
2051 2051 goto bail;
2052 2052 }
2053 2053 if (previdx < endidx) {
2054 2054 PyObject *chunk =
2055 2055 PyList_GetSlice(list_revs, previdx, endidx);
2056 2056 if (pylist_append_owned(allchunks, chunk) == -1) {
2057 2057 goto bail;
2058 2058 }
2059 2059 }
2060 2060 previdx = idx;
2061 2061 }
2062 2062 result = allchunks;
2063 2063 goto done;
2064 2064
2065 2065 bail:
2066 2066 Py_XDECREF(allchunks);
2067 2067 done:
2068 2068 free(revs);
2069 2069 free(gaps);
2070 2070 free(selected_indices);
2071 2071 return result;
2072 2072 }
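/*
 * Minimal standalone sketch (editorial, not part of the original source) of
 * the density heuristic implemented by index_slicechunktodensity() above:
 * keep discounting the largest inter-revision gaps from the data that would
 * be read until payload / read reaches the target density. All names are
 * illustrative and only the standard C library is assumed; the block is
 * guarded by "#if 0" so it is not compiled.
 */
#if 0
#include <stdint.h>
#include <stdlib.h>

static int cmp_gap_desc(const void *a, const void *b)
{
	int64_t l = *(const int64_t *)a, r = *(const int64_t *)b;
	return (l < r) - (l > r); /* descending order */
}

/* Number of the largest gaps that must be skipped to reach `target`. */
static size_t gaps_to_skip(int64_t payload, int64_t span, int64_t *gaps,
                           size_t num_gaps, double target)
{
	int64_t readdata = span;
	size_t skipped = 0;
	qsort(gaps, num_gaps, sizeof(*gaps), cmp_gap_desc);
	while (skipped < num_gaps) {
		double density = readdata <= 0
		                     ? 1.0
		                     : (double)payload / (double)readdata;
		if (density >= target)
			break;
		readdata -= gaps[skipped++];
	}
	return skipped;
}
#endif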
2073 2073
2074 2074 static inline int nt_level(const char *node, Py_ssize_t level)
2075 2075 {
2076 2076 int v = node[level >> 1];
2077 2077 if (!(level & 1))
2078 2078 v >>= 4;
2079 2079 return v & 0xf;
2080 2080 }
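/*
 * Editorial example: for a node whose first two bytes are 0x12 0x34,
 * nt_level() yields the hex digits in order:
 *
 *   nt_level(node, 0) == 0x1    nt_level(node, 1) == 0x2
 *   nt_level(node, 2) == 0x3    nt_level(node, 3) == 0x4
 *
 * i.e. level k selects the k-th hex nibble of the node, which is what lets
 * the trie below branch 16 ways per hex digit.
 */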
2081 2081
2082 2082 /*
2083 2083 * Return values:
2084 2084 *
2085 2085 * -4: match is ambiguous (multiple candidates)
2086 2086 * -2: not found
2087 2087 * rest: valid rev
2088 2088 */
2089 2089 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
2090 2090 int hex)
2091 2091 {
2092 2092 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
2093 2093 int level, maxlevel, off;
2094 2094
2095 2095 /* If the input is binary, do a fast check for the nullid first. */
2096 2096 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
2097 2097 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
2098 2098 return -1;
2099 2099
2100 2100 if (hex)
2101 2101 maxlevel = nodelen;
2102 2102 else
2103 2103 maxlevel = 2 * nodelen;
2104 2104 if (maxlevel > 2 * self->nodelen)
2105 2105 maxlevel = 2 * self->nodelen;
2106 2106
2107 2107 for (level = off = 0; level < maxlevel; level++) {
2108 2108 int k = getnybble(node, level);
2109 2109 nodetreenode *n = &self->nodes[off];
2110 2110 int v = n->children[k];
2111 2111
2112 2112 if (v < 0) {
2113 2113 const char *n;
2114 2114 Py_ssize_t i;
2115 2115
2116 2116 v = -(v + 2);
2117 2117 n = index_node(self->index, v);
2118 2118 if (n == NULL)
2119 2119 return -2;
2120 2120 for (i = level; i < maxlevel; i++)
2121 2121 if (getnybble(node, i) != nt_level(n, i))
2122 2122 return -2;
2123 2123 return v;
2124 2124 }
2125 2125 if (v == 0)
2126 2126 return -2;
2127 2127 off = v;
2128 2128 }
2129 2129 /* multiple matches against an ambiguous prefix */
2130 2130 return -4;
2131 2131 }
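/*
 * Editorial example for nt_find(), assuming the tree holds exactly two
 * revisions whose hex node ids start with "ab12..." and "ab34...":
 *
 *   - looking up the hex prefix "ab" stops on an inner node after both
 *     digits are consumed and returns -4 (ambiguous);
 *   - "ab1" reaches the leaf for the first revision, the remaining digits
 *     are verified against the stored node, and its rev is returned;
 *   - "cd" hits an empty slot on the first digit and returns -2.
 */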
2132 2132
2133 2133 static int nt_new(nodetree *self)
2134 2134 {
2135 2135 if (self->length == self->capacity) {
2136 2136 size_t newcapacity;
2137 2137 nodetreenode *newnodes;
2138 2138 newcapacity = self->capacity * 2;
2139 2139 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
2140 2140 PyErr_SetString(PyExc_MemoryError,
2141 2141 "overflow in nt_new");
2142 2142 return -1;
2143 2143 }
2144 2144 newnodes =
2145 2145 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
2146 2146 if (newnodes == NULL) {
2147 2147 PyErr_SetString(PyExc_MemoryError, "out of memory");
2148 2148 return -1;
2149 2149 }
2150 2150 self->capacity = newcapacity;
2151 2151 self->nodes = newnodes;
2152 2152 memset(&self->nodes[self->length], 0,
2153 2153 sizeof(nodetreenode) * (self->capacity - self->length));
2154 2154 }
2155 2155 return self->length++;
2156 2156 }
2157 2157
2158 2158 static int nt_insert(nodetree *self, const char *node, int rev)
2159 2159 {
2160 2160 int level = 0;
2161 2161 int off = 0;
2162 2162
2163 2163 while (level < 2 * self->nodelen) {
2164 2164 int k = nt_level(node, level);
2165 2165 nodetreenode *n;
2166 2166 int v;
2167 2167
2168 2168 n = &self->nodes[off];
2169 2169 v = n->children[k];
2170 2170
2171 2171 if (v == 0) {
2172 2172 n->children[k] = -rev - 2;
2173 2173 return 0;
2174 2174 }
2175 2175 if (v < 0) {
2176 2176 const char *oldnode =
2177 2177 index_node_existing(self->index, -(v + 2));
2178 2178 int noff;
2179 2179
2180 2180 if (oldnode == NULL)
2181 2181 return -1;
2182 2182 if (!memcmp(oldnode, node, self->nodelen)) {
2183 2183 n->children[k] = -rev - 2;
2184 2184 return 0;
2185 2185 }
2186 2186 noff = nt_new(self);
2187 2187 if (noff == -1)
2188 2188 return -1;
2189 2189 /* self->nodes may have been changed by realloc */
2190 2190 self->nodes[off].children[k] = noff;
2191 2191 off = noff;
2192 2192 n = &self->nodes[off];
2193 2193 n->children[nt_level(oldnode, ++level)] = v;
2194 2194 if (level > self->depth)
2195 2195 self->depth = level;
2196 2196 self->splits += 1;
2197 2197 } else {
2198 2198 level += 1;
2199 2199 off = v;
2200 2200 }
2201 2201 }
2202 2202
2203 2203 return -1;
2204 2204 }
2205 2205
2206 2206 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2207 2207 {
2208 2208 Py_ssize_t rev;
2209 2209 const char *node;
2210 2210 Py_ssize_t length;
2211 2211 if (!PyArg_ParseTuple(args, "n", &rev))
2212 2212 return NULL;
2213 2213 length = index_length(self->nt.index);
2214 2214 if (rev < 0 || rev >= length) {
2215 2215 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2216 2216 return NULL;
2217 2217 }
2218 2218 node = index_node_existing(self->nt.index, rev);
2219 2219 if (nt_insert(&self->nt, node, (int)rev) == -1)
2220 2220 return NULL;
2221 2221 Py_RETURN_NONE;
2222 2222 }
2223 2223
2224 2224 static int nt_delete_node(nodetree *self, const char *node)
2225 2225 {
2226 2226 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2227 2227 */
2228 2228 return nt_insert(self, node, -2);
2229 2229 }
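/*
 * Editorial note on the leaf encoding used throughout the node tree: a
 * child slot stores -(rev + 2) for a leaf, so
 *
 *   rev  0 -> -2      rev  5 -> -7      rev -1 (nullid) -> -1
 *   rev -2 ->  0  (the "empty slot" value, which is why nt_delete_node()
 *                  above can remove an entry by re-inserting it with -2)
 *
 * Decoding uses the same formula, rev = -(v + 2), as seen in nt_find() and
 * nt_shortest().
 */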
2230 2230
2231 2231 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2232 2232 {
2233 2233 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2234 2234 self->nodes = NULL;
2235 2235
2236 2236 self->index = index;
2237 2237 /* The input capacity is in terms of revisions, while the field is in
2238 2238 * terms of nodetree nodes. */
2239 2239 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2240 2240 self->nodelen = index->nodelen;
2241 2241 self->depth = 0;
2242 2242 self->splits = 0;
2243 2243 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2244 2244 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2245 2245 return -1;
2246 2246 }
2247 2247 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2248 2248 if (self->nodes == NULL) {
2249 2249 PyErr_NoMemory();
2250 2250 return -1;
2251 2251 }
2252 2252 self->length = 1;
2253 2253 return 0;
2254 2254 }
2255 2255
2256 2256 static int ntobj_init(nodetreeObject *self, PyObject *args)
2257 2257 {
2258 2258 PyObject *index;
2259 2259 unsigned capacity;
2260 2260 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2261 2261 &capacity))
2262 2262 return -1;
2263 2263 Py_INCREF(index);
2264 2264 return nt_init(&self->nt, (indexObject *)index, capacity);
2265 2265 }
2266 2266
2267 2267 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2268 2268 {
2269 2269 return nt_find(self, node, nodelen, 1);
2270 2270 }
2271 2271
2272 2272 /*
2273 2273 * Find the length of the shortest unique prefix of node.
2274 2274 *
2275 2275 * Return values:
2276 2276 *
2277 2277 * -3: error (exception set)
2278 2278 * -2: not found (no exception set)
2279 2279 * rest: length of shortest prefix
2280 2280 */
2281 2281 static int nt_shortest(nodetree *self, const char *node)
2282 2282 {
2283 2283 int level, off;
2284 2284
2285 2285 for (level = off = 0; level < 2 * self->nodelen; level++) {
2286 2286 int k, v;
2287 2287 nodetreenode *n = &self->nodes[off];
2288 2288 k = nt_level(node, level);
2289 2289 v = n->children[k];
2290 2290 if (v < 0) {
2291 2291 const char *n;
2292 2292 v = -(v + 2);
2293 2293 n = index_node_existing(self->index, v);
2294 2294 if (n == NULL)
2295 2295 return -3;
2296 2296 if (memcmp(node, n, self->nodelen) != 0)
2297 2297 /*
2298 2298 * Found a unique prefix, but it wasn't for the
2299 2299 			 * requested node (i.e. the requested node does
2300 2300 * not exist).
2301 2301 */
2302 2302 return -2;
2303 2303 return level + 1;
2304 2304 }
2305 2305 if (v == 0)
2306 2306 return -2;
2307 2307 off = v;
2308 2308 }
2309 2309 /*
2310 2310 * The node was still not unique after 40 hex digits, so this won't
2311 2311 * happen. Also, if we get here, then there's a programming error in
2312 2312 * this file that made us insert a node longer than 40 hex digits.
2313 2313 */
2314 2314 PyErr_SetString(PyExc_Exception, "broken node tree");
2315 2315 return -3;
2316 2316 }
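/*
 * Editorial example, reusing the two-revision tree from the nt_find() note
 * above ("ab12..." and "ab34..."): nt_shortest() on either full node walks
 * 'a' -> 'b' -> first distinguishing digit and returns 3, i.e. "ab1" and
 * "ab3" are the shortest unambiguous prefixes.
 */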
2317 2317
2318 2318 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2319 2319 {
2320 2320 PyObject *val;
2321 2321 char *node;
2322 2322 int length;
2323 2323
2324 2324 if (!PyArg_ParseTuple(args, "O", &val))
2325 2325 return NULL;
2326 2326 if (node_check(self->nt.nodelen, val, &node) == -1)
2327 2327 return NULL;
2328 2328
2329 2329 length = nt_shortest(&self->nt, node);
2330 2330 if (length == -3)
2331 2331 return NULL;
2332 2332 if (length == -2) {
2333 2333 raise_revlog_error();
2334 2334 return NULL;
2335 2335 }
2336 2336 return PyLong_FromLong(length);
2337 2337 }
2338 2338
2339 2339 static void nt_dealloc(nodetree *self)
2340 2340 {
2341 2341 free(self->nodes);
2342 2342 self->nodes = NULL;
2343 2343 }
2344 2344
2345 2345 static void ntobj_dealloc(nodetreeObject *self)
2346 2346 {
2347 2347 Py_XDECREF(self->nt.index);
2348 2348 nt_dealloc(&self->nt);
2349 2349 PyObject_Del(self);
2350 2350 }
2351 2351
2352 2352 static PyMethodDef ntobj_methods[] = {
2353 2353 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2354 2354 "insert an index entry"},
2355 2355 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2356 2356 "find length of shortest hex nodeid of a binary ID"},
2357 2357 {NULL} /* Sentinel */
2358 2358 };
2359 2359
2360 2360 static PyTypeObject nodetreeType = {
2361 2361 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2362 2362 "parsers.nodetree", /* tp_name */
2363 2363 sizeof(nodetreeObject), /* tp_basicsize */
2364 2364 0, /* tp_itemsize */
2365 2365 (destructor)ntobj_dealloc, /* tp_dealloc */
2366 2366 0, /* tp_print */
2367 2367 0, /* tp_getattr */
2368 2368 0, /* tp_setattr */
2369 2369 0, /* tp_compare */
2370 2370 0, /* tp_repr */
2371 2371 0, /* tp_as_number */
2372 2372 0, /* tp_as_sequence */
2373 2373 0, /* tp_as_mapping */
2374 2374 0, /* tp_hash */
2375 2375 0, /* tp_call */
2376 2376 0, /* tp_str */
2377 2377 0, /* tp_getattro */
2378 2378 0, /* tp_setattro */
2379 2379 0, /* tp_as_buffer */
2380 2380 Py_TPFLAGS_DEFAULT, /* tp_flags */
2381 2381 "nodetree", /* tp_doc */
2382 2382 0, /* tp_traverse */
2383 2383 0, /* tp_clear */
2384 2384 0, /* tp_richcompare */
2385 2385 0, /* tp_weaklistoffset */
2386 2386 0, /* tp_iter */
2387 2387 0, /* tp_iternext */
2388 2388 ntobj_methods, /* tp_methods */
2389 2389 0, /* tp_members */
2390 2390 0, /* tp_getset */
2391 2391 0, /* tp_base */
2392 2392 0, /* tp_dict */
2393 2393 0, /* tp_descr_get */
2394 2394 0, /* tp_descr_set */
2395 2395 0, /* tp_dictoffset */
2396 2396 (initproc)ntobj_init, /* tp_init */
2397 2397 0, /* tp_alloc */
2398 2398 };
2399 2399
2400 2400 static int index_init_nt(indexObject *self)
2401 2401 {
2402 2402 if (!self->ntinitialized) {
2403 2403 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2404 2404 nt_dealloc(&self->nt);
2405 2405 return -1;
2406 2406 }
2407 2407 if (nt_insert(&self->nt, nullid, -1) == -1) {
2408 2408 nt_dealloc(&self->nt);
2409 2409 return -1;
2410 2410 }
2411 2411 self->ntinitialized = 1;
2412 2412 self->ntrev = (int)index_length(self);
2413 2413 self->ntlookups = 1;
2414 2414 self->ntmisses = 0;
2415 2415 }
2416 2416 return 0;
2417 2417 }
2418 2418
2419 2419 /*
2420 2420 * Return values:
2421 2421 *
2422 2422 * -3: error (exception set)
2423 2423 * -2: not found (no exception set)
2424 2424 * rest: valid rev
2425 2425 */
2426 2426 static int index_find_node(indexObject *self, const char *node)
2427 2427 {
2428 2428 int rev;
2429 2429
2430 2430 if (index_init_nt(self) == -1)
2431 2431 return -3;
2432 2432
2433 2433 self->ntlookups++;
2434 2434 rev = nt_find(&self->nt, node, self->nodelen, 0);
2435 2435 if (rev >= -1)
2436 2436 return rev;
2437 2437
2438 2438 /*
2439 2439 * For the first handful of lookups, we scan the entire index,
2440 2440 * and cache only the matching nodes. This optimizes for cases
2441 2441 * like "hg tip", where only a few nodes are accessed.
2442 2442 *
2443 2443 * After that, we cache every node we visit, using a single
2444 2444 * scan amortized over multiple lookups. This gives the best
2445 2445 * bulk performance, e.g. for "hg log".
2446 2446 */
2447 2447 if (self->ntmisses++ < 4) {
2448 2448 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2449 2449 const char *n = index_node_existing(self, rev);
2450 2450 if (n == NULL)
2451 2451 return -3;
2452 2452 if (memcmp(node, n, self->nodelen) == 0) {
2453 2453 if (nt_insert(&self->nt, n, rev) == -1)
2454 2454 return -3;
2455 2455 break;
2456 2456 }
2457 2457 }
2458 2458 } else {
2459 2459 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2460 2460 const char *n = index_node_existing(self, rev);
2461 2461 if (n == NULL)
2462 2462 return -3;
2463 2463 if (nt_insert(&self->nt, n, rev) == -1) {
2464 2464 self->ntrev = rev + 1;
2465 2465 return -3;
2466 2466 }
2467 2467 if (memcmp(node, n, self->nodelen) == 0) {
2468 2468 break;
2469 2469 }
2470 2470 }
2471 2471 self->ntrev = rev;
2472 2472 }
2473 2473
2474 2474 if (rev >= 0)
2475 2475 return rev;
2476 2476 return -2;
2477 2477 }
2478 2478
2479 2479 static PyObject *index_getitem(indexObject *self, PyObject *value)
2480 2480 {
2481 2481 char *node;
2482 2482 int rev;
2483 2483
2484 2484 if (PyLong_Check(value)) {
2485 2485 long idx;
2486 2486 if (!pylong_to_long(value, &idx)) {
2487 2487 return NULL;
2488 2488 }
2489 2489 return index_get(self, idx);
2490 2490 }
2491 2491
2492 2492 if (node_check(self->nodelen, value, &node) == -1)
2493 2493 return NULL;
2494 2494 rev = index_find_node(self, node);
2495 2495 if (rev >= -1)
2496 2496 return PyLong_FromLong(rev);
2497 2497 if (rev == -2)
2498 2498 raise_revlog_error();
2499 2499 return NULL;
2500 2500 }
2501 2501
2502 2502 /*
2503 2503 * Fully populate the radix tree.
2504 2504 */
2505 2505 static int index_populate_nt(indexObject *self)
2506 2506 {
2507 2507 int rev;
2508 2508 if (self->ntrev > 0) {
2509 2509 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2510 2510 const char *n = index_node_existing(self, rev);
2511 2511 if (n == NULL)
2512 2512 return -1;
2513 2513 if (nt_insert(&self->nt, n, rev) == -1)
2514 2514 return -1;
2515 2515 }
2516 2516 self->ntrev = -1;
2517 2517 }
2518 2518 return 0;
2519 2519 }
2520 2520
2521 2521 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2522 2522 {
2523 2523 const char *fullnode;
2524 2524 Py_ssize_t nodelen;
2525 2525 char *node;
2526 2526 int rev, i;
2527 2527
2528 2528 if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
2529 2529 return NULL;
2530 2530
2531 2531 if (nodelen < 1) {
2532 2532 PyErr_SetString(PyExc_ValueError, "key too short");
2533 2533 return NULL;
2534 2534 }
2535 2535
2536 2536 if (nodelen > 2 * self->nodelen) {
2537 2537 PyErr_SetString(PyExc_ValueError, "key too long");
2538 2538 return NULL;
2539 2539 }
2540 2540
2541 2541 for (i = 0; i < nodelen; i++)
2542 2542 hexdigit(node, i);
2543 2543 if (PyErr_Occurred()) {
2544 2544 /* input contains non-hex characters */
2545 2545 PyErr_Clear();
2546 2546 Py_RETURN_NONE;
2547 2547 }
2548 2548
2549 2549 if (index_init_nt(self) == -1)
2550 2550 return NULL;
2551 2551 if (index_populate_nt(self) == -1)
2552 2552 return NULL;
2553 2553 rev = nt_partialmatch(&self->nt, node, nodelen);
2554 2554
2555 2555 switch (rev) {
2556 2556 case -4:
2557 2557 raise_revlog_error();
2558 2558 return NULL;
2559 2559 case -2:
2560 2560 Py_RETURN_NONE;
2561 2561 case -1:
2562 2562 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2563 2563 }
2564 2564
2565 2565 fullnode = index_node_existing(self, rev);
2566 2566 if (fullnode == NULL) {
2567 2567 return NULL;
2568 2568 }
2569 2569 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2570 2570 }
2571 2571
2572 2572 static PyObject *index_shortest(indexObject *self, PyObject *args)
2573 2573 {
2574 2574 PyObject *val;
2575 2575 char *node;
2576 2576 int length;
2577 2577
2578 2578 if (!PyArg_ParseTuple(args, "O", &val))
2579 2579 return NULL;
2580 2580 if (node_check(self->nodelen, val, &node) == -1)
2581 2581 return NULL;
2582 2582
2583 2583 self->ntlookups++;
2584 2584 if (index_init_nt(self) == -1)
2585 2585 return NULL;
2586 2586 if (index_populate_nt(self) == -1)
2587 2587 return NULL;
2588 2588 length = nt_shortest(&self->nt, node);
2589 2589 if (length == -3)
2590 2590 return NULL;
2591 2591 if (length == -2) {
2592 2592 raise_revlog_error();
2593 2593 return NULL;
2594 2594 }
2595 2595 return PyLong_FromLong(length);
2596 2596 }
2597 2597
2598 2598 static PyObject *index_m_get(indexObject *self, PyObject *args)
2599 2599 {
2600 2600 PyObject *val;
2601 2601 char *node;
2602 2602 int rev;
2603 2603
2604 2604 if (!PyArg_ParseTuple(args, "O", &val))
2605 2605 return NULL;
2606 2606 if (node_check(self->nodelen, val, &node) == -1)
2607 2607 return NULL;
2608 2608 rev = index_find_node(self, node);
2609 2609 if (rev == -3)
2610 2610 return NULL;
2611 2611 if (rev == -2)
2612 2612 Py_RETURN_NONE;
2613 2613 return PyLong_FromLong(rev);
2614 2614 }
2615 2615
2616 2616 static int index_contains(indexObject *self, PyObject *value)
2617 2617 {
2618 2618 char *node;
2619 2619
2620 2620 if (PyLong_Check(value)) {
2621 2621 long rev;
2622 2622 if (!pylong_to_long(value, &rev)) {
2623 2623 return -1;
2624 2624 }
2625 2625 return rev >= -1 && rev < index_length(self);
2626 2626 }
2627 2627
2628 2628 if (node_check(self->nodelen, value, &node) == -1)
2629 2629 return -1;
2630 2630
2631 2631 switch (index_find_node(self, node)) {
2632 2632 case -3:
2633 2633 return -1;
2634 2634 case -2:
2635 2635 return 0;
2636 2636 default:
2637 2637 return 1;
2638 2638 }
2639 2639 }
2640 2640
2641 2641 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2642 2642 {
2643 2643 int ret = index_contains(self, args);
2644 2644 if (ret < 0)
2645 2645 return NULL;
2646 2646 return PyBool_FromLong((long)ret);
2647 2647 }
2648 2648
2649 2649 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2650 2650 {
2651 2651 char *node;
2652 2652 int rev;
2653 2653
2654 2654 if (node_check(self->nodelen, val, &node) == -1)
2655 2655 return NULL;
2656 2656 rev = index_find_node(self, node);
2657 2657 if (rev >= -1)
2658 2658 return PyLong_FromLong(rev);
2659 2659 if (rev == -2)
2660 2660 raise_revlog_error();
2661 2661 return NULL;
2662 2662 }
2663 2663
2664 2664 typedef uint64_t bitmask;
2665 2665
2666 2666 /*
2667 2667 * Given a disjoint set of revs, return all candidates for the
2668 2668 * greatest common ancestor. In revset notation, this is the set
2669 2669 * "heads(::a and ::b and ...)"
2670 2670 */
2671 2671 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2672 2672 int revcount)
2673 2673 {
2674 2674 const bitmask allseen = (1ull << revcount) - 1;
2675 2675 const bitmask poison = 1ull << revcount;
2676 2676 PyObject *gca = PyList_New(0);
2677 2677 int i, v, interesting;
2678 2678 int maxrev = -1;
2679 2679 bitmask sp;
2680 2680 bitmask *seen;
2681 2681
2682 2682 if (gca == NULL)
2683 2683 return PyErr_NoMemory();
2684 2684
2685 2685 for (i = 0; i < revcount; i++) {
2686 2686 if (revs[i] > maxrev)
2687 2687 maxrev = revs[i];
2688 2688 }
2689 2689
2690 2690 seen = calloc(sizeof(*seen), maxrev + 1);
2691 2691 if (seen == NULL) {
2692 2692 Py_DECREF(gca);
2693 2693 return PyErr_NoMemory();
2694 2694 }
2695 2695
2696 2696 for (i = 0; i < revcount; i++)
2697 2697 seen[revs[i]] = 1ull << i;
2698 2698
2699 2699 interesting = revcount;
2700 2700
2701 2701 for (v = maxrev; v >= 0 && interesting; v--) {
2702 2702 bitmask sv = seen[v];
2703 2703 int parents[2];
2704 2704
2705 2705 if (!sv)
2706 2706 continue;
2707 2707
2708 2708 if (sv < poison) {
2709 2709 interesting -= 1;
2710 2710 if (sv == allseen) {
2711 2711 if (pylist_append_owned(
2712 2712 gca, PyLong_FromLong(v)) == -1) {
2713 2713 goto bail;
2714 2714 }
2715 2715 sv |= poison;
2716 2716 for (i = 0; i < revcount; i++) {
2717 2717 if (revs[i] == v)
2718 2718 goto done;
2719 2719 }
2720 2720 }
2721 2721 }
2722 2722 if (index_get_parents(self, v, parents, maxrev) < 0)
2723 2723 goto bail;
2724 2724
2725 2725 for (i = 0; i < 2; i++) {
2726 2726 int p = parents[i];
2727 2727 if (p == -1)
2728 2728 continue;
2729 2729 sp = seen[p];
2730 2730 if (sv < poison) {
2731 2731 if (sp == 0) {
2732 2732 seen[p] = sv;
2733 2733 interesting++;
2734 2734 } else if (sp != sv)
2735 2735 seen[p] |= sv;
2736 2736 } else {
2737 2737 if (sp && sp < poison)
2738 2738 interesting--;
2739 2739 seen[p] = sv;
2740 2740 }
2741 2741 }
2742 2742 }
2743 2743
2744 2744 done:
2745 2745 free(seen);
2746 2746 return gca;
2747 2747 bail:
2748 2748 free(seen);
2749 2749 Py_XDECREF(gca);
2750 2750 return NULL;
2751 2751 }
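/*
 * Editorial walk-through of the bitmask scheme above for two input revs a
 * and b (revcount == 2): a is seeded with 01, b with 10, allseen == 11 and
 * poison == 100. Scanning from maxrev down, each revision propagates its
 * mask to its parents; the first ancestors whose mask reaches 11 are
 * reachable from both inputs, are appended to `gca`, and are then poisoned
 * (bit 100 set) so their own ancestors can no longer qualify, leaving only
 * the heads of the common ancestor set.
 */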
2752 2752
2753 2753 /*
2754 2754 * Given a disjoint set of revs, return the subset with the longest
2755 2755 * path to the root.
2756 2756 */
2757 2757 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2758 2758 {
2759 2759 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2760 2760 static const Py_ssize_t capacity = 24;
2761 2761 int *depth, *interesting = NULL;
2762 2762 int i, j, v, ninteresting;
2763 2763 PyObject *dict = NULL, *keys = NULL;
2764 2764 long *seen = NULL;
2765 2765 int maxrev = -1;
2766 2766 long final;
2767 2767
2768 2768 if (revcount > capacity) {
2769 2769 PyErr_Format(PyExc_OverflowError,
2770 2770 "bitset size (%ld) > capacity (%ld)",
2771 2771 (long)revcount, (long)capacity);
2772 2772 return NULL;
2773 2773 }
2774 2774
2775 2775 for (i = 0; i < revcount; i++) {
2776 2776 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2777 2777 if (n > maxrev)
2778 2778 maxrev = n;
2779 2779 }
2780 2780
2781 2781 depth = calloc(sizeof(*depth), maxrev + 1);
2782 2782 if (depth == NULL)
2783 2783 return PyErr_NoMemory();
2784 2784
2785 2785 seen = calloc(sizeof(*seen), maxrev + 1);
2786 2786 if (seen == NULL) {
2787 2787 PyErr_NoMemory();
2788 2788 goto bail;
2789 2789 }
2790 2790
2791 2791 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2792 2792 if (interesting == NULL) {
2793 2793 PyErr_NoMemory();
2794 2794 goto bail;
2795 2795 }
2796 2796
2797 2797 if (PyList_Sort(revs) == -1)
2798 2798 goto bail;
2799 2799
2800 2800 for (i = 0; i < revcount; i++) {
2801 2801 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2802 2802 long b = 1l << i;
2803 2803 depth[n] = 1;
2804 2804 seen[n] = b;
2805 2805 interesting[b] = 1;
2806 2806 }
2807 2807
2808 2808 /* invariant: ninteresting is the number of non-zero entries in
2809 2809 * interesting. */
2810 2810 ninteresting = (int)revcount;
2811 2811
2812 2812 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2813 2813 int dv = depth[v];
2814 2814 int parents[2];
2815 2815 long sv;
2816 2816
2817 2817 if (dv == 0)
2818 2818 continue;
2819 2819
2820 2820 sv = seen[v];
2821 2821 if (index_get_parents(self, v, parents, maxrev) < 0)
2822 2822 goto bail;
2823 2823
2824 2824 for (i = 0; i < 2; i++) {
2825 2825 int p = parents[i];
2826 2826 long sp;
2827 2827 int dp;
2828 2828
2829 2829 if (p == -1)
2830 2830 continue;
2831 2831
2832 2832 dp = depth[p];
2833 2833 sp = seen[p];
2834 2834 if (dp <= dv) {
2835 2835 depth[p] = dv + 1;
2836 2836 if (sp != sv) {
2837 2837 interesting[sv] += 1;
2838 2838 seen[p] = sv;
2839 2839 if (sp) {
2840 2840 interesting[sp] -= 1;
2841 2841 if (interesting[sp] == 0)
2842 2842 ninteresting -= 1;
2843 2843 }
2844 2844 }
2845 2845 } else if (dv == dp - 1) {
2846 2846 long nsp = sp | sv;
2847 2847 if (nsp == sp)
2848 2848 continue;
2849 2849 seen[p] = nsp;
2850 2850 interesting[sp] -= 1;
2851 2851 if (interesting[sp] == 0)
2852 2852 ninteresting -= 1;
2853 2853 if (interesting[nsp] == 0)
2854 2854 ninteresting += 1;
2855 2855 interesting[nsp] += 1;
2856 2856 }
2857 2857 }
2858 2858 interesting[sv] -= 1;
2859 2859 if (interesting[sv] == 0)
2860 2860 ninteresting -= 1;
2861 2861 }
2862 2862
2863 2863 final = 0;
2864 2864 j = ninteresting;
2865 2865 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2866 2866 if (interesting[i] == 0)
2867 2867 continue;
2868 2868 final |= i;
2869 2869 j -= 1;
2870 2870 }
2871 2871 if (final == 0) {
2872 2872 keys = PyList_New(0);
2873 2873 goto bail;
2874 2874 }
2875 2875
2876 2876 dict = PyDict_New();
2877 2877 if (dict == NULL)
2878 2878 goto bail;
2879 2879
2880 2880 for (i = 0; i < revcount; i++) {
2881 2881 PyObject *key;
2882 2882
2883 2883 if ((final & (1 << i)) == 0)
2884 2884 continue;
2885 2885
2886 2886 key = PyList_GET_ITEM(revs, i);
2887 2887 Py_INCREF(key);
2888 2888 Py_INCREF(Py_None);
2889 2889 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2890 2890 Py_DECREF(key);
2891 2891 Py_DECREF(Py_None);
2892 2892 goto bail;
2893 2893 }
2894 2894 }
2895 2895
2896 2896 keys = PyDict_Keys(dict);
2897 2897
2898 2898 bail:
2899 2899 free(depth);
2900 2900 free(seen);
2901 2901 free(interesting);
2902 2902 Py_XDECREF(dict);
2903 2903
2904 2904 return keys;
2905 2905 }
2906 2906
2907 2907 /*
2908 2908 * Given a (possibly overlapping) set of revs, return all the
2909 2909  * common ancestor heads: heads(::args[0] and ::args[1] and ...)
2910 2910 */
2911 2911 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2912 2912 {
2913 2913 PyObject *ret = NULL;
2914 2914 Py_ssize_t argcount, i, len;
2915 2915 bitmask repeat = 0;
2916 2916 int revcount = 0;
2917 2917 int *revs;
2918 2918
2919 2919 argcount = PySequence_Length(args);
2920 2920 revs = PyMem_Malloc(argcount * sizeof(*revs));
2921 2921 if (argcount > 0 && revs == NULL)
2922 2922 return PyErr_NoMemory();
2923 2923 len = index_length(self);
2924 2924
2925 2925 for (i = 0; i < argcount; i++) {
2926 2926 static const int capacity = 24;
2927 2927 PyObject *obj = PySequence_GetItem(args, i);
2928 2928 bitmask x;
2929 2929 long val;
2930 2930
2931 2931 if (!PyLong_Check(obj)) {
2932 2932 PyErr_SetString(PyExc_TypeError,
2933 2933 "arguments must all be ints");
2934 2934 Py_DECREF(obj);
2935 2935 goto bail;
2936 2936 }
2937 2937 val = PyLong_AsLong(obj);
2938 2938 Py_DECREF(obj);
2939 2939 if (val == -1) {
2940 2940 ret = PyList_New(0);
2941 2941 goto done;
2942 2942 }
2943 2943 if (val < 0 || val >= len) {
2944 2944 PyErr_SetString(PyExc_IndexError, "index out of range");
2945 2945 goto bail;
2946 2946 }
2947 2947 /* this cheesy bloom filter lets us avoid some more
2948 2948 * expensive duplicate checks in the common set-is-disjoint
2949 2949 * case */
2950 2950 x = 1ull << (val & 0x3f);
2951 2951 if (repeat & x) {
2952 2952 int k;
2953 2953 for (k = 0; k < revcount; k++) {
2954 2954 if (val == revs[k])
2955 2955 goto duplicate;
2956 2956 }
2957 2957 } else
2958 2958 repeat |= x;
2959 2959 if (revcount >= capacity) {
2960 2960 PyErr_Format(PyExc_OverflowError,
2961 2961 "bitset size (%d) > capacity (%d)",
2962 2962 revcount, capacity);
2963 2963 goto bail;
2964 2964 }
2965 2965 revs[revcount++] = (int)val;
2966 2966 duplicate:;
2967 2967 }
2968 2968
2969 2969 if (revcount == 0) {
2970 2970 ret = PyList_New(0);
2971 2971 goto done;
2972 2972 }
2973 2973 if (revcount == 1) {
2974 2974 PyObject *obj;
2975 2975 ret = PyList_New(1);
2976 2976 if (ret == NULL)
2977 2977 goto bail;
2978 2978 obj = PyLong_FromLong(revs[0]);
2979 2979 if (obj == NULL)
2980 2980 goto bail;
2981 2981 PyList_SET_ITEM(ret, 0, obj);
2982 2982 goto done;
2983 2983 }
2984 2984
2985 2985 ret = find_gca_candidates(self, revs, revcount);
2986 2986 if (ret == NULL)
2987 2987 goto bail;
2988 2988
2989 2989 done:
2990 2990 PyMem_Free(revs);
2991 2991 return ret;
2992 2992
2993 2993 bail:
2994 2994 PyMem_Free(revs);
2995 2995 Py_XDECREF(ret);
2996 2996 return NULL;
2997 2997 }
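/*
 * Minimal standalone sketch (editorial, not part of the original source) of
 * the "cheesy bloom filter" used above: one 64-bit word remembers which
 * (val & 0x3f) buckets were already seen, so the exact O(n) duplicate scan
 * only runs when two inputs share a bucket. Names are illustrative; the
 * block is guarded by "#if 0" so it is not compiled.
 */
#if 0
#include <stdint.h>

static int maybe_seen_before(uint64_t *filter, long val)
{
	uint64_t bit = 1ull << (val & 0x3f);
	if (*filter & bit)
		return 1; /* possibly a duplicate: caller does the exact check */
	*filter |= bit;
	return 0; /* definitely new */
}
#endif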
2998 2998
2999 2999 /*
3000 3000 * Given a (possibly overlapping) set of revs, return the greatest
3001 3001 * common ancestors: those with the longest path to the root.
3002 3002 */
3003 3003 static PyObject *index_ancestors(indexObject *self, PyObject *args)
3004 3004 {
3005 3005 PyObject *ret;
3006 3006 PyObject *gca = index_commonancestorsheads(self, args);
3007 3007 if (gca == NULL)
3008 3008 return NULL;
3009 3009
3010 3010 if (PyList_GET_SIZE(gca) <= 1) {
3011 3011 return gca;
3012 3012 }
3013 3013
3014 3014 ret = find_deepest(self, gca);
3015 3015 Py_DECREF(gca);
3016 3016 return ret;
3017 3017 }
3018 3018
3019 3019 /*
3020 3020 * Invalidate any trie entries introduced by added revs.
3021 3021 */
3022 3022 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
3023 3023 {
3024 3024 Py_ssize_t i, len;
3025 3025
3026 3026 len = self->length + self->new_length;
3027 3027 i = start - self->length;
3028 3028 if (i < 0)
3029 3029 return;
3030 3030
3031 3031 for (i = start; i < len; i++) {
3032 3032 const char *node = index_node(self, i);
3033 3033 nt_delete_node(&self->nt, node);
3034 3034 }
3035 3035
3036 3036 self->new_length = start - self->length;
3037 3037 }
3038 3038
3039 3039 /*
3040 3040  * Delete a numeric range of revs, which must be at the end of the
3041 3041  * index.
3042 3042 */
3043 3043 static int index_slice_del(indexObject *self, PyObject *item)
3044 3044 {
3045 3045 Py_ssize_t start, stop, step, slicelength;
3046 3046 Py_ssize_t length = index_length(self) + 1;
3047 3047 int ret = 0;
3048 3048
3049 3049 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
3050 3050 &slicelength) < 0)
3051 3051 return -1;
3052 3052
3053 3053 if (slicelength <= 0)
3054 3054 return 0;
3055 3055
3056 3056 if ((step < 0 && start < stop) || (step > 0 && start > stop))
3057 3057 stop = start;
3058 3058
3059 3059 if (step < 0) {
3060 3060 stop = start + 1;
3061 3061 start = stop + step * (slicelength - 1) - 1;
3062 3062 step = -step;
3063 3063 }
3064 3064
3065 3065 if (step != 1) {
3066 3066 PyErr_SetString(PyExc_ValueError,
3067 3067 "revlog index delete requires step size of 1");
3068 3068 return -1;
3069 3069 }
3070 3070
3071 3071 if (stop != length - 1) {
3072 3072 PyErr_SetString(PyExc_IndexError,
3073 3073 "revlog index deletion indices are invalid");
3074 3074 return -1;
3075 3075 }
3076 3076
3077 3077 if (start < self->length) {
3078 3078 if (self->ntinitialized) {
3079 3079 Py_ssize_t i;
3080 3080
3081 3081 for (i = start; i < self->length; i++) {
3082 3082 const char *node = index_node_existing(self, i);
3083 3083 if (node == NULL)
3084 3084 return -1;
3085 3085
3086 3086 nt_delete_node(&self->nt, node);
3087 3087 }
3088 3088 if (self->new_length)
3089 3089 index_invalidate_added(self, self->length);
3090 3090 if (self->ntrev > start)
3091 3091 self->ntrev = (int)start;
3092 3092 } else if (self->new_length) {
3093 3093 self->new_length = 0;
3094 3094 }
3095 3095
3096 3096 self->length = start;
3097 3097 goto done;
3098 3098 }
3099 3099
3100 3100 if (self->ntinitialized) {
3101 3101 index_invalidate_added(self, start);
3102 3102 if (self->ntrev > start)
3103 3103 self->ntrev = (int)start;
3104 3104 } else {
3105 3105 self->new_length = start - self->length;
3106 3106 }
3107 3107 done:
3108 3108 Py_CLEAR(self->headrevs);
3109 3109 return ret;
3110 3110 }
3111 3111
3112 3112 /*
3113 3113 * Supported ops:
3114 3114 *
3115 3115 * slice deletion
3116 3116 * string assignment (extend node->rev mapping)
3117 3117 * string deletion (shrink node->rev mapping)
3118 3118 */
3119 3119 static int index_assign_subscript(indexObject *self, PyObject *item,
3120 3120 PyObject *value)
3121 3121 {
3122 3122 char *node;
3123 3123 long rev;
3124 3124
3125 3125 if (PySlice_Check(item) && value == NULL)
3126 3126 return index_slice_del(self, item);
3127 3127
3128 3128 if (node_check(self->nodelen, item, &node) == -1)
3129 3129 return -1;
3130 3130
3131 3131 if (value == NULL)
3132 3132 return self->ntinitialized ? nt_delete_node(&self->nt, node)
3133 3133 : 0;
3134 3134 rev = PyLong_AsLong(value);
3135 3135 if (rev > INT_MAX || rev < 0) {
3136 3136 if (!PyErr_Occurred())
3137 3137 PyErr_SetString(PyExc_ValueError, "rev out of range");
3138 3138 return -1;
3139 3139 }
3140 3140
3141 3141 if (index_init_nt(self) == -1)
3142 3142 return -1;
3143 3143 return nt_insert(&self->nt, node, (int)rev);
3144 3144 }
3145 3145
3146 3146 /*
3147 3147 * Find all RevlogNG entries in an index that has inline data. Update
3148 3148 * the optional "offsets" table with those entries.
3149 3149 */
3150 3150 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
3151 3151 {
3152 3152 const char *data = (const char *)self->buf.buf;
3153 3153 Py_ssize_t pos = 0;
3154 3154 Py_ssize_t end = self->buf.len;
3155 3155 long incr = self->entry_size;
3156 3156 Py_ssize_t len = 0;
3157 3157
3158 3158 while (pos + self->entry_size <= end && pos >= 0) {
3159 3159 uint32_t comp_len, sidedata_comp_len = 0;
3160 3160 /* 3rd element of header is length of compressed inline data */
3161 3161 if (self->format_version == format_v1) {
3162 3162 comp_len =
3163 3163 getbe32(data + pos + entry_v1_offset_comp_len);
3164 3164 sidedata_comp_len = 0;
3165 3165 } else if (self->format_version == format_v2) {
3166 3166 comp_len =
3167 3167 getbe32(data + pos + entry_v2_offset_comp_len);
3168 3168 sidedata_comp_len = getbe32(
3169 3169 data + pos + entry_v2_offset_sidedata_comp_len);
3170 3170 } else {
3171 3171 raise_revlog_error();
3172 3172 return -1;
3173 3173 }
3174 3174 incr = self->entry_size + comp_len + sidedata_comp_len;
3175 3175 if (offsets)
3176 3176 offsets[len] = data + pos;
3177 3177 len++;
3178 3178 pos += incr;
3179 3179 }
3180 3180
3181 3181 if (pos != end) {
3182 3182 if (!PyErr_Occurred())
3183 3183 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3184 3184 return -1;
3185 3185 }
3186 3186
3187 3187 return len;
3188 3188 }
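/*
 * Editorial example of the scan above for an inline v1 revlog: each pass
 * reads the fixed-size header at `pos` and then skips the compressed
 * revision data stored right after it. Assuming the usual 64-byte v1 entry
 * and compressed lengths of 100 and 20 bytes, consecutive entries start at
 * offsets 0, 164 and 248; for v2 the sidedata length is skipped as well.
 */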
3189 3189
3190 3190 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
3191 3191 {
3192 3192 PyObject *data_obj, *inlined_obj;
3193 3193 Py_ssize_t size;
3194 3194
3195 3195 static char *kwlist[] = {"data", "inlined", "format", NULL};
3196 3196
3197 3197 /* Initialize before argument-checking to avoid index_dealloc() crash.
3198 3198 */
3199 3199 self->added = NULL;
3200 3200 self->new_length = 0;
3201 3201 self->added_length = 0;
3202 3202 self->data = NULL;
3203 3203 memset(&self->buf, 0, sizeof(self->buf));
3204 3204 self->headrevs = NULL;
3205 3205 self->filteredrevs = Py_None;
3206 3206 Py_INCREF(Py_None);
3207 3207 self->ntinitialized = 0;
3208 3208 self->offsets = NULL;
3209 3209 self->nodelen = 20;
3210 3210 self->nullentry = NULL;
3211 3211 self->rust_ext_compat = 0;
3212 3212 self->format_version = format_v1;
3213 3213
3214 3214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3215 3215 &data_obj, &inlined_obj,
3216 3216 &(self->format_version)))
3217 3217 return -1;
3218 3218 if (!PyObject_CheckBuffer(data_obj)) {
3219 3219 PyErr_SetString(PyExc_TypeError,
3220 3220 "data does not support buffer interface");
3221 3221 return -1;
3222 3222 }
3223 3223 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3224 3224 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3225 3225 return -1;
3226 3226 }
3227 3227
3228 3228 if (self->format_version == format_v1) {
3229 self->rust_ext_compat = 1;
3230 3229 self->entry_size = v1_entry_size;
3231 3230 } else if (self->format_version == format_v2) {
3232 3231 self->entry_size = v2_entry_size;
3233 3232 } else if (self->format_version == format_cl2) {
3234 3233 self->entry_size = cl2_entry_size;
3235 3234 }
3236 3235
3237 3236 self->nullentry = Py_BuildValue(
3238 3237 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3239 3238 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3240 3239
3241 3240 if (!self->nullentry)
3242 3241 return -1;
3243 3242 PyObject_GC_UnTrack(self->nullentry);
3244 3243
3245 3244 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3246 3245 return -1;
3247 3246 size = self->buf.len;
3248 3247
3249 3248 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3250 3249 self->data = data_obj;
3251 3250
3252 3251 self->ntlookups = self->ntmisses = 0;
3253 3252 self->ntrev = -1;
3254 3253 Py_INCREF(self->data);
3255 3254
3256 3255 if (self->inlined) {
3257 3256 Py_ssize_t len = inline_scan(self, NULL);
3258 3257 if (len == -1)
3259 3258 goto bail;
3260 3259 self->length = len;
3261 3260 } else {
3262 3261 if (size % self->entry_size) {
3263 3262 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3264 3263 goto bail;
3265 3264 }
3266 3265 self->length = size / self->entry_size;
3267 3266 }
3268 3267
3269 3268 return 0;
3270 3269 bail:
3271 3270 return -1;
3272 3271 }
3273 3272
3274 3273 static PyObject *index_nodemap(indexObject *self)
3275 3274 {
3276 3275 Py_INCREF(self);
3277 3276 return (PyObject *)self;
3278 3277 }
3279 3278
3280 3279 static void _index_clearcaches(indexObject *self)
3281 3280 {
3282 3281 if (self->offsets) {
3283 3282 PyMem_Free((void *)self->offsets);
3284 3283 self->offsets = NULL;
3285 3284 }
3286 3285 if (self->ntinitialized) {
3287 3286 nt_dealloc(&self->nt);
3288 3287 }
3289 3288 self->ntinitialized = 0;
3290 3289 Py_CLEAR(self->headrevs);
3291 3290 }
3292 3291
3293 3292 static PyObject *index_clearcaches(indexObject *self)
3294 3293 {
3295 3294 _index_clearcaches(self);
3296 3295 self->ntrev = -1;
3297 3296 self->ntlookups = self->ntmisses = 0;
3298 3297 Py_RETURN_NONE;
3299 3298 }
3300 3299
3301 3300 static void index_dealloc(indexObject *self)
3302 3301 {
3303 3302 _index_clearcaches(self);
3304 3303 Py_XDECREF(self->filteredrevs);
3305 3304 if (self->buf.buf) {
3306 3305 PyBuffer_Release(&self->buf);
3307 3306 memset(&self->buf, 0, sizeof(self->buf));
3308 3307 }
3309 3308 Py_XDECREF(self->data);
3310 3309 PyMem_Free(self->added);
3311 3310 Py_XDECREF(self->nullentry);
3312 3311 PyObject_Del(self);
3313 3312 }
3314 3313
3315 3314 static PySequenceMethods index_sequence_methods = {
3316 3315 (lenfunc)index_length, /* sq_length */
3317 3316 0, /* sq_concat */
3318 3317 0, /* sq_repeat */
3319 3318 (ssizeargfunc)index_get, /* sq_item */
3320 3319 0, /* sq_slice */
3321 3320 0, /* sq_ass_item */
3322 3321 0, /* sq_ass_slice */
3323 3322 (objobjproc)index_contains, /* sq_contains */
3324 3323 };
3325 3324
3326 3325 static PyMappingMethods index_mapping_methods = {
3327 3326 (lenfunc)index_length, /* mp_length */
3328 3327 (binaryfunc)index_getitem, /* mp_subscript */
3329 3328 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
3330 3329 };
3331 3330
3332 3331 static PyMethodDef index_methods[] = {
3333 3332 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3334 3333 "return the gca set of the given revs"},
3335 3334 {"headrevsdiff", (PyCFunction)index_headrevsdiff, METH_VARARGS,
3336 3335 "return the set of heads removed/added by a range of commits"},
3337 3336 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3338 3337 METH_VARARGS,
3339 3338 "return the heads of the common ancestors of the given revs"},
3340 3339 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3341 3340 "clear the index caches"},
3342 3341 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3343 3342 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3344 3343 "return `rev` associated with a node or None"},
3345 3344 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3346 3345 	 "return True if the node exists in the index"},
3347 3346 {"rev", (PyCFunction)index_m_rev, METH_O,
3348 3347 "return `rev` associated with a node or raise RevlogError"},
3349 3348 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3350 3349 "compute phases"},
3351 3350 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3352 3351 "reachableroots"},
3353 3352 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3354 3353 METH_VARARGS, "replace an existing index entry with a new value"},
3355 3354 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3356 3355 "get head revisions"}, /* Can do filtering since 3.2 */
3357 3356 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3358 3357 "get filtered head revisions"}, /* Can always do filtering */
3359 3358 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3360 3359 "True if the object is a snapshot"},
3361 3360 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3362 3361 "Gather snapshot data in a cache dict"},
3363 3362 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3364 3363 "determine revisions with deltas to reconstruct fulltext"},
3365 3364 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3366 3365 	 METH_VARARGS, "slice a delta chain into chunks of sufficient density"},
3367 3366 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3368 3367 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3369 3368 "match a potentially ambiguous node ID"},
3370 3369 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3371 3370 "find length of shortest hex nodeid of a binary ID"},
3372 3371 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3373 3372 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3374 3373 "return an entry in binary form"},
3375 3374 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3376 3375 "pack the revlog header information into binary"},
3377 3376 {NULL} /* Sentinel */
3378 3377 };
3379 3378
3380 3379 static PyGetSetDef index_getset[] = {
3381 3380 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
3382 3381 {NULL} /* Sentinel */
3383 3382 };
3384 3383
3385 3384 static PyMemberDef index_members[] = {
3386 3385 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3387 3386 "size of an index entry"},
3388 3387 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3389 3388 	 "compatibility with the rust extensions"},
3390 3389 {NULL} /* Sentinel */
3391 3390 };
3392 3391
3393 3392 PyTypeObject HgRevlogIndex_Type = {
3394 3393 PyVarObject_HEAD_INIT(NULL, 0) /* header */
3395 3394 "parsers.index", /* tp_name */
3396 3395 sizeof(indexObject), /* tp_basicsize */
3397 3396 0, /* tp_itemsize */
3398 3397 (destructor)index_dealloc, /* tp_dealloc */
3399 3398 0, /* tp_print */
3400 3399 0, /* tp_getattr */
3401 3400 0, /* tp_setattr */
3402 3401 0, /* tp_compare */
3403 3402 0, /* tp_repr */
3404 3403 0, /* tp_as_number */
3405 3404 &index_sequence_methods, /* tp_as_sequence */
3406 3405 &index_mapping_methods, /* tp_as_mapping */
3407 3406 0, /* tp_hash */
3408 3407 0, /* tp_call */
3409 3408 0, /* tp_str */
3410 3409 0, /* tp_getattro */
3411 3410 0, /* tp_setattro */
3412 3411 0, /* tp_as_buffer */
3413 3412 Py_TPFLAGS_DEFAULT, /* tp_flags */
3414 3413 "revlog index", /* tp_doc */
3415 3414 0, /* tp_traverse */
3416 3415 0, /* tp_clear */
3417 3416 0, /* tp_richcompare */
3418 3417 0, /* tp_weaklistoffset */
3419 3418 0, /* tp_iter */
3420 3419 0, /* tp_iternext */
3421 3420 index_methods, /* tp_methods */
3422 3421 index_members, /* tp_members */
3423 3422 index_getset, /* tp_getset */
3424 3423 0, /* tp_base */
3425 3424 0, /* tp_dict */
3426 3425 0, /* tp_descr_get */
3427 3426 0, /* tp_descr_set */
3428 3427 0, /* tp_dictoffset */
3429 3428 (initproc)index_init, /* tp_init */
3430 3429 0, /* tp_alloc */
3431 3430 };
3432 3431
3433 3432 /*
3434 3433 * returns a tuple of the form (index, cache) with elements as
3435 3434 * follows:
3436 3435 *
3437 3436 * index: an index object that lazily parses Revlog (v1 or v2) records
3438 3437 * cache: if data is inlined, a tuple (0, index_file_content), else None
3439 3438 * index_file_content could be a string, or a buffer
3440 3439 *
3441 3440 * added complications are for backwards compatibility
3442 3441 */
3443 3442 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3444 3443 {
3445 3444 PyObject *cache = NULL;
3446 3445 indexObject *idx;
3447 3446 int ret;
3448 3447
3449 3448 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3450 3449 if (idx == NULL)
3451 3450 goto bail;
3452 3451
3453 3452 ret = index_init(idx, args, kwargs);
3454 3453 if (ret == -1)
3455 3454 goto bail;
3456 3455
3457 3456 if (idx->inlined) {
3458 3457 cache = Py_BuildValue("iO", 0, idx->data);
3459 3458 if (cache == NULL)
3460 3459 goto bail;
3461 3460 } else {
3462 3461 cache = Py_None;
3463 3462 Py_INCREF(cache);
3464 3463 }
3465 3464
3466 3465 return Py_BuildValue("NN", idx, cache);
3467 3466
3468 3467 bail:
3469 3468 Py_XDECREF(idx);
3470 3469 Py_XDECREF(cache);
3471 3470 return NULL;
3472 3471 }
3473 3472
3474 3473 static Revlog_CAPI CAPI = {
3475 3474 /* increment the abi_version field upon each change in the Revlog_CAPI
3476 3475 struct or in the ABI of the listed functions */
3477 3476 3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
3478 3477 };
3479 3478
3480 3479 void revlog_module_init(PyObject *mod)
3481 3480 {
3482 3481 PyObject *caps = NULL;
3483 3482 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3484 3483 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3485 3484 return;
3486 3485 Py_INCREF(&HgRevlogIndex_Type);
3487 3486 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3488 3487
3489 3488 nodetreeType.tp_new = PyType_GenericNew;
3490 3489 if (PyType_Ready(&nodetreeType) < 0)
3491 3490 return;
3492 3491 Py_INCREF(&nodetreeType);
3493 3492 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3494 3493
3495 3494 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3496 3495 if (caps != NULL)
3497 3496 PyModule_AddObject(mod, "revlog_CAPI", caps);
3498 3497 }
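/*
 * Editorial sketch of how another C extension could consume the capsule
 * registered above. The module and capsule names come from this file; the
 * helper itself is illustrative and relies on the headers this file already
 * includes. The block is guarded by "#if 0" so it is not compiled.
 */
#if 0
static Revlog_CAPI *get_revlog_capi(void)
{
	Revlog_CAPI *capi = (Revlog_CAPI *)PyCapsule_Import(
	    "mercurial.cext.parsers.revlog_CAPI", 0);
	if (capi != NULL && capi->abi_version != 3) {
		PyErr_SetString(PyExc_RuntimeError,
		                "unexpected revlog C ABI version");
		return NULL;
	}
	return capi;
}
#endif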