##// END OF EJS Templates
revlogv2: preserve the compression mode on disk...
marmoute -
r48025:6bfa6c2c default
parent child Browse files
Show More
@@ -1,3037 +1,3040 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 #ifdef IS_PY3K
27 27 /* The mapping of Python types is meant to be temporary to get Python
28 28 * 3 to compile. We should remove this once Python 3 support is fully
29 29 * supported and proper types are used in the extensions themselves. */
30 30 #define PyInt_Check PyLong_Check
31 31 #define PyInt_FromLong PyLong_FromLong
32 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 33 #define PyInt_AsLong PyLong_AsLong
34 34 #endif
35 35
typedef struct indexObjectStruct indexObject;

/* One trie node: 16-way fan-out, one child slot per hex nybble. */
typedef struct {
	int children[16];
} nodetreenode;

/* Function table exposed to other C code; abi_version guards layout
 * compatibility (presumably consumed through a capsule — confirm). */
typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index;  /* back-pointer to the owning index */
	nodetreenode *nodes; /* growable array of trie nodes */
	Py_ssize_t nodelen;  /* number of hash bytes indexed per node */
	size_t length;       /* # nodes in use */
	size_t capacity;     /* # nodes allocated */
	int depth;           /* maximum depth of tree */
	int splits;          /* # splits performed */
} nodetree;

/* Python object wrapper around a standalone nodetree. */
typedef struct {
	PyObject_HEAD /* ; */
	nodetree nt;
} nodetreeObject;

/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * end.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;     /* raw bytes of index */
	Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
	PyObject *nullentry;    /* fast path for references to null */
	Py_buffer buf;          /* buffer of data */
	const char **offsets;   /* populated on demand */
	Py_ssize_t length;      /* current on-disk number of elements */
	unsigned new_length;    /* number of added elements */
	unsigned added_length;  /* space reserved for added elements */
	char *added;            /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;            /* 1 if revision data is stored inline */
	long entry_size;        /* size of one index entry; differs in v1
	                           v.s. v2 format (64 v.s. 96 bytes) */
	char format_version;    /* revlog format: format_v1 or format_v2 */
};
107 107
/* Total number of revisions: on-disk entries plus in-memory additions. */
static Py_ssize_t index_length(const indexObject *self)
{
	return self->length + self->new_length;
}
112 112
/* All-zero hash for the null revision; sized for up to 32-byte digests. */
static const char nullid[32] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node);

/* Parse/build format for a full index entry tuple; the 64-bit format
 * units differ depending on the platform width of `long`. */
#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB");
#else
static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB");
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_entry_size = 64;

/* A Revlogv2 index entry is 96 bytes long. */
static const long v2_entry_size = 96;

static const long format_v1 = 1; /* Internal only, could be any number */
static const long format_v2 = 2; /* Internal only, could be any number */

/* Compression mode meaning "stored as-is inside the revlog data";
 * presumably mirrors COMP_MODE_INLINE on the Python side — confirm. */
static const char comp_mode_inline = 2;
136 136
/*
 * Set mercurial.error.RevlogError as the current Python exception.
 * Falls back to SystemError if the class cannot be resolved.
 */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	/* borrowed reference; INCREF'd below so cleanup can XDECREF it */
	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	Py_INCREF(dict);

	/* borrowed reference — no DECREF needed on errclass */
	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
166 166
/*
 * Return a pointer to the beginning of a RevlogNG record.
 *
 * Returns NULL (with a Python error set) if the lazy inline-offset
 * table cannot be allocated or the inline data cannot be scanned.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	/* Entries past the on-disk length live in the in-memory "added"
	 * arena. */
	if (pos >= self->length)
		return self->added + (pos - self->length) * self->entry_size;

	/* With inlined data, entries have variable stride (entry +
	 * revision data), so an offset table is built lazily on first
	 * access. */
	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			Py_ssize_t ret;
			self->offsets =
			    PyMem_Malloc(self->length * sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			ret = inline_scan(self, self->offsets);
			if (ret == -1) {
				return NULL;
			};
		}
		return self->offsets[pos];
	}

	/* Non-inline case: fixed-size records directly in the buffer. */
	return (const char *)(self->buf.buf) + pos * self->entry_size;
}
192 192
193 193 /*
194 194 * Get parents of the given rev.
195 195 *
196 196 * The specified rev must be valid and must not be nullrev. A returned
197 197 * parent revision may be nullrev, but is guaranteed to be in valid range.
198 198 */
199 199 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
200 200 int maxrev)
201 201 {
202 202 const char *data = index_deref(self, rev);
203 203
204 204 ps[0] = getbe32(data + 24);
205 205 ps[1] = getbe32(data + 28);
206 206
207 207 /* If index file is corrupted, ps[] may point to invalid revisions. So
208 208 * there is a risk of buffer overflow to trust them unconditionally. */
209 209 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
210 210 PyErr_SetString(PyExc_ValueError, "parent out of range");
211 211 return -1;
212 212 }
213 213 return 0;
214 214 }
215 215
/*
 * Get parents of the given rev.
 *
 * If the specified rev is out of range, IndexError will be raised. If the
 * revlog entry is corrupted, ValueError may be raised.
 *
 * Returns 0 on success or -1 on failure.
 */
static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
{
	int tiprev;
	/* Defensive checks: this is an ABI entry point callable from
	 * other extensions (see Revlog_CAPI). */
	if (!op || !HgRevlogIndex_Check(op) || !ps) {
		PyErr_BadInternalCall();
		return -1;
	}
	tiprev = (int)index_length((indexObject *)op) - 1;
	if (rev < -1 || rev > tiprev) {
		PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
		return -1;
	} else if (rev == -1) {
		/* nullrev's parents are both nullrev */
		ps[0] = ps[1] = -1;
		return 0;
	} else {
		return index_get_parents((indexObject *)op, rev, ps, tiprev);
	}
}
242 242
/*
 * Return the byte offset of the given rev's chunk in the data file.
 * The on-disk field packs (offset << 16) | flags; the low 16 bits are
 * shifted away before returning.
 */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	uint64_t offset;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);
	/* NOTE(review): index_deref() can return NULL on failure; the
	 * result is not checked here — callers must pass valid revs. */
	offset = getbe32(data + 4);
	if (rev == 0) {
		/* mask out version number for the first entry */
		offset &= 0xFFFF;
	} else {
		uint32_t offset_high = getbe32(data);
		offset |= ((uint64_t)offset_high) << 32;
	}
	return (int64_t)(offset >> 16);
}
262 262
263 263 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
264 264 {
265 265 const char *data;
266 266 int tmp;
267 267
268 268 if (rev == nullrev)
269 269 return 0;
270 270
271 271 data = index_deref(self, rev);
272 272
273 273 tmp = (int)getbe32(data + 8);
274 274 if (tmp < 0) {
275 275 PyErr_Format(PyExc_OverflowError,
276 276 "revlog entry size out of bound (%d)", tmp);
277 277 return -1;
278 278 }
279 279 return tmp;
280 280 }
281 281
/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 *
 * Decode entry `pos` into the 12-item Python tuple described by
 * tuple_format. nullrev returns the cached nullentry.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len;
	char data_comp_mode;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	/*
	 * The first entry on-disk needs the version number masked out,
	 * but this doesn't apply if entries are added to an empty index.
	 */
	if (self->length && pos == 0)
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	if (self->format_version == format_v1) {
		/* v1 has no sidedata and no on-disk compression mode, so
		 * the mode is implied to be "inline". */
		sidedata_offset = 0;
		sidedata_comp_len = 0;
		data_comp_mode = comp_mode_inline;
	} else {
		/* v2 persists sidedata info and the compression mode
		 * (byte 76 of the 96-byte entry). */
		sidedata_offset = getbe64(data + 64);
		sidedata_comp_len = getbe32(data + 72);
		data_comp_mode = data[76];
	}

	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
	                     self->nodelen, sidedata_offset, sidedata_comp_len,
	                     data_comp_mode);
}
352 353 /*
353 354 * Pack header information in binary
354 355 */
355 356 static PyObject *index_pack_header(indexObject *self, PyObject *args)
356 357 {
357 358 int header;
358 359 char out[4];
359 360 if (!PyArg_ParseTuple(args, "I", &header)) {
360 361 return NULL;
361 362 }
362 363 if (self->format_version != format_v1) {
363 364 PyErr_Format(PyExc_RuntimeError,
364 365 "version header should go in the docket, not the "
365 366 "index: %lu",
366 367 header);
367 368 return NULL;
368 369 }
369 370 putbe32(header, out);
370 371 return PyBytes_FromStringAndSize(out, 4);
371 372 }
/*
 * Return the raw binary string representing a revision.
 *
 * `value` is the revision number (a Python int); returns a bytes object
 * of entry_size bytes (entry_size - 4 for v1 rev 0, see below).
 */
static PyObject *index_entry_binary(indexObject *self, PyObject *value)
{
	long rev;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (!pylong_to_long(value, &rev)) {
		return NULL;
	}
	if (rev < 0 || rev >= length) {
		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
		             rev);
		return NULL;
	};

	data = index_deref(self, rev);
	if (data == NULL)
		return NULL;
	if (rev == 0 && self->format_version == format_v1) {
		/* the header is eating the start of the first entry */
		return PyBytes_FromStringAndSize(data + 4,
		                                 self->entry_size - 4);
	}
	return PyBytes_FromStringAndSize(data, self->entry_size);
}
400 401
401 402 /*
402 403 * Return the hash of node corresponding to the given rev.
403 404 */
404 405 static const char *index_node(indexObject *self, Py_ssize_t pos)
405 406 {
406 407 Py_ssize_t length = index_length(self);
407 408 const char *data;
408 409
409 410 if (pos == nullrev)
410 411 return nullid;
411 412
412 413 if (pos >= length)
413 414 return NULL;
414 415
415 416 data = index_deref(self, pos);
416 417 return data ? data + 32 : NULL;
417 418 }
418 419
419 420 /*
420 421 * Return the hash of the node corresponding to the given rev. The
421 422 * rev is assumed to be existing. If not, an exception is set.
422 423 */
423 424 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
424 425 {
425 426 const char *node = index_node(self, pos);
426 427 if (node == NULL) {
427 428 PyErr_Format(PyExc_IndexError, "could not access rev %d",
428 429 (int)pos);
429 430 }
430 431 return node;
431 432 }
432 433
433 434 static int nt_insert(nodetree *self, const char *node, int rev);
434 435
435 436 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
436 437 {
437 438 Py_ssize_t thisnodelen;
438 439 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
439 440 return -1;
440 441 if (nodelen == thisnodelen)
441 442 return 0;
442 443 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
443 444 thisnodelen, nodelen);
444 445 return -1;
445 446 }
446 447
/*
 * Append one entry (an 12-item tuple matching tuple_format) to the
 * in-memory "added" arena. Entries are serialized to the on-disk byte
 * layout immediately; v2 additionally stores sidedata info and the
 * data compression mode at byte 76.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	uint64_t offset_flags, sidedata_offset;
	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	char data_comp_mode;
	Py_ssize_t c_node_id_len, sidedata_comp_len;
	const char *c_node_id;
	char *data;

	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
	                      &parent_2, &c_node_id, &c_node_id_len,
	                      &sidedata_offset, &sidedata_comp_len,
	                      &data_comp_mode)) {
		PyErr_SetString(PyExc_TypeError, "11-tuple required");
		return NULL;
	}

	if (c_node_id_len != self->nodelen) {
		PyErr_SetString(PyExc_TypeError, "invalid node");
		return NULL;
	}
	/* v1 cannot persist a compression mode on disk, so only the
	 * implied "inline" mode is acceptable there; v2 stores it. */
	if (self->format_version == format_v1 &&
	    data_comp_mode != comp_mode_inline) {
		PyErr_Format(PyExc_ValueError,
		             "invalid data compression mode: %i",
		             data_comp_mode);
		return NULL;
	}

	/* Grow the "added" arena geometrically (4096 entries minimum). */
	if (self->new_length == self->added_length) {
		size_t new_added_length =
		    self->added_length ? self->added_length * 2 : 4096;
		void *new_added = PyMem_Realloc(
		    self->added, new_added_length * self->entry_size);
		if (!new_added)
			return PyErr_NoMemory();
		self->added = new_added;
		self->added_length = new_added_length;
	}
	rev = self->length + self->new_length;
	data = self->added + self->entry_size * self->new_length++;
	putbe32(offset_flags >> 32, data);
	putbe32(offset_flags & 0xffffffffU, data + 4);
	putbe32(comp_len, data + 8);
	putbe32(uncomp_len, data + 12);
	putbe32(base_rev, data + 16);
	putbe32(link_rev, data + 20);
	putbe32(parent_1, data + 24);
	putbe32(parent_2, data + 28);
	memcpy(data + 32, c_node_id, c_node_id_len);
	/* Padding since SHA-1 is only 20 bytes for now */
	memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
	if (self->format_version == format_v2) {
		putbe64(sidedata_offset, data + 64);
		putbe32(sidedata_comp_len, data + 72);
		/* preserve the compression mode on disk (byte 76) */
		data[76] = (char)data_comp_mode;
		/* Padding for 96 bytes alignment */
		memset(data + 77, 0, self->entry_size - 77);
	}

	/* Keep the node->rev trie in sync if it has been built. */
	if (self->ntinitialized)
		nt_insert(&self->nt, c_node_id, rev);

	/* Any cached head list is now stale. */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
512 515
/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
	uint64_t offset_flags, sidedata_offset;
	int rev;
	Py_ssize_t sidedata_comp_len;
	char *data;
	/* "n" = rev, then 64-bit offset, 32-bit length, 64-bit flags;
	 * the 64-bit format units depend on the width of `long`. */
#if LONG_MAX == 0x7fffffffL
	const char *const sidedata_format = PY23("nKiK", "nKiK");
#else
	const char *const sidedata_format = PY23("nkik", "nkik");
#endif

	if (self->entry_size == v1_entry_size || self->inlined) {
		/*
		 There is a bug in the transaction handling when going from an
		 inline revlog to a separate index and data file. Turn it off until
		 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
		 See issue6485.
		*/
		raise_revlog_error();
		return NULL;
	}

	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
	                      &sidedata_comp_len, &offset_flags))
		return NULL;

	if (rev < 0 || rev >= index_length(self)) {
		PyErr_SetString(PyExc_IndexError, "revision outside index");
		return NULL;
	}
	/* Only entries added within the current transaction (i.e. living
	 * in the in-memory arena) may be rewritten. */
	if (rev < self->length) {
		PyErr_SetString(
		    PyExc_IndexError,
		    "cannot rewrite entries outside of this transaction");
		return NULL;
	}

	/* Find the newly added node, offset from the "already on-disk" length
	 */
	data = self->added + self->entry_size * (rev - self->length);
	putbe64(offset_flags, data);
	putbe64(sidedata_offset, data + 64);
	putbe32(sidedata_comp_len, data + 72);

	Py_RETURN_NONE;
}
563 566
/*
 * Return a dict of diagnostic statistics about the index and its
 * node-trie cache (entry counts, trie shape, lookup hit rates).
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* Insert self-><field> into the dict under the label __d; jumps to
 * bail on any allocation or insertion failure. */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyInt_FromSsize_t(self->__n);                              \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added_length)
		istat(new_length, "index entries added");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
608 611
609 612 /*
610 613 * When we cache a list, we want to be sure the caller can't mutate
611 614 * the cached copy.
612 615 */
613 616 static PyObject *list_copy(PyObject *list)
614 617 {
615 618 Py_ssize_t len = PyList_GET_SIZE(list);
616 619 PyObject *newlist = PyList_New(len);
617 620 Py_ssize_t i;
618 621
619 622 if (newlist == NULL)
620 623 return NULL;
621 624
622 625 for (i = 0; i < len; i++) {
623 626 PyObject *obj = PyList_GET_ITEM(list, i);
624 627 Py_INCREF(obj);
625 628 PyList_SET_ITEM(newlist, i, obj);
626 629 }
627 630
628 631 return newlist;
629 632 }
630 633
631 634 static int check_filter(PyObject *filter, Py_ssize_t arg)
632 635 {
633 636 if (filter) {
634 637 PyObject *arglist, *result;
635 638 int isfiltered;
636 639
637 640 arglist = Py_BuildValue("(n)", arg);
638 641 if (!arglist) {
639 642 return -1;
640 643 }
641 644
642 645 result = PyObject_Call(filter, arglist, NULL);
643 646 Py_DECREF(arglist);
644 647 if (!result) {
645 648 return -1;
646 649 }
647 650
648 651 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
649 652 * same as this function, so we can just return it directly.*/
650 653 isfiltered = PyObject_IsTrue(result);
651 654 Py_DECREF(result);
652 655 return isfiltered;
653 656 } else {
654 657 return 0;
655 658 }
656 659 }
657 660
658 661 static inline void set_phase_from_parents(char *phases, int parent_1,
659 662 int parent_2, Py_ssize_t i)
660 663 {
661 664 if (parent_1 >= 0 && phases[parent_1] > phases[i])
662 665 phases[i] = phases[parent_1];
663 666 if (parent_2 >= 0 && phases[parent_2] > phases[i])
664 667 phases[i] = phases[parent_2];
665 668 }
666 669
/*
 * Compute which of `roots` are reachable from `heads` by walking
 * parent edges backwards (never below `minroot`). If `includepath` is
 * True, additionally return every revision on a path between a
 * reachable root and a head. Returns a Python list of revision ints.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures; indices are shifted by one
	 * so that nullrev (-1) maps to slot 0. */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark the roots. */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* Without includepath we can stop at a root; with
			 * it we must keep walking to find path members. */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* Forward sweep: a seen rev whose parent is reachable is
		 * itself on a root->head path. */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
825 828
/*
 * Mark every rev whose node appears in the set `roots` with `phase`,
 * and return the smallest such rev (-1 if roots is empty). Returns -2
 * on error.
 */
static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
                             char phase)
{
	Py_ssize_t len = index_length(self);
	PyObject *item;
	PyObject *iterator;
	int rev, minrev = -1;
	char *node;

	if (!PySet_Check(roots)) {
		PyErr_SetString(PyExc_TypeError,
		                "roots must be a set of nodes");
		return -2;
	}
	iterator = PyObject_GetIter(roots);
	if (iterator == NULL)
		return -2;
	/* NOTE(review): if PyIter_Next fails with an exception (rather
	 * than exhausting), the loop exits and minrev is returned as if
	 * successful — confirm callers check PyErr_Occurred. */
	while ((item = PyIter_Next(iterator))) {
		if (node_check(self->nodelen, item, &node) == -1)
			goto failed;
		rev = index_find_node(self, node);
		/* null is implicitly public, so negative is invalid */
		if (rev < 0 || rev >= len)
			goto failed;
		phases[rev] = phase;
		if (minrev == -1 || minrev > rev)
			minrev = rev;
		Py_DECREF(item);
	}
	Py_DECREF(iterator);
	return minrev;
failed:
	Py_DECREF(iterator);
	Py_DECREF(item);
	return -2;
}
862 865
863 866 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
864 867 {
865 868 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
866 869 96: internal */
867 870 static const char trackedphases[] = {1, 2, 32, 96};
868 871 PyObject *roots = Py_None;
869 872 PyObject *phasesetsdict = NULL;
870 873 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
871 874 Py_ssize_t len = index_length(self);
872 875 char *phases = NULL;
873 876 int minphaserev = -1, rev, i;
874 877 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
875 878
876 879 if (!PyArg_ParseTuple(args, "O", &roots))
877 880 return NULL;
878 881 if (roots == NULL || !PyDict_Check(roots)) {
879 882 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
880 883 return NULL;
881 884 }
882 885
883 886 phases = calloc(len, 1);
884 887 if (phases == NULL) {
885 888 PyErr_NoMemory();
886 889 return NULL;
887 890 }
888 891
889 892 for (i = 0; i < numphases; ++i) {
890 893 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
891 894 PyObject *phaseroots = NULL;
892 895 if (pyphase == NULL)
893 896 goto release;
894 897 phaseroots = PyDict_GetItem(roots, pyphase);
895 898 Py_DECREF(pyphase);
896 899 if (phaseroots == NULL)
897 900 continue;
898 901 rev = add_roots_get_min(self, phaseroots, phases,
899 902 trackedphases[i]);
900 903 if (rev == -2)
901 904 goto release;
902 905 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
903 906 minphaserev = rev;
904 907 }
905 908
906 909 for (i = 0; i < numphases; ++i) {
907 910 phasesets[i] = PySet_New(NULL);
908 911 if (phasesets[i] == NULL)
909 912 goto release;
910 913 }
911 914
912 915 if (minphaserev == -1)
913 916 minphaserev = len;
914 917 for (rev = minphaserev; rev < len; ++rev) {
915 918 PyObject *pyphase = NULL;
916 919 PyObject *pyrev = NULL;
917 920 int parents[2];
918 921 /*
919 922 * The parent lookup could be skipped for phaseroots, but
920 923 * phase --force would historically not recompute them
921 924 * correctly, leaving descendents with a lower phase around.
922 925 * As such, unconditionally recompute the phase.
923 926 */
924 927 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
925 928 goto release;
926 929 set_phase_from_parents(phases, parents[0], parents[1], rev);
927 930 switch (phases[rev]) {
928 931 case 0:
929 932 continue;
930 933 case 1:
931 934 pyphase = phasesets[0];
932 935 break;
933 936 case 2:
934 937 pyphase = phasesets[1];
935 938 break;
936 939 case 32:
937 940 pyphase = phasesets[2];
938 941 break;
939 942 case 96:
940 943 pyphase = phasesets[3];
941 944 break;
942 945 default:
943 946 /* this should never happen since the phase number is
944 947 * specified by this function. */
945 948 PyErr_SetString(PyExc_SystemError,
946 949 "bad phase number in internal list");
947 950 goto release;
948 951 }
949 952 pyrev = PyInt_FromLong(rev);
950 953 if (pyrev == NULL)
951 954 goto release;
952 955 if (PySet_Add(pyphase, pyrev) == -1) {
953 956 Py_DECREF(pyrev);
954 957 goto release;
955 958 }
956 959 Py_DECREF(pyrev);
957 960 }
958 961
959 962 phasesetsdict = _dict_new_presized(numphases);
960 963 if (phasesetsdict == NULL)
961 964 goto release;
962 965 for (i = 0; i < numphases; ++i) {
963 966 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
964 967 if (pyphase == NULL)
965 968 goto release;
966 969 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
967 970 -1) {
968 971 Py_DECREF(pyphase);
969 972 goto release;
970 973 }
971 974 Py_DECREF(phasesets[i]);
972 975 phasesets[i] = NULL;
973 976 }
974 977
975 978 return Py_BuildValue("nN", len, phasesetsdict);
976 979
977 980 release:
978 981 for (i = 0; i < numphases; ++i)
979 982 Py_XDECREF(phasesets[i]);
980 983 Py_XDECREF(phasesetsdict);
981 984
982 985 free(phases);
983 986 return NULL;
984 987 }
985 988
986 989 static PyObject *index_headrevs(indexObject *self, PyObject *args)
987 990 {
988 991 Py_ssize_t i, j, len;
989 992 char *nothead = NULL;
990 993 PyObject *heads = NULL;
991 994 PyObject *filter = NULL;
992 995 PyObject *filteredrevs = Py_None;
993 996
994 997 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
995 998 return NULL;
996 999 }
997 1000
998 1001 if (self->headrevs && filteredrevs == self->filteredrevs)
999 1002 return list_copy(self->headrevs);
1000 1003
1001 1004 Py_DECREF(self->filteredrevs);
1002 1005 self->filteredrevs = filteredrevs;
1003 1006 Py_INCREF(filteredrevs);
1004 1007
1005 1008 if (filteredrevs != Py_None) {
1006 1009 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1007 1010 if (!filter) {
1008 1011 PyErr_SetString(
1009 1012 PyExc_TypeError,
1010 1013 "filteredrevs has no attribute __contains__");
1011 1014 goto bail;
1012 1015 }
1013 1016 }
1014 1017
1015 1018 len = index_length(self);
1016 1019 heads = PyList_New(0);
1017 1020 if (heads == NULL)
1018 1021 goto bail;
1019 1022 if (len == 0) {
1020 1023 PyObject *nullid = PyInt_FromLong(-1);
1021 1024 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1022 1025 Py_XDECREF(nullid);
1023 1026 goto bail;
1024 1027 }
1025 1028 goto done;
1026 1029 }
1027 1030
1028 1031 nothead = calloc(len, 1);
1029 1032 if (nothead == NULL) {
1030 1033 PyErr_NoMemory();
1031 1034 goto bail;
1032 1035 }
1033 1036
1034 1037 for (i = len - 1; i >= 0; i--) {
1035 1038 int isfiltered;
1036 1039 int parents[2];
1037 1040
1038 1041 /* If nothead[i] == 1, it means we've seen an unfiltered child
1039 1042 * of this node already, and therefore this node is not
1040 1043 * filtered. So we can skip the expensive check_filter step.
1041 1044 */
1042 1045 if (nothead[i] != 1) {
1043 1046 isfiltered = check_filter(filter, i);
1044 1047 if (isfiltered == -1) {
1045 1048 PyErr_SetString(PyExc_TypeError,
1046 1049 "unable to check filter");
1047 1050 goto bail;
1048 1051 }
1049 1052
1050 1053 if (isfiltered) {
1051 1054 nothead[i] = 1;
1052 1055 continue;
1053 1056 }
1054 1057 }
1055 1058
1056 1059 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1057 1060 goto bail;
1058 1061 for (j = 0; j < 2; j++) {
1059 1062 if (parents[j] >= 0)
1060 1063 nothead[parents[j]] = 1;
1061 1064 }
1062 1065 }
1063 1066
1064 1067 for (i = 0; i < len; i++) {
1065 1068 PyObject *head;
1066 1069
1067 1070 if (nothead[i])
1068 1071 continue;
1069 1072 head = PyInt_FromSsize_t(i);
1070 1073 if (head == NULL || PyList_Append(heads, head) == -1) {
1071 1074 Py_XDECREF(head);
1072 1075 goto bail;
1073 1076 }
1074 1077 }
1075 1078
1076 1079 done:
1077 1080 self->headrevs = heads;
1078 1081 Py_XDECREF(filter);
1079 1082 free(nothead);
1080 1083 return list_copy(self->headrevs);
1081 1084 bail:
1082 1085 Py_XDECREF(filter);
1083 1086 Py_XDECREF(heads);
1084 1087 free(nothead);
1085 1088 return NULL;
1086 1089 }
1087 1090
1088 1091 /**
1089 1092 * Obtain the base revision index entry.
1090 1093 *
1091 1094 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1092 1095 */
1093 1096 static inline int index_baserev(indexObject *self, int rev)
1094 1097 {
1095 1098 const char *data;
1096 1099 int result;
1097 1100
1098 1101 data = index_deref(self, rev);
1099 1102 if (data == NULL)
1100 1103 return -2;
1101 1104 result = getbe32(data + 16);
1102 1105
1103 1106 if (result > rev) {
1104 1107 PyErr_Format(
1105 1108 PyExc_ValueError,
1106 1109 "corrupted revlog, revision base above revision: %d, %d",
1107 1110 rev, result);
1108 1111 return -2;
1109 1112 }
1110 1113 if (result < -1) {
1111 1114 PyErr_Format(
1112 1115 PyExc_ValueError,
1113 1116 "corrupted revlog, revision base out of range: %d, %d", rev,
1114 1117 result);
1115 1118 return -2;
1116 1119 }
1117 1120 return result;
1118 1121 }
1119 1122
/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 *
 * Returns 1 if `rev` is a snapshot, 0 if it is a regular delta, and -1
 * with an exception set on error.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	Py_ssize_t base;
	/* Walk down the delta chain: `rev` is a snapshot only if no link in
	 * its chain is a delta against one of its own parents, all the way
	 * down to a full snapshot (base == -1). */
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			/* self-based entry: same as a full snapshot */
			base = -1;
		}
		if (base == -2) {
			/* error already reported by index_baserev() */
			assert(PyErr_Occurred());
			return -1;
		}
		if (base == -1) {
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		if (base == ps[0] || base == ps[1]) {
			/* delta against a parent: not a snapshot */
			return 0;
		}
		rev = base;
	}
	return rev == -1;
}
1153 1156
1154 1157 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1155 1158 {
1156 1159 long rev;
1157 1160 int issnap;
1158 1161 Py_ssize_t length = index_length(self);
1159 1162
1160 1163 if (!pylong_to_long(value, &rev)) {
1161 1164 return NULL;
1162 1165 }
1163 1166 if (rev < -1 || rev >= length) {
1164 1167 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1165 1168 rev);
1166 1169 return NULL;
1167 1170 };
1168 1171 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1169 1172 if (issnap < 0) {
1170 1173 return NULL;
1171 1174 };
1172 1175 return PyBool_FromLong((long)issnap);
1173 1176 }
1174 1177
1175 1178 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1176 1179 {
1177 1180 Py_ssize_t start_rev;
1178 1181 PyObject *cache;
1179 1182 Py_ssize_t base;
1180 1183 Py_ssize_t rev;
1181 1184 PyObject *key = NULL;
1182 1185 PyObject *value = NULL;
1183 1186 const Py_ssize_t length = index_length(self);
1184 1187 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1185 1188 return NULL;
1186 1189 }
1187 1190 for (rev = start_rev; rev < length; rev++) {
1188 1191 int issnap;
1189 1192 PyObject *allvalues = NULL;
1190 1193 issnap = index_issnapshotrev(self, rev);
1191 1194 if (issnap < 0) {
1192 1195 goto bail;
1193 1196 }
1194 1197 if (issnap == 0) {
1195 1198 continue;
1196 1199 }
1197 1200 base = (Py_ssize_t)index_baserev(self, rev);
1198 1201 if (base == rev) {
1199 1202 base = -1;
1200 1203 }
1201 1204 if (base == -2) {
1202 1205 assert(PyErr_Occurred());
1203 1206 goto bail;
1204 1207 }
1205 1208 key = PyInt_FromSsize_t(base);
1206 1209 allvalues = PyDict_GetItem(cache, key);
1207 1210 if (allvalues == NULL && PyErr_Occurred()) {
1208 1211 goto bail;
1209 1212 }
1210 1213 if (allvalues == NULL) {
1211 1214 int r;
1212 1215 allvalues = PyList_New(0);
1213 1216 if (!allvalues) {
1214 1217 goto bail;
1215 1218 }
1216 1219 r = PyDict_SetItem(cache, key, allvalues);
1217 1220 Py_DECREF(allvalues);
1218 1221 if (r < 0) {
1219 1222 goto bail;
1220 1223 }
1221 1224 }
1222 1225 value = PyInt_FromSsize_t(rev);
1223 1226 if (PyList_Append(allvalues, value)) {
1224 1227 goto bail;
1225 1228 }
1226 1229 Py_CLEAR(key);
1227 1230 Py_CLEAR(value);
1228 1231 }
1229 1232 Py_RETURN_NONE;
1230 1233 bail:
1231 1234 Py_XDECREF(key);
1232 1235 Py_XDECREF(value);
1233 1236 return NULL;
1234 1237 }
1235 1238
1236 1239 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1237 1240 {
1238 1241 int rev, generaldelta;
1239 1242 PyObject *stoparg;
1240 1243 int stoprev, iterrev, baserev = -1;
1241 1244 int stopped;
1242 1245 PyObject *chain = NULL, *result = NULL;
1243 1246 const Py_ssize_t length = index_length(self);
1244 1247
1245 1248 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1246 1249 return NULL;
1247 1250 }
1248 1251
1249 1252 if (PyInt_Check(stoparg)) {
1250 1253 stoprev = (int)PyInt_AsLong(stoparg);
1251 1254 if (stoprev == -1 && PyErr_Occurred()) {
1252 1255 return NULL;
1253 1256 }
1254 1257 } else if (stoparg == Py_None) {
1255 1258 stoprev = -2;
1256 1259 } else {
1257 1260 PyErr_SetString(PyExc_ValueError,
1258 1261 "stoprev must be integer or None");
1259 1262 return NULL;
1260 1263 }
1261 1264
1262 1265 if (rev < 0 || rev >= length) {
1263 1266 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1264 1267 return NULL;
1265 1268 }
1266 1269
1267 1270 chain = PyList_New(0);
1268 1271 if (chain == NULL) {
1269 1272 return NULL;
1270 1273 }
1271 1274
1272 1275 baserev = index_baserev(self, rev);
1273 1276
1274 1277 /* This should never happen. */
1275 1278 if (baserev <= -2) {
1276 1279 /* Error should be set by index_deref() */
1277 1280 assert(PyErr_Occurred());
1278 1281 goto bail;
1279 1282 }
1280 1283
1281 1284 iterrev = rev;
1282 1285
1283 1286 while (iterrev != baserev && iterrev != stoprev) {
1284 1287 PyObject *value = PyInt_FromLong(iterrev);
1285 1288 if (value == NULL) {
1286 1289 goto bail;
1287 1290 }
1288 1291 if (PyList_Append(chain, value)) {
1289 1292 Py_DECREF(value);
1290 1293 goto bail;
1291 1294 }
1292 1295 Py_DECREF(value);
1293 1296
1294 1297 if (generaldelta) {
1295 1298 iterrev = baserev;
1296 1299 } else {
1297 1300 iterrev--;
1298 1301 }
1299 1302
1300 1303 if (iterrev < 0) {
1301 1304 break;
1302 1305 }
1303 1306
1304 1307 if (iterrev >= length) {
1305 1308 PyErr_SetString(PyExc_IndexError,
1306 1309 "revision outside index");
1307 1310 return NULL;
1308 1311 }
1309 1312
1310 1313 baserev = index_baserev(self, iterrev);
1311 1314
1312 1315 /* This should never happen. */
1313 1316 if (baserev <= -2) {
1314 1317 /* Error should be set by index_deref() */
1315 1318 assert(PyErr_Occurred());
1316 1319 goto bail;
1317 1320 }
1318 1321 }
1319 1322
1320 1323 if (iterrev == stoprev) {
1321 1324 stopped = 1;
1322 1325 } else {
1323 1326 PyObject *value = PyInt_FromLong(iterrev);
1324 1327 if (value == NULL) {
1325 1328 goto bail;
1326 1329 }
1327 1330 if (PyList_Append(chain, value)) {
1328 1331 Py_DECREF(value);
1329 1332 goto bail;
1330 1333 }
1331 1334 Py_DECREF(value);
1332 1335
1333 1336 stopped = 0;
1334 1337 }
1335 1338
1336 1339 if (PyList_Reverse(chain)) {
1337 1340 goto bail;
1338 1341 }
1339 1342
1340 1343 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1341 1344 Py_DECREF(chain);
1342 1345 return result;
1343 1346
1344 1347 bail:
1345 1348 Py_DECREF(chain);
1346 1349 return NULL;
1347 1350 }
1348 1351
1349 1352 static inline int64_t
1350 1353 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1351 1354 {
1352 1355 int64_t start_offset;
1353 1356 int64_t end_offset;
1354 1357 int end_size;
1355 1358 start_offset = index_get_start(self, start_rev);
1356 1359 if (start_offset < 0) {
1357 1360 return -1;
1358 1361 }
1359 1362 end_offset = index_get_start(self, end_rev);
1360 1363 if (end_offset < 0) {
1361 1364 return -1;
1362 1365 }
1363 1366 end_size = index_get_length(self, end_rev);
1364 1367 if (end_size < 0) {
1365 1368 return -1;
1366 1369 }
1367 1370 if (end_offset < start_offset) {
1368 1371 PyErr_Format(PyExc_ValueError,
1369 1372 "corrupted revlog index: inconsistent offset "
1370 1373 "between revisions (%zd) and (%zd)",
1371 1374 start_rev, end_rev);
1372 1375 return -1;
1373 1376 }
1374 1377 return (end_offset - start_offset) + (int64_t)end_size;
1375 1378 }
1376 1379
1377 1380 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1378 1381 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1379 1382 Py_ssize_t startidx, Py_ssize_t endidx)
1380 1383 {
1381 1384 int length;
1382 1385 while (endidx > 1 && endidx > startidx) {
1383 1386 length = index_get_length(self, revs[endidx - 1]);
1384 1387 if (length < 0) {
1385 1388 return -1;
1386 1389 }
1387 1390 if (length != 0) {
1388 1391 break;
1389 1392 }
1390 1393 endidx -= 1;
1391 1394 }
1392 1395 return endidx;
1393 1396 }
1394 1397
/* A gap between two revision segments, remembered together with its
 * position in the chain so the slicing code can skip the largest gaps
 * first (see index_slicechunktodensity()). */
struct Gap {
	int64_t size;
	Py_ssize_t idx;
};
1399 1402
1400 1403 static int gap_compare(const void *left, const void *right)
1401 1404 {
1402 1405 const struct Gap *l_left = ((const struct Gap *)left);
1403 1406 const struct Gap *l_right = ((const struct Gap *)right);
1404 1407 if (l_left->size < l_right->size) {
1405 1408 return -1;
1406 1409 } else if (l_left->size > l_right->size) {
1407 1410 return 1;
1408 1411 }
1409 1412 return 0;
1410 1413 }
1411 1414 static int Py_ssize_t_compare(const void *left, const void *right)
1412 1415 {
1413 1416 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1414 1417 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1415 1418 if (l_left < l_right) {
1416 1419 return -1;
1417 1420 } else if (l_left > l_right) {
1418 1421 return 1;
1419 1422 }
1420 1423 return 0;
1421 1424 }
1422 1425
/*
 * index.slicechunktodensity(revs, targetdensity, mingapsize) -> tuple.
 *
 * Slice the delta chain `revs` (an ascending list of revisions) into
 * chunks whose read density (payload bytes / bytes actually read) is at
 * least `targetdensity`, only splitting on gaps larger than
 * `mingapsize` bytes.  Returns a sequence of lists of revisions, or
 * NULL with an exception set on error.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* a chain smaller than the gap threshold is never sliced */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* the chain is already dense enough: no slicing needed */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		/* empty revisions do not delimit a gap */
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* re-establish chain order before cutting */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the final segment is emitted by the loop below */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1618 1621
1619 1622 static inline int nt_level(const char *node, Py_ssize_t level)
1620 1623 {
1621 1624 int v = node[level >> 1];
1622 1625 if (!(level & 1))
1623 1626 v >>= 4;
1624 1627 return v & 0xf;
1625 1628 }
1626 1629
/*
 * Look up a node (binary or hex form) in the radix tree.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * -1: the null node (binary lookups only, see the fast path below)
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	/* one trie level per hex digit / per nybble of a binary node */
	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf: decode the revision (-(v + 2)) and check the
			 * remaining digits of the query against the stored
			 * node */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* internal node: descend */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1677 1680
1678 1681 static int nt_new(nodetree *self)
1679 1682 {
1680 1683 if (self->length == self->capacity) {
1681 1684 size_t newcapacity;
1682 1685 nodetreenode *newnodes;
1683 1686 newcapacity = self->capacity * 2;
1684 1687 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1685 1688 PyErr_SetString(PyExc_MemoryError,
1686 1689 "overflow in nt_new");
1687 1690 return -1;
1688 1691 }
1689 1692 newnodes =
1690 1693 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1691 1694 if (newnodes == NULL) {
1692 1695 PyErr_SetString(PyExc_MemoryError, "out of memory");
1693 1696 return -1;
1694 1697 }
1695 1698 self->capacity = newcapacity;
1696 1699 self->nodes = newnodes;
1697 1700 memset(&self->nodes[self->length], 0,
1698 1701 sizeof(nodetreenode) * (self->capacity - self->length));
1699 1702 }
1700 1703 return self->length++;
1701 1704 }
1702 1705
/*
 * Insert `node` -> `rev` into the radix tree, splitting a colliding
 * leaf into an internal node as needed.  Leaves encode a revision as
 * -(rev + 2); rev == -2 therefore encodes as 0, i.e. "unset" (see
 * nt_delete_node()).  Returns 0 on success, -1 on error.
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the revision as a leaf */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* slot holds a leaf: either an update of the same
			 * node, or a collision that needs a split */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			/* push the old leaf one level down, then loop to
			 * place the new node */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* internal node: descend one level */
			level += 1;
			off = v;
		}
	}

	return -1;
}
1750 1753
1751 1754 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1752 1755 {
1753 1756 Py_ssize_t rev;
1754 1757 const char *node;
1755 1758 Py_ssize_t length;
1756 1759 if (!PyArg_ParseTuple(args, "n", &rev))
1757 1760 return NULL;
1758 1761 length = index_length(self->nt.index);
1759 1762 if (rev < 0 || rev >= length) {
1760 1763 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1761 1764 return NULL;
1762 1765 }
1763 1766 node = index_node_existing(self->nt.index, rev);
1764 1767 if (nt_insert(&self->nt, node, (int)rev) == -1)
1765 1768 return NULL;
1766 1769 Py_RETURN_NONE;
1767 1770 }
1768 1771
/* Remove `node` from the tree (if present).  Returns 0 on success, -1 on
 * error, mirroring nt_insert(). */
static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}
1775 1778
/*
 * Initialize a nodetree over `index`, pre-sizing it for roughly
 * `capacity` revisions.  Returns 0 on success, -1 with an exception set
 * on overflow or allocation failure.
 *
 * Does not itself take a reference on `index`; the Python-level wrapper
 * (ntobj_init()) does so before calling here.
 */
static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	/* node 0 is the always-present root */
	self->length = 1;
	return 0;
}
1800 1803
/* __init__ for parsers.nodetree: nodetree(index, capacity). */
static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	/* hold a reference on the index; released in ntobj_dealloc() */
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}
1811 1814
/* Resolve a (possibly partial) hex nodeid prefix against the tree.
 * Thin wrapper around nt_find() in hex mode; see nt_find() for the
 * return values (-4 ambiguous, -2 not found, otherwise a rev). */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
1816 1819
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* leaf: the prefix is unique at this depth — verify
			 * it actually belongs to the requested node */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		/* internal node: need at least one more digit */
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
1862 1865
1863 1866 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1864 1867 {
1865 1868 PyObject *val;
1866 1869 char *node;
1867 1870 int length;
1868 1871
1869 1872 if (!PyArg_ParseTuple(args, "O", &val))
1870 1873 return NULL;
1871 1874 if (node_check(self->nt.nodelen, val, &node) == -1)
1872 1875 return NULL;
1873 1876
1874 1877 length = nt_shortest(&self->nt, node);
1875 1878 if (length == -3)
1876 1879 return NULL;
1877 1880 if (length == -2) {
1878 1881 raise_revlog_error();
1879 1882 return NULL;
1880 1883 }
1881 1884 return PyInt_FromLong(length);
1882 1885 }
1883 1886
/* Release the tree's node array; safe to call on a zeroed or already
 * deallocated tree (free(NULL) is a no-op, and the pointer is reset). */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
1889 1892
/* Destructor for parsers.nodetree instances: drop the reference taken on
 * the index in ntobj_init(), then free the tree storage. */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
1896 1899
/* Methods exposed on parsers.nodetree instances. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
1904 1907
/* Python type object backing parsers.nodetree. */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
1944 1947
/*
 * Lazily create the index's radix tree and seed it with the nullid.
 * Idempotent: a no-op once ntinitialized is set.  Returns 0 on success,
 * -1 with an exception set on failure.
 */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		/* revisions below ntrev are not in the tree yet; they are
		 * added lazily by index_find_node() / index_populate_nt() */
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
1963 1966
/*
 * Resolve a full binary node to a revision, consulting and lazily
 * populating the radix tree.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				/* cache only the node we were asked for */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(&self->nt, n, rev) == -1) {
				/* remember how far the scan got so the next
				 * call resumes from here */
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		/* everything at or above rev is now in the tree */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2023 2026
2024 2027 static PyObject *index_getitem(indexObject *self, PyObject *value)
2025 2028 {
2026 2029 char *node;
2027 2030 int rev;
2028 2031
2029 2032 if (PyInt_Check(value)) {
2030 2033 long idx;
2031 2034 if (!pylong_to_long(value, &idx)) {
2032 2035 return NULL;
2033 2036 }
2034 2037 return index_get(self, idx);
2035 2038 }
2036 2039
2037 2040 if (node_check(self->nodelen, value, &node) == -1)
2038 2041 return NULL;
2039 2042 rev = index_find_node(self, node);
2040 2043 if (rev >= -1)
2041 2044 return PyInt_FromLong(rev);
2042 2045 if (rev == -2)
2043 2046 raise_revlog_error();
2044 2047 return NULL;
2045 2048 }
2046 2049
2047 2050 /*
2048 2051 * Fully populate the radix tree.
2049 2052 */
2050 2053 static int index_populate_nt(indexObject *self)
2051 2054 {
2052 2055 int rev;
2053 2056 if (self->ntrev > 0) {
2054 2057 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2055 2058 const char *n = index_node_existing(self, rev);
2056 2059 if (n == NULL)
2057 2060 return -1;
2058 2061 if (nt_insert(&self->nt, n, rev) == -1)
2059 2062 return -1;
2060 2063 }
2061 2064 self->ntrev = -1;
2062 2065 }
2063 2066 return 0;
2064 2067 }
2065 2068
2066 2069 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2067 2070 {
2068 2071 const char *fullnode;
2069 2072 Py_ssize_t nodelen;
2070 2073 char *node;
2071 2074 int rev, i;
2072 2075
2073 2076 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2074 2077 return NULL;
2075 2078
2076 2079 if (nodelen < 1) {
2077 2080 PyErr_SetString(PyExc_ValueError, "key too short");
2078 2081 return NULL;
2079 2082 }
2080 2083
2081 2084 if (nodelen > 2 * self->nodelen) {
2082 2085 PyErr_SetString(PyExc_ValueError, "key too long");
2083 2086 return NULL;
2084 2087 }
2085 2088
2086 2089 for (i = 0; i < nodelen; i++)
2087 2090 hexdigit(node, i);
2088 2091 if (PyErr_Occurred()) {
2089 2092 /* input contains non-hex characters */
2090 2093 PyErr_Clear();
2091 2094 Py_RETURN_NONE;
2092 2095 }
2093 2096
2094 2097 if (index_init_nt(self) == -1)
2095 2098 return NULL;
2096 2099 if (index_populate_nt(self) == -1)
2097 2100 return NULL;
2098 2101 rev = nt_partialmatch(&self->nt, node, nodelen);
2099 2102
2100 2103 switch (rev) {
2101 2104 case -4:
2102 2105 raise_revlog_error();
2103 2106 return NULL;
2104 2107 case -2:
2105 2108 Py_RETURN_NONE;
2106 2109 case -1:
2107 2110 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2108 2111 }
2109 2112
2110 2113 fullnode = index_node_existing(self, rev);
2111 2114 if (fullnode == NULL) {
2112 2115 return NULL;
2113 2116 }
2114 2117 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2115 2118 }
2116 2119
2117 2120 static PyObject *index_shortest(indexObject *self, PyObject *args)
2118 2121 {
2119 2122 PyObject *val;
2120 2123 char *node;
2121 2124 int length;
2122 2125
2123 2126 if (!PyArg_ParseTuple(args, "O", &val))
2124 2127 return NULL;
2125 2128 if (node_check(self->nodelen, val, &node) == -1)
2126 2129 return NULL;
2127 2130
2128 2131 self->ntlookups++;
2129 2132 if (index_init_nt(self) == -1)
2130 2133 return NULL;
2131 2134 if (index_populate_nt(self) == -1)
2132 2135 return NULL;
2133 2136 length = nt_shortest(&self->nt, node);
2134 2137 if (length == -3)
2135 2138 return NULL;
2136 2139 if (length == -2) {
2137 2140 raise_revlog_error();
2138 2141 return NULL;
2139 2142 }
2140 2143 return PyInt_FromLong(length);
2141 2144 }
2142 2145
2143 2146 static PyObject *index_m_get(indexObject *self, PyObject *args)
2144 2147 {
2145 2148 PyObject *val;
2146 2149 char *node;
2147 2150 int rev;
2148 2151
2149 2152 if (!PyArg_ParseTuple(args, "O", &val))
2150 2153 return NULL;
2151 2154 if (node_check(self->nodelen, val, &node) == -1)
2152 2155 return NULL;
2153 2156 rev = index_find_node(self, node);
2154 2157 if (rev == -3)
2155 2158 return NULL;
2156 2159 if (rev == -2)
2157 2160 Py_RETURN_NONE;
2158 2161 return PyInt_FromLong(rev);
2159 2162 }
2160 2163
2161 2164 static int index_contains(indexObject *self, PyObject *value)
2162 2165 {
2163 2166 char *node;
2164 2167
2165 2168 if (PyInt_Check(value)) {
2166 2169 long rev;
2167 2170 if (!pylong_to_long(value, &rev)) {
2168 2171 return -1;
2169 2172 }
2170 2173 return rev >= -1 && rev < index_length(self);
2171 2174 }
2172 2175
2173 2176 if (node_check(self->nodelen, value, &node) == -1)
2174 2177 return -1;
2175 2178
2176 2179 switch (index_find_node(self, node)) {
2177 2180 case -3:
2178 2181 return -1;
2179 2182 case -2:
2180 2183 return 0;
2181 2184 default:
2182 2185 return 1;
2183 2186 }
2184 2187 }
2185 2188
2186 2189 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2187 2190 {
2188 2191 int ret = index_contains(self, args);
2189 2192 if (ret < 0)
2190 2193 return NULL;
2191 2194 return PyBool_FromLong((long)ret);
2192 2195 }
2193 2196
2194 2197 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2195 2198 {
2196 2199 char *node;
2197 2200 int rev;
2198 2201
2199 2202 if (node_check(self->nodelen, val, &node) == -1)
2200 2203 return NULL;
2201 2204 rev = index_find_node(self, node);
2202 2205 if (rev >= -1)
2203 2206 return PyInt_FromLong(rev);
2204 2207 if (rev == -2)
2205 2208 raise_revlog_error();
2206 2209 return NULL;
2207 2210 }
2208 2211
2209 2212 typedef uint64_t bitmask;
2210 2213
2211 2214 /*
2212 2215 * Given a disjoint set of revs, return all candidates for the
2213 2216 * greatest common ancestor. In revset notation, this is the set
2214 2217 * "heads(::a and ::b and ...)"
2215 2218 */
2216 2219 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2217 2220 int revcount)
2218 2221 {
2219 2222 const bitmask allseen = (1ull << revcount) - 1;
2220 2223 const bitmask poison = 1ull << revcount;
2221 2224 PyObject *gca = PyList_New(0);
2222 2225 int i, v, interesting;
2223 2226 int maxrev = -1;
2224 2227 bitmask sp;
2225 2228 bitmask *seen;
2226 2229
2227 2230 if (gca == NULL)
2228 2231 return PyErr_NoMemory();
2229 2232
2230 2233 for (i = 0; i < revcount; i++) {
2231 2234 if (revs[i] > maxrev)
2232 2235 maxrev = revs[i];
2233 2236 }
2234 2237
2235 2238 seen = calloc(sizeof(*seen), maxrev + 1);
2236 2239 if (seen == NULL) {
2237 2240 Py_DECREF(gca);
2238 2241 return PyErr_NoMemory();
2239 2242 }
2240 2243
2241 2244 for (i = 0; i < revcount; i++)
2242 2245 seen[revs[i]] = 1ull << i;
2243 2246
2244 2247 interesting = revcount;
2245 2248
2246 2249 for (v = maxrev; v >= 0 && interesting; v--) {
2247 2250 bitmask sv = seen[v];
2248 2251 int parents[2];
2249 2252
2250 2253 if (!sv)
2251 2254 continue;
2252 2255
2253 2256 if (sv < poison) {
2254 2257 interesting -= 1;
2255 2258 if (sv == allseen) {
2256 2259 PyObject *obj = PyInt_FromLong(v);
2257 2260 if (obj == NULL)
2258 2261 goto bail;
2259 2262 if (PyList_Append(gca, obj) == -1) {
2260 2263 Py_DECREF(obj);
2261 2264 goto bail;
2262 2265 }
2263 2266 sv |= poison;
2264 2267 for (i = 0; i < revcount; i++) {
2265 2268 if (revs[i] == v)
2266 2269 goto done;
2267 2270 }
2268 2271 }
2269 2272 }
2270 2273 if (index_get_parents(self, v, parents, maxrev) < 0)
2271 2274 goto bail;
2272 2275
2273 2276 for (i = 0; i < 2; i++) {
2274 2277 int p = parents[i];
2275 2278 if (p == -1)
2276 2279 continue;
2277 2280 sp = seen[p];
2278 2281 if (sv < poison) {
2279 2282 if (sp == 0) {
2280 2283 seen[p] = sv;
2281 2284 interesting++;
2282 2285 } else if (sp != sv)
2283 2286 seen[p] |= sv;
2284 2287 } else {
2285 2288 if (sp && sp < poison)
2286 2289 interesting--;
2287 2290 seen[p] = sv;
2288 2291 }
2289 2292 }
2290 2293 }
2291 2294
2292 2295 done:
2293 2296 free(seen);
2294 2297 return gca;
2295 2298 bail:
2296 2299 free(seen);
2297 2300 Py_XDECREF(gca);
2298 2301 return NULL;
2299 2302 }
2300 2303
2301 2304 /*
2302 2305 * Given a disjoint set of revs, return the subset with the longest
2303 2306 * path to the root.
2304 2307 */
2305 2308 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2306 2309 {
2307 2310 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2308 2311 static const Py_ssize_t capacity = 24;
2309 2312 int *depth, *interesting = NULL;
2310 2313 int i, j, v, ninteresting;
2311 2314 PyObject *dict = NULL, *keys = NULL;
2312 2315 long *seen = NULL;
2313 2316 int maxrev = -1;
2314 2317 long final;
2315 2318
2316 2319 if (revcount > capacity) {
2317 2320 PyErr_Format(PyExc_OverflowError,
2318 2321 "bitset size (%ld) > capacity (%ld)",
2319 2322 (long)revcount, (long)capacity);
2320 2323 return NULL;
2321 2324 }
2322 2325
2323 2326 for (i = 0; i < revcount; i++) {
2324 2327 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2325 2328 if (n > maxrev)
2326 2329 maxrev = n;
2327 2330 }
2328 2331
2329 2332 depth = calloc(sizeof(*depth), maxrev + 1);
2330 2333 if (depth == NULL)
2331 2334 return PyErr_NoMemory();
2332 2335
2333 2336 seen = calloc(sizeof(*seen), maxrev + 1);
2334 2337 if (seen == NULL) {
2335 2338 PyErr_NoMemory();
2336 2339 goto bail;
2337 2340 }
2338 2341
2339 2342 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2340 2343 if (interesting == NULL) {
2341 2344 PyErr_NoMemory();
2342 2345 goto bail;
2343 2346 }
2344 2347
2345 2348 if (PyList_Sort(revs) == -1)
2346 2349 goto bail;
2347 2350
2348 2351 for (i = 0; i < revcount; i++) {
2349 2352 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2350 2353 long b = 1l << i;
2351 2354 depth[n] = 1;
2352 2355 seen[n] = b;
2353 2356 interesting[b] = 1;
2354 2357 }
2355 2358
2356 2359 /* invariant: ninteresting is the number of non-zero entries in
2357 2360 * interesting. */
2358 2361 ninteresting = (int)revcount;
2359 2362
2360 2363 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2361 2364 int dv = depth[v];
2362 2365 int parents[2];
2363 2366 long sv;
2364 2367
2365 2368 if (dv == 0)
2366 2369 continue;
2367 2370
2368 2371 sv = seen[v];
2369 2372 if (index_get_parents(self, v, parents, maxrev) < 0)
2370 2373 goto bail;
2371 2374
2372 2375 for (i = 0; i < 2; i++) {
2373 2376 int p = parents[i];
2374 2377 long sp;
2375 2378 int dp;
2376 2379
2377 2380 if (p == -1)
2378 2381 continue;
2379 2382
2380 2383 dp = depth[p];
2381 2384 sp = seen[p];
2382 2385 if (dp <= dv) {
2383 2386 depth[p] = dv + 1;
2384 2387 if (sp != sv) {
2385 2388 interesting[sv] += 1;
2386 2389 seen[p] = sv;
2387 2390 if (sp) {
2388 2391 interesting[sp] -= 1;
2389 2392 if (interesting[sp] == 0)
2390 2393 ninteresting -= 1;
2391 2394 }
2392 2395 }
2393 2396 } else if (dv == dp - 1) {
2394 2397 long nsp = sp | sv;
2395 2398 if (nsp == sp)
2396 2399 continue;
2397 2400 seen[p] = nsp;
2398 2401 interesting[sp] -= 1;
2399 2402 if (interesting[sp] == 0)
2400 2403 ninteresting -= 1;
2401 2404 if (interesting[nsp] == 0)
2402 2405 ninteresting += 1;
2403 2406 interesting[nsp] += 1;
2404 2407 }
2405 2408 }
2406 2409 interesting[sv] -= 1;
2407 2410 if (interesting[sv] == 0)
2408 2411 ninteresting -= 1;
2409 2412 }
2410 2413
2411 2414 final = 0;
2412 2415 j = ninteresting;
2413 2416 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2414 2417 if (interesting[i] == 0)
2415 2418 continue;
2416 2419 final |= i;
2417 2420 j -= 1;
2418 2421 }
2419 2422 if (final == 0) {
2420 2423 keys = PyList_New(0);
2421 2424 goto bail;
2422 2425 }
2423 2426
2424 2427 dict = PyDict_New();
2425 2428 if (dict == NULL)
2426 2429 goto bail;
2427 2430
2428 2431 for (i = 0; i < revcount; i++) {
2429 2432 PyObject *key;
2430 2433
2431 2434 if ((final & (1 << i)) == 0)
2432 2435 continue;
2433 2436
2434 2437 key = PyList_GET_ITEM(revs, i);
2435 2438 Py_INCREF(key);
2436 2439 Py_INCREF(Py_None);
2437 2440 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2438 2441 Py_DECREF(key);
2439 2442 Py_DECREF(Py_None);
2440 2443 goto bail;
2441 2444 }
2442 2445 }
2443 2446
2444 2447 keys = PyDict_Keys(dict);
2445 2448
2446 2449 bail:
2447 2450 free(depth);
2448 2451 free(seen);
2449 2452 free(interesting);
2450 2453 Py_XDECREF(dict);
2451 2454
2452 2455 return keys;
2453 2456 }
2454 2457
2455 2458 /*
2456 2459 * Given a (possibly overlapping) set of revs, return all the
2457 2460 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2458 2461 */
2459 2462 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2460 2463 {
2461 2464 PyObject *ret = NULL;
2462 2465 Py_ssize_t argcount, i, len;
2463 2466 bitmask repeat = 0;
2464 2467 int revcount = 0;
2465 2468 int *revs;
2466 2469
2467 2470 argcount = PySequence_Length(args);
2468 2471 revs = PyMem_Malloc(argcount * sizeof(*revs));
2469 2472 if (argcount > 0 && revs == NULL)
2470 2473 return PyErr_NoMemory();
2471 2474 len = index_length(self);
2472 2475
2473 2476 for (i = 0; i < argcount; i++) {
2474 2477 static const int capacity = 24;
2475 2478 PyObject *obj = PySequence_GetItem(args, i);
2476 2479 bitmask x;
2477 2480 long val;
2478 2481
2479 2482 if (!PyInt_Check(obj)) {
2480 2483 PyErr_SetString(PyExc_TypeError,
2481 2484 "arguments must all be ints");
2482 2485 Py_DECREF(obj);
2483 2486 goto bail;
2484 2487 }
2485 2488 val = PyInt_AsLong(obj);
2486 2489 Py_DECREF(obj);
2487 2490 if (val == -1) {
2488 2491 ret = PyList_New(0);
2489 2492 goto done;
2490 2493 }
2491 2494 if (val < 0 || val >= len) {
2492 2495 PyErr_SetString(PyExc_IndexError, "index out of range");
2493 2496 goto bail;
2494 2497 }
2495 2498 /* this cheesy bloom filter lets us avoid some more
2496 2499 * expensive duplicate checks in the common set-is-disjoint
2497 2500 * case */
2498 2501 x = 1ull << (val & 0x3f);
2499 2502 if (repeat & x) {
2500 2503 int k;
2501 2504 for (k = 0; k < revcount; k++) {
2502 2505 if (val == revs[k])
2503 2506 goto duplicate;
2504 2507 }
2505 2508 } else
2506 2509 repeat |= x;
2507 2510 if (revcount >= capacity) {
2508 2511 PyErr_Format(PyExc_OverflowError,
2509 2512 "bitset size (%d) > capacity (%d)",
2510 2513 revcount, capacity);
2511 2514 goto bail;
2512 2515 }
2513 2516 revs[revcount++] = (int)val;
2514 2517 duplicate:;
2515 2518 }
2516 2519
2517 2520 if (revcount == 0) {
2518 2521 ret = PyList_New(0);
2519 2522 goto done;
2520 2523 }
2521 2524 if (revcount == 1) {
2522 2525 PyObject *obj;
2523 2526 ret = PyList_New(1);
2524 2527 if (ret == NULL)
2525 2528 goto bail;
2526 2529 obj = PyInt_FromLong(revs[0]);
2527 2530 if (obj == NULL)
2528 2531 goto bail;
2529 2532 PyList_SET_ITEM(ret, 0, obj);
2530 2533 goto done;
2531 2534 }
2532 2535
2533 2536 ret = find_gca_candidates(self, revs, revcount);
2534 2537 if (ret == NULL)
2535 2538 goto bail;
2536 2539
2537 2540 done:
2538 2541 PyMem_Free(revs);
2539 2542 return ret;
2540 2543
2541 2544 bail:
2542 2545 PyMem_Free(revs);
2543 2546 Py_XDECREF(ret);
2544 2547 return NULL;
2545 2548 }
2546 2549
2547 2550 /*
2548 2551 * Given a (possibly overlapping) set of revs, return the greatest
2549 2552 * common ancestors: those with the longest path to the root.
2550 2553 */
2551 2554 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2552 2555 {
2553 2556 PyObject *ret;
2554 2557 PyObject *gca = index_commonancestorsheads(self, args);
2555 2558 if (gca == NULL)
2556 2559 return NULL;
2557 2560
2558 2561 if (PyList_GET_SIZE(gca) <= 1) {
2559 2562 return gca;
2560 2563 }
2561 2564
2562 2565 ret = find_deepest(self, gca);
2563 2566 Py_DECREF(gca);
2564 2567 return ret;
2565 2568 }
2566 2569
2567 2570 /*
2568 2571 * Invalidate any trie entries introduced by added revs.
2569 2572 */
2570 2573 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2571 2574 {
2572 2575 Py_ssize_t i, len;
2573 2576
2574 2577 len = self->length + self->new_length;
2575 2578 i = start - self->length;
2576 2579 if (i < 0)
2577 2580 return;
2578 2581
2579 2582 for (i = start; i < len; i++)
2580 2583 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2581 2584
2582 2585 self->new_length = start - self->length;
2583 2586 }
2584 2587
2585 2588 /*
2586 2589 * Delete a numeric range of revs, which must be at the end of the
2587 2590 * range.
2588 2591 */
2589 2592 static int index_slice_del(indexObject *self, PyObject *item)
2590 2593 {
2591 2594 Py_ssize_t start, stop, step, slicelength;
2592 2595 Py_ssize_t length = index_length(self) + 1;
2593 2596 int ret = 0;
2594 2597
2595 2598 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2596 2599 #ifdef IS_PY3K
2597 2600 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2598 2601 &slicelength) < 0)
2599 2602 #else
2600 2603 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2601 2604 &step, &slicelength) < 0)
2602 2605 #endif
2603 2606 return -1;
2604 2607
2605 2608 if (slicelength <= 0)
2606 2609 return 0;
2607 2610
2608 2611 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2609 2612 stop = start;
2610 2613
2611 2614 if (step < 0) {
2612 2615 stop = start + 1;
2613 2616 start = stop + step * (slicelength - 1) - 1;
2614 2617 step = -step;
2615 2618 }
2616 2619
2617 2620 if (step != 1) {
2618 2621 PyErr_SetString(PyExc_ValueError,
2619 2622 "revlog index delete requires step size of 1");
2620 2623 return -1;
2621 2624 }
2622 2625
2623 2626 if (stop != length - 1) {
2624 2627 PyErr_SetString(PyExc_IndexError,
2625 2628 "revlog index deletion indices are invalid");
2626 2629 return -1;
2627 2630 }
2628 2631
2629 2632 if (start < self->length) {
2630 2633 if (self->ntinitialized) {
2631 2634 Py_ssize_t i;
2632 2635
2633 2636 for (i = start; i < self->length; i++) {
2634 2637 const char *node = index_node_existing(self, i);
2635 2638 if (node == NULL)
2636 2639 return -1;
2637 2640
2638 2641 nt_delete_node(&self->nt, node);
2639 2642 }
2640 2643 if (self->new_length)
2641 2644 index_invalidate_added(self, self->length);
2642 2645 if (self->ntrev > start)
2643 2646 self->ntrev = (int)start;
2644 2647 } else if (self->new_length) {
2645 2648 self->new_length = 0;
2646 2649 }
2647 2650
2648 2651 self->length = start;
2649 2652 goto done;
2650 2653 }
2651 2654
2652 2655 if (self->ntinitialized) {
2653 2656 index_invalidate_added(self, start);
2654 2657 if (self->ntrev > start)
2655 2658 self->ntrev = (int)start;
2656 2659 } else {
2657 2660 self->new_length = start - self->length;
2658 2661 }
2659 2662 done:
2660 2663 Py_CLEAR(self->headrevs);
2661 2664 return ret;
2662 2665 }
2663 2666
2664 2667 /*
2665 2668 * Supported ops:
2666 2669 *
2667 2670 * slice deletion
2668 2671 * string assignment (extend node->rev mapping)
2669 2672 * string deletion (shrink node->rev mapping)
2670 2673 */
2671 2674 static int index_assign_subscript(indexObject *self, PyObject *item,
2672 2675 PyObject *value)
2673 2676 {
2674 2677 char *node;
2675 2678 long rev;
2676 2679
2677 2680 if (PySlice_Check(item) && value == NULL)
2678 2681 return index_slice_del(self, item);
2679 2682
2680 2683 if (node_check(self->nodelen, item, &node) == -1)
2681 2684 return -1;
2682 2685
2683 2686 if (value == NULL)
2684 2687 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2685 2688 : 0;
2686 2689 rev = PyInt_AsLong(value);
2687 2690 if (rev > INT_MAX || rev < 0) {
2688 2691 if (!PyErr_Occurred())
2689 2692 PyErr_SetString(PyExc_ValueError, "rev out of range");
2690 2693 return -1;
2691 2694 }
2692 2695
2693 2696 if (index_init_nt(self) == -1)
2694 2697 return -1;
2695 2698 return nt_insert(&self->nt, node, (int)rev);
2696 2699 }
2697 2700
2698 2701 /*
2699 2702 * Find all RevlogNG entries in an index that has inline data. Update
2700 2703 * the optional "offsets" table with those entries.
2701 2704 */
2702 2705 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2703 2706 {
2704 2707 const char *data = (const char *)self->buf.buf;
2705 2708 Py_ssize_t pos = 0;
2706 2709 Py_ssize_t end = self->buf.len;
2707 2710 long incr = self->entry_size;
2708 2711 Py_ssize_t len = 0;
2709 2712
2710 2713 while (pos + self->entry_size <= end && pos >= 0) {
2711 2714 uint32_t comp_len, sidedata_comp_len = 0;
2712 2715 /* 3rd element of header is length of compressed inline data */
2713 2716 comp_len = getbe32(data + pos + 8);
2714 2717 if (self->entry_size == v2_entry_size) {
2715 2718 sidedata_comp_len = getbe32(data + pos + 72);
2716 2719 }
2717 2720 incr = self->entry_size + comp_len + sidedata_comp_len;
2718 2721 if (offsets)
2719 2722 offsets[len] = data + pos;
2720 2723 len++;
2721 2724 pos += incr;
2722 2725 }
2723 2726
2724 2727 if (pos != end) {
2725 2728 if (!PyErr_Occurred())
2726 2729 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2727 2730 return -1;
2728 2731 }
2729 2732
2730 2733 return len;
2731 2734 }
2732 2735
2733 2736 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2734 2737 {
2735 2738 PyObject *data_obj, *inlined_obj, *revlogv2;
2736 2739 Py_ssize_t size;
2737 2740
2738 2741 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2739 2742
2740 2743 /* Initialize before argument-checking to avoid index_dealloc() crash.
2741 2744 */
2742 2745 self->added = NULL;
2743 2746 self->new_length = 0;
2744 2747 self->added_length = 0;
2745 2748 self->data = NULL;
2746 2749 memset(&self->buf, 0, sizeof(self->buf));
2747 2750 self->headrevs = NULL;
2748 2751 self->filteredrevs = Py_None;
2749 2752 Py_INCREF(Py_None);
2750 2753 self->ntinitialized = 0;
2751 2754 self->offsets = NULL;
2752 2755 self->nodelen = 20;
2753 2756 self->nullentry = NULL;
2754 2757
2755 2758 revlogv2 = NULL;
2756 2759 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2757 2760 &data_obj, &inlined_obj, &revlogv2))
2758 2761 return -1;
2759 2762 if (!PyObject_CheckBuffer(data_obj)) {
2760 2763 PyErr_SetString(PyExc_TypeError,
2761 2764 "data does not support buffer interface");
2762 2765 return -1;
2763 2766 }
2764 2767 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2765 2768 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2766 2769 return -1;
2767 2770 }
2768 2771
2769 2772 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2770 2773 self->format_version = format_v2;
2771 2774 self->entry_size = v2_entry_size;
2772 2775 } else {
2773 2776 self->format_version = format_v1;
2774 2777 self->entry_size = v1_entry_size;
2775 2778 }
2776 2779
2777 2780 self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0,
2778 2781 0, 0, -1, -1, -1, -1, nullid,
2779 2782 self->nodelen, 0, 0, comp_mode_inline);
2780 2783
2781 2784 if (!self->nullentry)
2782 2785 return -1;
2783 2786 PyObject_GC_UnTrack(self->nullentry);
2784 2787
2785 2788 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2786 2789 return -1;
2787 2790 size = self->buf.len;
2788 2791
2789 2792 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2790 2793 self->data = data_obj;
2791 2794
2792 2795 self->ntlookups = self->ntmisses = 0;
2793 2796 self->ntrev = -1;
2794 2797 Py_INCREF(self->data);
2795 2798
2796 2799 if (self->inlined) {
2797 2800 Py_ssize_t len = inline_scan(self, NULL);
2798 2801 if (len == -1)
2799 2802 goto bail;
2800 2803 self->length = len;
2801 2804 } else {
2802 2805 if (size % self->entry_size) {
2803 2806 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2804 2807 goto bail;
2805 2808 }
2806 2809 self->length = size / self->entry_size;
2807 2810 }
2808 2811
2809 2812 return 0;
2810 2813 bail:
2811 2814 return -1;
2812 2815 }
2813 2816
2814 2817 static PyObject *index_nodemap(indexObject *self)
2815 2818 {
2816 2819 Py_INCREF(self);
2817 2820 return (PyObject *)self;
2818 2821 }
2819 2822
2820 2823 static void _index_clearcaches(indexObject *self)
2821 2824 {
2822 2825 if (self->offsets) {
2823 2826 PyMem_Free((void *)self->offsets);
2824 2827 self->offsets = NULL;
2825 2828 }
2826 2829 if (self->ntinitialized) {
2827 2830 nt_dealloc(&self->nt);
2828 2831 }
2829 2832 self->ntinitialized = 0;
2830 2833 Py_CLEAR(self->headrevs);
2831 2834 }
2832 2835
2833 2836 static PyObject *index_clearcaches(indexObject *self)
2834 2837 {
2835 2838 _index_clearcaches(self);
2836 2839 self->ntrev = -1;
2837 2840 self->ntlookups = self->ntmisses = 0;
2838 2841 Py_RETURN_NONE;
2839 2842 }
2840 2843
2841 2844 static void index_dealloc(indexObject *self)
2842 2845 {
2843 2846 _index_clearcaches(self);
2844 2847 Py_XDECREF(self->filteredrevs);
2845 2848 if (self->buf.buf) {
2846 2849 PyBuffer_Release(&self->buf);
2847 2850 memset(&self->buf, 0, sizeof(self->buf));
2848 2851 }
2849 2852 Py_XDECREF(self->data);
2850 2853 PyMem_Free(self->added);
2851 2854 Py_XDECREF(self->nullentry);
2852 2855 PyObject_Del(self);
2853 2856 }
2854 2857
2855 2858 static PySequenceMethods index_sequence_methods = {
2856 2859 (lenfunc)index_length, /* sq_length */
2857 2860 0, /* sq_concat */
2858 2861 0, /* sq_repeat */
2859 2862 (ssizeargfunc)index_get, /* sq_item */
2860 2863 0, /* sq_slice */
2861 2864 0, /* sq_ass_item */
2862 2865 0, /* sq_ass_slice */
2863 2866 (objobjproc)index_contains, /* sq_contains */
2864 2867 };
2865 2868
2866 2869 static PyMappingMethods index_mapping_methods = {
2867 2870 (lenfunc)index_length, /* mp_length */
2868 2871 (binaryfunc)index_getitem, /* mp_subscript */
2869 2872 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2870 2873 };
2871 2874
2872 2875 static PyMethodDef index_methods[] = {
2873 2876 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2874 2877 "return the gca set of the given revs"},
2875 2878 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2876 2879 METH_VARARGS,
2877 2880 "return the heads of the common ancestors of the given revs"},
2878 2881 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2879 2882 "clear the index caches"},
2880 2883 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2881 2884 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2882 2885 "return `rev` associated with a node or None"},
2883 2886 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2884 2887 "return True if the node exist in the index"},
2885 2888 {"rev", (PyCFunction)index_m_rev, METH_O,
2886 2889 "return `rev` associated with a node or raise RevlogError"},
2887 2890 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2888 2891 "compute phases"},
2889 2892 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2890 2893 "reachableroots"},
2891 2894 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2892 2895 METH_VARARGS, "replace an existing index entry with a new value"},
2893 2896 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2894 2897 "get head revisions"}, /* Can do filtering since 3.2 */
2895 2898 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2896 2899 "get filtered head revisions"}, /* Can always do filtering */
2897 2900 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2898 2901 "True if the object is a snapshot"},
2899 2902 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2900 2903 "Gather snapshot data in a cache dict"},
2901 2904 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2902 2905 "determine revisions with deltas to reconstruct fulltext"},
2903 2906 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2904 2907 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2905 2908 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2906 2909 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2907 2910 "match a potentially ambiguous node ID"},
2908 2911 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2909 2912 "find length of shortest hex nodeid of a binary ID"},
2910 2913 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2911 2914 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2912 2915 "return an entry in binary form"},
2913 2916 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2914 2917 "pack the revlog header information into binary"},
2915 2918 {NULL} /* Sentinel */
2916 2919 };
2917 2920
2918 2921 static PyGetSetDef index_getset[] = {
2919 2922 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2920 2923 {NULL} /* Sentinel */
2921 2924 };
2922 2925
2923 2926 static PyMemberDef index_members[] = {
2924 2927 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
2925 2928 "size of an index entry"},
2926 2929 {NULL} /* Sentinel */
2927 2930 };
2928 2931
2929 2932 PyTypeObject HgRevlogIndex_Type = {
2930 2933 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2931 2934 "parsers.index", /* tp_name */
2932 2935 sizeof(indexObject), /* tp_basicsize */
2933 2936 0, /* tp_itemsize */
2934 2937 (destructor)index_dealloc, /* tp_dealloc */
2935 2938 0, /* tp_print */
2936 2939 0, /* tp_getattr */
2937 2940 0, /* tp_setattr */
2938 2941 0, /* tp_compare */
2939 2942 0, /* tp_repr */
2940 2943 0, /* tp_as_number */
2941 2944 &index_sequence_methods, /* tp_as_sequence */
2942 2945 &index_mapping_methods, /* tp_as_mapping */
2943 2946 0, /* tp_hash */
2944 2947 0, /* tp_call */
2945 2948 0, /* tp_str */
2946 2949 0, /* tp_getattro */
2947 2950 0, /* tp_setattro */
2948 2951 0, /* tp_as_buffer */
2949 2952 Py_TPFLAGS_DEFAULT, /* tp_flags */
2950 2953 "revlog index", /* tp_doc */
2951 2954 0, /* tp_traverse */
2952 2955 0, /* tp_clear */
2953 2956 0, /* tp_richcompare */
2954 2957 0, /* tp_weaklistoffset */
2955 2958 0, /* tp_iter */
2956 2959 0, /* tp_iternext */
2957 2960 index_methods, /* tp_methods */
2958 2961 index_members, /* tp_members */
2959 2962 index_getset, /* tp_getset */
2960 2963 0, /* tp_base */
2961 2964 0, /* tp_dict */
2962 2965 0, /* tp_descr_get */
2963 2966 0, /* tp_descr_set */
2964 2967 0, /* tp_dictoffset */
2965 2968 (initproc)index_init, /* tp_init */
2966 2969 0, /* tp_alloc */
2967 2970 };
2968 2971
2969 2972 /*
2970 2973 * returns a tuple of the form (index, cache) with elements as
2971 2974 * follows:
2972 2975 *
2973 2976 * index: an index object that lazily parses Revlog (v1 or v2) records
2974 2977 * cache: if data is inlined, a tuple (0, index_file_content), else None
2975 2978 * index_file_content could be a string, or a buffer
2976 2979 *
2977 2980 * added complications are for backwards compatibility
2978 2981 */
2979 2982 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2980 2983 {
2981 2984 PyObject *cache = NULL;
2982 2985 indexObject *idx;
2983 2986 int ret;
2984 2987
2985 2988 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2986 2989 if (idx == NULL)
2987 2990 goto bail;
2988 2991
2989 2992 ret = index_init(idx, args, kwargs);
2990 2993 if (ret == -1)
2991 2994 goto bail;
2992 2995
2993 2996 if (idx->inlined) {
2994 2997 cache = Py_BuildValue("iO", 0, idx->data);
2995 2998 if (cache == NULL)
2996 2999 goto bail;
2997 3000 } else {
2998 3001 cache = Py_None;
2999 3002 Py_INCREF(cache);
3000 3003 }
3001 3004
3002 3005 return Py_BuildValue("NN", idx, cache);
3003 3006
3004 3007 bail:
3005 3008 Py_XDECREF(idx);
3006 3009 Py_XDECREF(cache);
3007 3010 return NULL;
3008 3011 }
3009 3012
3010 3013 static Revlog_CAPI CAPI = {
3011 3014 /* increment the abi_version field upon each change in the Revlog_CAPI
3012 3015 struct or in the ABI of the listed functions */
3013 3016 2,
3014 3017 index_length,
3015 3018 index_node,
3016 3019 HgRevlogIndex_GetParents,
3017 3020 };
3018 3021
3019 3022 void revlog_module_init(PyObject *mod)
3020 3023 {
3021 3024 PyObject *caps = NULL;
3022 3025 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3023 3026 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3024 3027 return;
3025 3028 Py_INCREF(&HgRevlogIndex_Type);
3026 3029 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3027 3030
3028 3031 nodetreeType.tp_new = PyType_GenericNew;
3029 3032 if (PyType_Ready(&nodetreeType) < 0)
3030 3033 return;
3031 3034 Py_INCREF(&nodetreeType);
3032 3035 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3033 3036
3034 3037 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3035 3038 if (caps != NULL)
3036 3039 PyModule_AddObject(mod, "revlog_CAPI", caps);
3037 3040 }
@@ -1,417 +1,415 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 sha1nodeconstants,
16 16 )
17 17 from .. import (
18 18 error,
19 19 pycompat,
20 20 util,
21 21 )
22 22
23 23 from ..revlogutils import nodemap as nodemaputil
24 24 from ..revlogutils import constants as revlog_constants
25 25
26 26 stringio = pycompat.bytesio
27 27
28 28
29 29 _pack = struct.pack
30 30 _unpack = struct.unpack
31 31 _compress = zlib.compress
32 32 _decompress = zlib.decompress
33 33
34 34 # Some code below makes tuples directly because it's more convenient. However,
35 35 # code outside this module should always use dirstatetuple.
36 36 def dirstatetuple(*x):
37 37 # x is a tuple
38 38 return x
39 39
40 40
41 41 def gettype(q):
42 42 return int(q & 0xFFFF)
43 43
44 44
45 45 def offset_type(offset, type):
46 46 return int(int(offset) << 16 | type)
47 47
48 48
49 49 class BaseIndexObject(object):
50 50 # Format of an index entry according to Python's `struct` language
51 51 index_format = revlog_constants.INDEX_ENTRY_V1
52 52 # Size of a C unsigned long long int, platform independent
53 53 big_int_size = struct.calcsize(b'>Q')
54 54 # Size of a C long int, platform independent
55 55 int_size = struct.calcsize(b'>i')
56 56 # An empty index entry, used as a default value to be overridden, or nullrev
57 57 null_item = (
58 58 0,
59 59 0,
60 60 0,
61 61 -1,
62 62 -1,
63 63 -1,
64 64 -1,
65 65 sha1nodeconstants.nullid,
66 66 0,
67 67 0,
68 68 revlog_constants.COMP_MODE_INLINE,
69 69 )
70 70
71 71 @util.propertycache
72 72 def entry_size(self):
73 73 return self.index_format.size
74 74
75 75 @property
76 76 def nodemap(self):
77 77 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
78 78 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
79 79 return self._nodemap
80 80
81 81 @util.propertycache
82 82 def _nodemap(self):
83 83 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
84 84 for r in range(0, len(self)):
85 85 n = self[r][7]
86 86 nodemap[n] = r
87 87 return nodemap
88 88
89 89 def has_node(self, node):
90 90 """return True if the node exist in the index"""
91 91 return node in self._nodemap
92 92
93 93 def rev(self, node):
94 94 """return a revision for a node
95 95
96 96 If the node is unknown, raise a RevlogError"""
97 97 return self._nodemap[node]
98 98
99 99 def get_rev(self, node):
100 100 """return a revision for a node
101 101
102 102 If the node is unknown, return None"""
103 103 return self._nodemap.get(node)
104 104
105 105 def _stripnodes(self, start):
106 106 if '_nodemap' in vars(self):
107 107 for r in range(start, len(self)):
108 108 n = self[r][7]
109 109 del self._nodemap[n]
110 110
111 111 def clearcaches(self):
112 112 self.__dict__.pop('_nodemap', None)
113 113
114 114 def __len__(self):
115 115 return self._lgt + len(self._extra)
116 116
117 117 def append(self, tup):
118 118 if '_nodemap' in vars(self):
119 119 self._nodemap[tup[7]] = len(self)
120 120 data = self._pack_entry(tup)
121 121 self._extra.append(data)
122 122
123 123 def _pack_entry(self, entry):
124 124 assert entry[8] == 0
125 125 assert entry[9] == 0
126 126 return self.index_format.pack(*entry[:8])
127 127
128 128 def _check_index(self, i):
129 129 if not isinstance(i, int):
130 130 raise TypeError(b"expecting int indexes")
131 131 if i < 0 or i >= len(self):
132 132 raise IndexError
133 133
134 134 def __getitem__(self, i):
135 135 if i == -1:
136 136 return self.null_item
137 137 self._check_index(i)
138 138 if i >= self._lgt:
139 139 data = self._extra[i - self._lgt]
140 140 else:
141 141 index = self._calculate_index(i)
142 142 data = self._data[index : index + self.entry_size]
143 143 r = self._unpack_entry(data)
144 144 if self._lgt and i == 0:
145 145 r = (offset_type(0, gettype(r[0])),) + r[1:]
146 146 return r
147 147
148 148 def _unpack_entry(self, data):
149 149 r = self.index_format.unpack(data)
150 150 r = r + (0, 0, revlog_constants.COMP_MODE_INLINE)
151 151 return r
152 152
153 153 def pack_header(self, header):
154 154 """pack header information as binary"""
155 155 v_fmt = revlog_constants.INDEX_HEADER
156 156 return v_fmt.pack(header)
157 157
158 158 def entry_binary(self, rev):
159 159 """return the raw binary string representing a revision"""
160 160 entry = self[rev]
161 161 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
162 162 if rev == 0:
163 163 p = p[revlog_constants.INDEX_HEADER.size :]
164 164 return p
165 165
166 166
167 167 class IndexObject(BaseIndexObject):
168 168 def __init__(self, data):
169 169 assert len(data) % self.entry_size == 0, (
170 170 len(data),
171 171 self.entry_size,
172 172 len(data) % self.entry_size,
173 173 )
174 174 self._data = data
175 175 self._lgt = len(data) // self.entry_size
176 176 self._extra = []
177 177
178 178 def _calculate_index(self, i):
179 179 return i * self.entry_size
180 180
181 181 def __delitem__(self, i):
182 182 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
183 183 raise ValueError(b"deleting slices only supports a:-1 with step 1")
184 184 i = i.start
185 185 self._check_index(i)
186 186 self._stripnodes(i)
187 187 if i < self._lgt:
188 188 self._data = self._data[: i * self.entry_size]
189 189 self._lgt = i
190 190 self._extra = []
191 191 else:
192 192 self._extra = self._extra[: i - self._lgt]
193 193
194 194
195 195 class PersistentNodeMapIndexObject(IndexObject):
196 196 """a Debug oriented class to test persistent nodemap
197 197
198 198 We need a simple python object to test API and higher level behavior. See
199 199 the Rust implementation for more serious usage. This should be used only
200 200 through the dedicated `devel.persistent-nodemap` config.
201 201 """
202 202
203 203 def nodemap_data_all(self):
204 204 """Return bytes containing a full serialization of a nodemap
205 205
206 206 The nodemap should be valid for the full set of revisions in the
207 207 index."""
208 208 return nodemaputil.persistent_data(self)
209 209
210 210 def nodemap_data_incremental(self):
211 211 """Return bytes containing a incremental update to persistent nodemap
212 212
213 213 This containst the data for an append-only update of the data provided
214 214 in the last call to `update_nodemap_data`.
215 215 """
216 216 if self._nm_root is None:
217 217 return None
218 218 docket = self._nm_docket
219 219 changed, data = nodemaputil.update_persistent_data(
220 220 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
221 221 )
222 222
223 223 self._nm_root = self._nm_max_idx = self._nm_docket = None
224 224 return docket, changed, data
225 225
226 226 def update_nodemap_data(self, docket, nm_data):
227 227 """provide full block of persisted binary data for a nodemap
228 228
229 229 The data are expected to come from disk. See `nodemap_data_all` for a
230 230 produceur of such data."""
231 231 if nm_data is not None:
232 232 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
233 233 if self._nm_root:
234 234 self._nm_docket = docket
235 235 else:
236 236 self._nm_root = self._nm_max_idx = self._nm_docket = None
237 237
238 238
239 239 class InlinedIndexObject(BaseIndexObject):
240 240 def __init__(self, data, inline=0):
241 241 self._data = data
242 242 self._lgt = self._inline_scan(None)
243 243 self._inline_scan(self._lgt)
244 244 self._extra = []
245 245
246 246 def _inline_scan(self, lgt):
247 247 off = 0
248 248 if lgt is not None:
249 249 self._offsets = [0] * lgt
250 250 count = 0
251 251 while off <= len(self._data) - self.entry_size:
252 252 start = off + self.big_int_size
253 253 (s,) = struct.unpack(
254 254 b'>i',
255 255 self._data[start : start + self.int_size],
256 256 )
257 257 if lgt is not None:
258 258 self._offsets[count] = off
259 259 count += 1
260 260 off += self.entry_size + s
261 261 if off != len(self._data):
262 262 raise ValueError(b"corrupted data")
263 263 return count
264 264
265 265 def __delitem__(self, i):
266 266 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
267 267 raise ValueError(b"deleting slices only supports a:-1 with step 1")
268 268 i = i.start
269 269 self._check_index(i)
270 270 self._stripnodes(i)
271 271 if i < self._lgt:
272 272 self._offsets = self._offsets[:i]
273 273 self._lgt = i
274 274 self._extra = []
275 275 else:
276 276 self._extra = self._extra[: i - self._lgt]
277 277
278 278 def _calculate_index(self, i):
279 279 return self._offsets[i]
280 280
281 281
282 282 def parse_index2(data, inline, revlogv2=False):
283 283 if not inline:
284 284 cls = IndexObject2 if revlogv2 else IndexObject
285 285 return cls(data), None
286 286 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
287 287 return cls(data, inline), (0, data)
288 288
289 289
290 290 class Index2Mixin(object):
291 291 index_format = revlog_constants.INDEX_ENTRY_V2
292 292
293 293 def replace_sidedata_info(
294 294 self, rev, sidedata_offset, sidedata_length, offset_flags
295 295 ):
296 296 """
297 297 Replace an existing index entry's sidedata offset and length with new
298 298 ones.
299 299 This cannot be used outside of the context of sidedata rewriting,
300 300 inside the transaction that creates the revision `rev`.
301 301 """
302 302 if rev < 0:
303 303 raise KeyError
304 304 self._check_index(rev)
305 305 if rev < self._lgt:
306 306 msg = b"cannot rewrite entries outside of this transaction"
307 307 raise KeyError(msg)
308 308 else:
309 309 entry = list(self[rev])
310 310 entry[0] = offset_flags
311 311 entry[8] = sidedata_offset
312 312 entry[9] = sidedata_length
313 313 entry = tuple(entry)
314 314 new = self._pack_entry(entry)
315 315 self._extra[rev - self._lgt] = new
316 316
317 317 def _unpack_entry(self, data):
318 return self.index_format.unpack(data) + (
319 revlog_constants.COMP_MODE_INLINE,
320 )
318 return self.index_format.unpack(data)
321 319
322 320 def _pack_entry(self, entry):
323 return self.index_format.pack(*entry[:10])
321 return self.index_format.pack(*entry[:11])
324 322
325 323 def entry_binary(self, rev):
326 324 """return the raw binary string representing a revision"""
327 325 entry = self[rev]
328 326 return self._pack_entry(entry)
329 327
330 328 def pack_header(self, header):
331 329 """pack header information as binary"""
332 330 msg = 'version header should go in the docket, not the index: %d'
333 331 msg %= header
334 332 raise error.ProgrammingError(msg)
335 333
336 334
337 335 class IndexObject2(Index2Mixin, IndexObject):
338 336 pass
339 337
340 338
341 339 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
342 340 def _inline_scan(self, lgt):
343 341 sidedata_length_pos = 72
344 342 off = 0
345 343 if lgt is not None:
346 344 self._offsets = [0] * lgt
347 345 count = 0
348 346 while off <= len(self._data) - self.entry_size:
349 347 start = off + self.big_int_size
350 348 (data_size,) = struct.unpack(
351 349 b'>i',
352 350 self._data[start : start + self.int_size],
353 351 )
354 352 start = off + sidedata_length_pos
355 353 (side_data_size,) = struct.unpack(
356 354 b'>i', self._data[start : start + self.int_size]
357 355 )
358 356 if lgt is not None:
359 357 self._offsets[count] = off
360 358 count += 1
361 359 off += self.entry_size + data_size + side_data_size
362 360 if off != len(self._data):
363 361 raise ValueError(b"corrupted data")
364 362 return count
365 363
366 364
367 365 def parse_index_devel_nodemap(data, inline):
368 366 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
369 367 return PersistentNodeMapIndexObject(data), None
370 368
371 369
372 370 def parse_dirstate(dmap, copymap, st):
373 371 parents = [st[:20], st[20:40]]
374 372 # dereference fields so they will be local in loop
375 373 format = b">cllll"
376 374 e_size = struct.calcsize(format)
377 375 pos1 = 40
378 376 l = len(st)
379 377
380 378 # the inner loop
381 379 while pos1 < l:
382 380 pos2 = pos1 + e_size
383 381 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
384 382 pos1 = pos2 + e[4]
385 383 f = st[pos2:pos1]
386 384 if b'\0' in f:
387 385 f, c = f.split(b'\0')
388 386 copymap[f] = c
389 387 dmap[f] = e[:4]
390 388 return parents
391 389
392 390
393 391 def pack_dirstate(dmap, copymap, pl, now):
394 392 now = int(now)
395 393 cs = stringio()
396 394 write = cs.write
397 395 write(b"".join(pl))
398 396 for f, e in pycompat.iteritems(dmap):
399 397 if e[0] == b'n' and e[3] == now:
400 398 # The file was last modified "simultaneously" with the current
401 399 # write to dirstate (i.e. within the same second for file-
402 400 # systems with a granularity of 1 sec). This commonly happens
403 401 # for at least a couple of files on 'update'.
404 402 # The user could change the file without changing its size
405 403 # within the same second. Invalidate the file's mtime in
406 404 # dirstate, forcing future 'status' calls to compare the
407 405 # contents of the file if the size is the same. This prevents
408 406 # mistakenly treating such files as clean.
409 407 e = dirstatetuple(e[0], e[1], e[2], -1)
410 408 dmap[f] = e
411 409
412 410 if f in copymap:
413 411 f = b"%s\0%s" % (f, copymap[f])
414 412 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
415 413 write(e)
416 414 write(f)
417 415 return cs.getvalue()
@@ -1,164 +1,165 b''
1 1 # revlogdeltas.py - constant used for revlog logic.
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 15
16 16 ### Internal utily constants
17 17
18 18 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
19 19 KIND_MANIFESTLOG = 1002
20 20 KIND_FILELOG = 1003
21 21 KIND_OTHER = 1004
22 22
23 23 ALL_KINDS = {
24 24 KIND_CHANGELOG,
25 25 KIND_MANIFESTLOG,
26 26 KIND_FILELOG,
27 27 KIND_OTHER,
28 28 }
29 29
30 30 ### main revlog header
31 31
32 32 INDEX_HEADER = struct.Struct(b">I")
33 33
34 34 ## revlog version
35 35 REVLOGV0 = 0
36 36 REVLOGV1 = 1
37 37 # Dummy value until file format is finalized.
38 38 REVLOGV2 = 0xDEAD
39 39
40 40 ## global revlog header flags
41 41 # Shared across v1 and v2.
42 42 FLAG_INLINE_DATA = 1 << 16
43 43 # Only used by v1, implied by v2.
44 44 FLAG_GENERALDELTA = 1 << 17
45 45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 46 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 48 REVLOGV0_FLAGS = 0
49 49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
50 50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
51 51
52 52 ### individual entry
53 53
54 54 ## index v0:
55 55 # 4 bytes: offset
56 56 # 4 bytes: compressed length
57 57 # 4 bytes: base rev
58 58 # 4 bytes: link rev
59 59 # 20 bytes: parent 1 nodeid
60 60 # 20 bytes: parent 2 nodeid
61 61 # 20 bytes: nodeid
62 62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
63 63
64 64 ## index v1
65 65 # 6 bytes: offset
66 66 # 2 bytes: flags
67 67 # 4 bytes: compressed length
68 68 # 4 bytes: uncompressed length
69 69 # 4 bytes: base rev
70 70 # 4 bytes: link rev
71 71 # 4 bytes: parent 1 rev
72 72 # 4 bytes: parent 2 rev
73 73 # 32 bytes: nodeid
74 74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
75 75 assert INDEX_ENTRY_V1.size == 32 * 2
76 76
77 77 # 6 bytes: offset
78 78 # 2 bytes: flags
79 79 # 4 bytes: compressed length
80 80 # 4 bytes: uncompressed length
81 81 # 4 bytes: base rev
82 82 # 4 bytes: link rev
83 83 # 4 bytes: parent 1 rev
84 84 # 4 bytes: parent 2 rev
85 85 # 32 bytes: nodeid
86 86 # 8 bytes: sidedata offset
87 87 # 4 bytes: sidedata compressed length
88 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
89 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
90 assert INDEX_ENTRY_V2.size == 32 * 3
88 # 1 bytes: compression mode (2 lower bit are data_compression_mode)
89 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
90 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
91 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
91 92
92 93 # revlog index flags
93 94
94 95 # For historical reasons, revlog's internal flags were exposed via the
95 96 # wire protocol and are even exposed in parts of the storage APIs.
96 97
97 98 # revision has censor metadata, must be verified
98 99 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
99 100 # revision hash does not match data (narrowhg)
100 101 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
101 102 # revision data is stored externally
102 103 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
103 104 # revision changes files in a way that could affect copy tracing.
104 105 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
105 106 REVIDX_DEFAULT_FLAGS = 0
106 107 # stable order in which flags need to be processed and their processors applied
107 108 REVIDX_FLAGS_ORDER = [
108 109 REVIDX_ISCENSORED,
109 110 REVIDX_ELLIPSIS,
110 111 REVIDX_EXTSTORED,
111 112 REVIDX_HASCOPIESINFO,
112 113 ]
113 114
114 115 # bitmark for flags that could cause rawdata content change
115 116 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
116 117
117 118 ## chunk compression mode constants:
118 119 # These constants are used in revlog version >=2 to denote the compression used
119 120 # for a chunk.
120 121
121 122 # Chunk use a compression mode stored "inline" at the start of the chunk
122 123 # itself. This is the mode always used for revlog version "0" and "1"
123 124 COMP_MODE_INLINE = 2
124 125
125 126 SUPPORTED_FLAGS = {
126 127 REVLOGV0: REVLOGV0_FLAGS,
127 128 REVLOGV1: REVLOGV1_FLAGS,
128 129 REVLOGV2: REVLOGV2_FLAGS,
129 130 }
130 131
131 132 _no = lambda flags: False
132 133 _yes = lambda flags: True
133 134
134 135
135 136 def _from_flag(flag):
136 137 return lambda flags: bool(flags & flag)
137 138
138 139
139 140 FEATURES_BY_VERSION = {
140 141 REVLOGV0: {
141 142 b'inline': _no,
142 143 b'generaldelta': _no,
143 144 b'sidedata': False,
144 145 b'docket': False,
145 146 },
146 147 REVLOGV1: {
147 148 b'inline': _from_flag(FLAG_INLINE_DATA),
148 149 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
149 150 b'sidedata': False,
150 151 b'docket': False,
151 152 },
152 153 REVLOGV2: {
153 154 # The point of inline-revlog is to reduce the number of files used in
154 155 # the store. Using a docket defeat this purpose. So we needs other
155 156 # means to reduce the number of files for revlogv2.
156 157 b'inline': _no,
157 158 b'generaldelta': _yes,
158 159 b'sidedata': True,
159 160 b'docket': True,
160 161 },
161 162 }
162 163
163 164
164 165 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
General Comments 0
You need to be logged in to leave comments. Login now